//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

extern cl::opt<bool> EnableARMLongCalls;

namespace {

  // All possible address modes, plus some.
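  // (For example, a load from [r1, #8] is modeled as BaseType == RegBase with
  // Base.Reg holding r1's vreg and Offset == 8; a spill-slot access instead
  // uses FrameIndexBase with Base.FI.)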
  typedef struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;

    // Innocuous defaults for our address.
    Address()
     : BaseType(RegBase), Offset(0) {
       Base.Reg = 0;
     }
  } Address;

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo)
    : FastISel(funcInfo, libInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
  private:
    unsigned FastEmitInst_(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC);
    unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);
    unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              unsigned Op1, bool Op1IsKill,
                              unsigned Op2, bool Op2IsKill);
    unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             const ConstantFP *FPImm);
    unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                              const TargetRegisterClass *RC,
                              unsigned Op0, bool Op0IsKill,
                              unsigned Op1, bool Op1IsKill,
                              uint64_t Imm);
    unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            uint64_t Imm);
    unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             uint64_t Imm1, uint64_t Imm2);

    unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                        unsigned Op0, bool Op0IsKill,
                                        uint32_t Idx);

    // Backend specific FastISel code.
  private:
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
    virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                               const LoadInst *LI);
    virtual bool FastLowerArguments();
  private:
  #include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

    // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                     unsigned Alignment = 0, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                      unsigned Alignment = 0);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                               unsigned Alignment);
    unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned ARMMaterializeInt(const Constant *C, MVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);
    unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);

    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                  bool Return,
                                  bool isVarArg);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool isVarArg);
    unsigned getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool isVarArg);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(MVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              unsigned Flags, bool useAM3);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If this is a Thumb2 function, or this isn't a NEON instruction, predication
  // was already handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
       AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    if (MCID.OpInfo[i].isPredicate())
      return true;

  return false;
}

// If the machine instruction is predicable, add the default predicate operands;
// if it has an optional CC operand, add the default for that as well.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? Or are we a NEON instruction in ARM mode with a
  // predicate operand? In the latter case the instruction isn't marked
  // predicable, but we add the predicate anyway.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate?  CPSR is true iff the optional def is
  // CPSR; all other optional defs in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

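  // If the instruction has an explicit def, write straight into ResultReg;
  // otherwise the result lands in the instruction's first implicit def and is
  // copied into ResultReg afterwards.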
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                   TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addReg(Op2, Op2IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm1, uint64_t Imm2) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return false;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {

  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return false;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
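  // (For example, 0x1234 becomes a single movw via MOVi16/t2MOVi16.)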
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
      &ARM::GPRRegClass;
    unsigned ImmReg = createResultReg(RC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ImmReg)
                    .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), ImmReg)
                      .addImm(Imm));
      return ImmReg;
    }
  }

  // Load from constant pool.  For now 32-bit only.
  if (VT != MVT::i32)
    return false;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();
  bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
  const TargetRegisterClass *RC = isThumb2 ?
    (const TargetRegisterClass*)&ARM::rGPRRegClass :
    (const TargetRegisterClass*)&ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);

  // Use movw+movt when possible, it avoids constant pool entries.
  // Darwin targets don't support movt with Reloc::Static, see
  // ARMTargetLowering::LowerGlobalAddressDarwin.  Other targets only support
  // static movt relocations.
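  // (On ELF, for instance, this typically expands to a movw/movt pair using
  // :lower16:/:upper16: relocations.)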
  if (Subtarget->useMovt() &&
      Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
    unsigned Opc;
    switch (RelocM) {
    case Reloc::PIC_:
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
      break;
    case Reloc::DynamicNoPIC:
      Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
      break;
    default:
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
      break;
    }
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg).addGlobalAddress(GV));
  } else {
    // MachineConstantPool wants an explicit alignment.
    unsigned Align = TD.getPrefTypeAlignment(GV->getType());
    if (Align == 0) {
      // TODO: Figure out if this is correct.
      Align = TD.getTypeAllocSize(GV->getType());
    }

    if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
      return ARMLowerPICELF(GV, Align, VT);

    // Grab index.
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
      (Subtarget->isThumb() ? 4 : 8);
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
        .addConstantPoolIndex(Idx);
      if (RelocM == Reloc::PIC_)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                    DestReg)
        .addConstantPoolIndex(Idx)
        .addImm(0);
      AddOptionalDefs(MIB);

      if (RelocM == Reloc::PIC_) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          DL, TII.get(Opc), NewDestReg)
                                  .addReg(DestReg)
                                  .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  if (IsIndirect) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
                    NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}

unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);

unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(SI->second)
                            .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

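// ARMSimplifyAddress - Fold the offset into the addressing mode when it fits
// the immediate range of the chosen load/store form; otherwise materialize the
// reg+offset sum into a register and clear the offset.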
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ?
      (const TargetRegisterClass*)&ARM::tGPRRegClass :
      (const TargetRegisterClass*)&ARM::GPRRegClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(Addr.Base.FI)
                            .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}

void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       unsigned Flags, bool useAM3) {
  // The addrmode5 output expects the offset already divided by 4 (the
  // SelectionDAG path divides it and the encoding multiplies it back), so do
  // the same here.
  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO =
          FuncInfo.MF->getMachineMemOperand(
                                  MachinePointerInfo::getFixedStack(FI, Offset),
                                  Flags,
                                  MFI.getObjectSize(FI),
                                  MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
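    // The AM3 immediate packs the offset magnitude in the low 8 bits; the
    // 0x100 bit marks a negative (subtracted) offset.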
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}

bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = &ARM::GPRRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned loads need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert (ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load.  Now we must move from the GPR to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
    return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb2 ?
        (const TargetRegisterClass*)&ARM::tGPRRegClass :
        (const TargetRegisterClass*)&ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned stores need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
          return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
    return false;
  return true;
}

static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {

      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    FastEmitBranch(Target, DL);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}

bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0) return false;

  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
                  .addReg(AddrReg));

  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);

  return true;
}

bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(Ty, true);
  if (!SrcEVT.isSimple()) return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // than a cmn, because there is no way to represent 2147483648 as a
1427      // signed 32-bit int.
1428      if (Imm < 0 && Imm != (int)0x80000000) {
1429        isNegativeImm = true;
1430        Imm = -Imm;
1431      }
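      // getSOImmVal / getT2SOImmVal return -1 if the value cannot be encoded
      // as an immediate operand, in which case we fall back to a register.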
1432      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1433        (ARM_AM::getSOImmVal(Imm) != -1);
1434    }
1435  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1436    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1437      if (ConstFP->isZero() && !ConstFP->isNegative())
1438        UseImm = true;
1439  }
1440
1441  unsigned CmpOpc;
1442  bool isICmp = true;
1443  bool needsExt = false;
1444  switch (SrcVT.SimpleTy) {
1445    default: return false;
1446    // TODO: Verify compares.
1447    case MVT::f32:
1448      isICmp = false;
1449      CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
1450      break;
1451    case MVT::f64:
1452      isICmp = false;
1453      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
1454      break;
1455    case MVT::i1:
1456    case MVT::i8:
1457    case MVT::i16:
1458      needsExt = true;
1459    // Intentional fall-through.
1460    case MVT::i32:
1461      if (isThumb2) {
1462        if (!UseImm)
1463          CmpOpc = ARM::t2CMPrr;
1464        else
1465          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
1466      } else {
1467        if (!UseImm)
1468          CmpOpc = ARM::CMPrr;
1469        else
1470          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
1471      }
1472      break;
1473  }
1474
1475  unsigned SrcReg1 = getRegForValue(Src1Value);
1476  if (SrcReg1 == 0) return false;
1477
1478  unsigned SrcReg2 = 0;
1479  if (!UseImm) {
1480    SrcReg2 = getRegForValue(Src2Value);
1481    if (SrcReg2 == 0) return false;
1482  }
1483
  // We have i1, i8, or i16; we need to either zero extend or sign extend.
1485  if (needsExt) {
1486    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1487    if (SrcReg1 == 0) return false;
1488    if (!UseImm) {
1489      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1490      if (SrcReg2 == 0) return false;
1491    }
1492  }
1493
1494  if (!UseImm) {
1495    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1496                            TII.get(CmpOpc))
1497                    .addReg(SrcReg1).addReg(SrcReg2));
1498  } else {
1499    MachineInstrBuilder MIB;
1500    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
1501      .addReg(SrcReg1);
1502
1503    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1504    if (isICmp)
1505      MIB.addImm(Imm);
1506    AddOptionalDefs(MIB);
1507  }
1508
1509  // For floating point we need to move the result to a comparison register
1510  // that we can then use for branches.
1511  if (Ty->isFloatTy() || Ty->isDoubleTy())
1512    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1513                            TII.get(ARM::FMSTAT)));
1514  return true;
1515}
1516
1517bool ARMFastISel::SelectCmp(const Instruction *I) {
1518  const CmpInst *CI = cast<CmpInst>(I);
1519
1520  // Get the compare predicate.
1521  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1522
1523  // We may not handle every CC for now.
1524  if (ARMPred == ARMCC::AL) return false;
1525
1526  // Emit the compare.
1527  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1528    return false;
1529
1530  // Now set a register based on the comparison. Explicitly set the predicates
1531  // here.
1532  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1533  const TargetRegisterClass *RC = isThumb2 ?
1534    (const TargetRegisterClass*)&ARM::rGPRRegClass :
1535    (const TargetRegisterClass*)&ARM::GPRRegClass;
1536  unsigned DestReg = createResultReg(RC);
1537  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1538  unsigned ZeroReg = TargetMaterializeConstant(Zero);
1539  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
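  // MOVCCi keeps the zero from ZeroReg unless the predicate holds, in which
  // case it writes the immediate 1, materializing the i1 result.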
1540  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
1541          .addReg(ZeroReg).addImm(1)
1542          .addImm(ARMPred).addReg(ARM::CPSR);
1543
1544  UpdateValueMap(I, DestReg);
1545  return true;
1546}
1547
1548bool ARMFastISel::SelectFPExt(const Instruction *I) {
1549  // Make sure we have VFP and that we're extending float to double.
1550  if (!Subtarget->hasVFP2()) return false;
1551
1552  Value *V = I->getOperand(0);
1553  if (!I->getType()->isDoubleTy() ||
1554      !V->getType()->isFloatTy()) return false;
1555
1556  unsigned Op = getRegForValue(V);
1557  if (Op == 0) return false;
1558
1559  unsigned Result = createResultReg(&ARM::DPRRegClass);
1560  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1561                          TII.get(ARM::VCVTDS), Result)
1562                  .addReg(Op));
1563  UpdateValueMap(I, Result);
1564  return true;
1565}
1566
1567bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1568  // Make sure we have VFP and that we're truncating double to float.
1569  if (!Subtarget->hasVFP2()) return false;
1570
1571  Value *V = I->getOperand(0);
1572  if (!(I->getType()->isFloatTy() &&
1573        V->getType()->isDoubleTy())) return false;
1574
1575  unsigned Op = getRegForValue(V);
1576  if (Op == 0) return false;
1577
1578  unsigned Result = createResultReg(&ARM::SPRRegClass);
1579  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1580                          TII.get(ARM::VCVTSD), Result)
1581                  .addReg(Op));
1582  UpdateValueMap(I, Result);
1583  return true;
1584}
1585
1586bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
1587  // Make sure we have VFP.
1588  if (!Subtarget->hasVFP2()) return false;
1589
1590  MVT DstVT;
1591  Type *Ty = I->getType();
1592  if (!isTypeLegal(Ty, DstVT))
1593    return false;
1594
1595  Value *Src = I->getOperand(0);
1596  EVT SrcEVT = TLI.getValueType(Src->getType(), true);
1597  if (!SrcEVT.isSimple())
1598    return false;
1599  MVT SrcVT = SrcEVT.getSimpleVT();
1600  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
1601    return false;
1602
1603  unsigned SrcReg = getRegForValue(Src);
1604  if (SrcReg == 0) return false;
1605
1606  // Handle sign-extension.
1607  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32, /*isZExt*/!isSigned);
1610    if (SrcReg == 0) return false;
1611  }
1612
  // The conversion routine works on fp-reg to fp-reg, but the operand above
  // was an integer, so move it to the fp registers if possible.
1615  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
1616  if (FP == 0) return false;
1617
1618  unsigned Opc;
1619  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
1620  else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
1621  else return false;
1622
1623  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
1624  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
1625                          ResultReg)
1626                  .addReg(FP));
1627  UpdateValueMap(I, ResultReg);
1628  return true;
1629}
1630
1631bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
1632  // Make sure we have VFP.
1633  if (!Subtarget->hasVFP2()) return false;
1634
1635  MVT DstVT;
1636  Type *RetTy = I->getType();
1637  if (!isTypeLegal(RetTy, DstVT))
1638    return false;
1639
1640  unsigned Op = getRegForValue(I->getOperand(0));
1641  if (Op == 0) return false;
1642
1643  unsigned Opc;
1644  Type *OpTy = I->getOperand(0)->getType();
1645  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
1646  else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
1647  else return false;
1648
1649  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
1650  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1651  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
1652                          ResultReg)
1653                  .addReg(Op));
1654
1655  // This result needs to be in an integer register, but the conversion only
1656  // takes place in fp-regs.
1657  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1658  if (IntReg == 0) return false;
1659
1660  UpdateValueMap(I, IntReg);
1661  return true;
1662}
1663
1664bool ARMFastISel::SelectSelect(const Instruction *I) {
1665  MVT VT;
1666  if (!isTypeLegal(I->getType(), VT))
1667    return false;
1668
1669  // Things need to be register sized for register moves.
1670  if (VT != MVT::i32) return false;
1671
1672  unsigned CondReg = getRegForValue(I->getOperand(0));
1673  if (CondReg == 0) return false;
1674  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1675  if (Op1Reg == 0) return false;
1676
1677  // Check to see if we can use an immediate in the conditional move.
1678  int Imm = 0;
1679  bool UseImm = false;
1680  bool isNegativeImm = false;
1681  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
1682    assert (VT == MVT::i32 && "Expecting an i32.");
1683    Imm = (int)ConstInt->getValue().getZExtValue();
1684    if (Imm < 0) {
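      // MVNcc moves the bitwise complement of its immediate, so record ~Imm
      // here and select MVNCCi instead of MOVCCi below.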
1685      isNegativeImm = true;
1686      Imm = ~Imm;
1687    }
1688    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1689      (ARM_AM::getSOImmVal(Imm) != -1);
1690  }
1691
1692  unsigned Op2Reg = 0;
1693  if (!UseImm) {
1694    Op2Reg = getRegForValue(I->getOperand(2));
1695    if (Op2Reg == 0) return false;
1696  }
1697
1698  unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
1699  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
1700                  .addReg(CondReg).addImm(0));
1701
1702  unsigned MovCCOpc;
1703  const TargetRegisterClass *RC;
1704  if (!UseImm) {
1705    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
1706    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
1707  } else {
1708    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
1709    if (!isNegativeImm)
1710      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1711    else
1712      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
1713  }
1714  unsigned ResultReg = createResultReg(RC);
1715  if (!UseImm)
1716    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
1717    .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
1718  else
1719    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
1720    .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
1721  UpdateValueMap(I, ResultReg);
1722  return true;
1723}
1724
1725bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1726  MVT VT;
1727  Type *Ty = I->getType();
1728  if (!isTypeLegal(Ty, VT))
1729    return false;
1730
1731  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss, go ahead and return false and we'll pick
  // it up later.
1734  if (Subtarget->hasDivide()) return false;
1735
1736  // Otherwise emit a libcall.
1737  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1738  if (VT == MVT::i8)
1739    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1740  else if (VT == MVT::i16)
1741    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1742  else if (VT == MVT::i32)
1743    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1744  else if (VT == MVT::i64)
1745    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1746  else if (VT == MVT::i128)
1747    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1748  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1749
1750  return ARMEmitLibcall(I, LC);
1751}
1752
1753bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1754  MVT VT;
1755  Type *Ty = I->getType();
1756  if (!isTypeLegal(Ty, VT))
1757    return false;
1758
1759  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1760  if (VT == MVT::i8)
1761    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1762  else if (VT == MVT::i16)
1763    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1764  else if (VT == MVT::i32)
1765    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1766  else if (VT == MVT::i64)
1767    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1768  else if (VT == MVT::i128)
1769    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1770  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1771
1772  return ARMEmitLibcall(I, LC);
1773}
1774
1775bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1776  EVT DestVT  = TLI.getValueType(I->getType(), true);
1777
1778  // We can get here in the case when we have a binary operation on a non-legal
1779  // type and the target independent selector doesn't know how to handle it.
1780  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
1781    return false;
1782
1783  unsigned Opc;
1784  switch (ISDOpcode) {
1785    default: return false;
1786    case ISD::ADD:
1787      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
1788      break;
1789    case ISD::OR:
1790      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
1791      break;
1792    case ISD::SUB:
1793      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
1794      break;
1795  }
1796
1797  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1798  if (SrcReg1 == 0) return false;
1799
1800  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather than materializing the value in a register.
1802  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1803  if (SrcReg2 == 0) return false;
1804
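  // The operation is carried out in a full 32-bit register; the high bits of
  // the narrow result are left undefined, as SelectTrunc assumes.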
1805  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
1806  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1807                          TII.get(Opc), ResultReg)
1808                  .addReg(SrcReg1).addReg(SrcReg2));
1809  UpdateValueMap(I, ResultReg);
1810  return true;
1811}
1812
1813bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
1814  EVT FPVT = TLI.getValueType(I->getType(), true);
1815  if (!FPVT.isSimple()) return false;
1816  MVT VT = FPVT.getSimpleVT();
1817
1818  // We can get here in the case when we want to use NEON for our fp
1819  // operations, but can't figure out how to. Just use the vfp instructions
1820  // if we have them.
1821  // FIXME: It'd be nice to use NEON instructions.
1822  Type *Ty = I->getType();
1823  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1824  if (isFloat && !Subtarget->hasVFP2())
1825    return false;
1826
1827  unsigned Opc;
1828  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1829  switch (ISDOpcode) {
1830    default: return false;
1831    case ISD::FADD:
1832      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1833      break;
1834    case ISD::FSUB:
1835      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1836      break;
1837    case ISD::FMUL:
1838      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1839      break;
1840  }
1841  unsigned Op1 = getRegForValue(I->getOperand(0));
1842  if (Op1 == 0) return false;
1843
1844  unsigned Op2 = getRegForValue(I->getOperand(1));
1845  if (Op2 == 0) return false;
1846
1847  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
1848  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1849                          TII.get(Opc), ResultReg)
1850                  .addReg(Op1).addReg(Op2));
1851  UpdateValueMap(I, ResultReg);
1852  return true;
1853}
1854
1855// Call Handling Code
1856
1857// This is largely taken directly from CCAssignFnForNode
1858// TODO: We may not support all of this.
1859CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
1860                                           bool Return,
1861                                           bool isVarArg) {
1862  switch (CC) {
1863  default:
1864    llvm_unreachable("Unsupported calling convention");
1865  case CallingConv::Fast:
1866    if (Subtarget->hasVFP2() && !isVarArg) {
1867      if (!Subtarget->isAAPCS_ABI())
1868        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1869      // For AAPCS ABI targets, just use VFP variant of the calling convention.
1870      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1871    }
1872    // Fallthrough
1873  case CallingConv::C:
1874    // Use target triple & subtarget features to do actual dispatch.
1875    if (Subtarget->isAAPCS_ABI()) {
1876      if (Subtarget->hasVFP2() &&
1877          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
1878        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1879      else
1880        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1881    } else
1882        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1883  case CallingConv::ARM_AAPCS_VFP:
1884    if (!isVarArg)
1885      return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
    // Fall through to the soft float variant; variadic functions don't
    // use the hard floating point ABI.
1888  case CallingConv::ARM_AAPCS:
1889    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1890  case CallingConv::ARM_APCS:
1891    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1892  case CallingConv::GHC:
1893    if (Return)
1894      llvm_unreachable("Can't return in GHC call convention");
1895    else
1896      return CC_ARM_APCS_GHC;
1897  }
1898}
1899
1900bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1901                                  SmallVectorImpl<unsigned> &ArgRegs,
1902                                  SmallVectorImpl<MVT> &ArgVTs,
1903                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1904                                  SmallVectorImpl<unsigned> &RegArgs,
1905                                  CallingConv::ID CC,
1906                                  unsigned &NumBytes,
1907                                  bool isVarArg) {
1908  SmallVector<CCValAssign, 16> ArgLocs;
1909  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
1910  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
1911                             CCAssignFnForCall(CC, false, isVarArg));
1912
1913  // Check that we can handle all of the arguments. If we can't, then bail out
1914  // now before we add code to the MBB.
1915  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1916    CCValAssign &VA = ArgLocs[i];
1917    MVT ArgVT = ArgVTs[VA.getValNo()];
1918
1919    // We don't handle NEON/vector parameters yet.
1920    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1921      return false;
1922
1923    // Now copy/store arg to correct locations.
1924    if (VA.isRegLoc() && !VA.needsCustom()) {
1925      continue;
1926    } else if (VA.needsCustom()) {
1927      // TODO: We need custom lowering for vector (v2f64) args.
1928      if (VA.getLocVT() != MVT::f64 ||
1929          // TODO: Only handle register args for now.
1930          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
1931        return false;
1932    } else {
1933      switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
1934      default:
1935        return false;
1936      case MVT::i1:
1937      case MVT::i8:
1938      case MVT::i16:
1939      case MVT::i32:
1940        break;
1941      case MVT::f32:
1942        if (!Subtarget->hasVFP2())
1943          return false;
1944        break;
1945      case MVT::f64:
1946        if (!Subtarget->hasVFP2())
1947          return false;
1948        break;
1949      }
1950    }
1951  }
1952
  // At this point, we are able to handle the call's arguments in fast isel.
1954
1955  // Get a count of how many bytes are to be pushed on the stack.
1956  NumBytes = CCInfo.getNextStackOffset();
1957
1958  // Issue CALLSEQ_START
1959  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1960  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1961                          TII.get(AdjStackDown))
1962                  .addImm(NumBytes));
1963
1964  // Process the args.
1965  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1966    CCValAssign &VA = ArgLocs[i];
1967    unsigned Arg = ArgRegs[VA.getValNo()];
1968    MVT ArgVT = ArgVTs[VA.getValNo()];
1969
1970    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
1971           "We don't handle NEON/vector parameters yet.");
1972
1973    // Handle arg promotion, etc.
1974    switch (VA.getLocInfo()) {
1975      case CCValAssign::Full: break;
1976      case CCValAssign::SExt: {
1977        MVT DestVT = VA.getLocVT();
1978        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
1979        assert (Arg != 0 && "Failed to emit a sext");
1980        ArgVT = DestVT;
1981        break;
1982      }
1983      case CCValAssign::AExt:
1984        // Intentional fall-through.  Handle AExt and ZExt.
1985      case CCValAssign::ZExt: {
1986        MVT DestVT = VA.getLocVT();
1987        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
        assert (Arg != 0 && "Failed to emit a zext");
1989        ArgVT = DestVT;
1990        break;
1991      }
1992      case CCValAssign::BCvt: {
1993        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
1994                                 /*TODO: Kill=*/false);
1995        assert(BC != 0 && "Failed to emit a bitcast!");
1996        Arg = BC;
1997        ArgVT = VA.getLocVT();
1998        break;
1999      }
2000      default: llvm_unreachable("Unknown arg promotion!");
2001    }
2002
2003    // Now copy/store arg to correct locations.
2004    if (VA.isRegLoc() && !VA.needsCustom()) {
2005      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2006              VA.getLocReg())
2007        .addReg(Arg);
2008      RegArgs.push_back(VA.getLocReg());
2009    } else if (VA.needsCustom()) {
2010      // TODO: We need custom lowering for vector (v2f64) args.
2011      assert(VA.getLocVT() == MVT::f64 &&
2012             "Custom lowering for v2f64 args not available");
2013
2014      CCValAssign &NextVA = ArgLocs[++i];
2015
2016      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2017             "We only handle register args!");
2018
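      // VMOVRRD splits the f64 argument into two core registers: this
      // location and the next one.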
2019      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2020                              TII.get(ARM::VMOVRRD), VA.getLocReg())
2021                      .addReg(NextVA.getLocReg(), RegState::Define)
2022                      .addReg(Arg));
2023      RegArgs.push_back(VA.getLocReg());
2024      RegArgs.push_back(NextVA.getLocReg());
2025    } else {
2026      assert(VA.isMemLoc());
2027      // Need to store on the stack.
2028      Address Addr;
2029      Addr.BaseType = Address::RegBase;
2030      Addr.Base.Reg = ARM::SP;
2031      Addr.Offset = VA.getLocMemOffset();
2032
2033      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
2034      assert(EmitRet && "Could not emit a store for argument!");
2035    }
2036  }
2037
2038  return true;
2039}
2040
2041bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
2042                             const Instruction *I, CallingConv::ID CC,
2043                             unsigned &NumBytes, bool isVarArg) {
2044  // Issue CALLSEQ_END
2045  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2046  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2047                          TII.get(AdjStackUp))
2048                  .addImm(NumBytes).addImm(0));
2049
2050  // Now the return value.
2051  if (RetVT != MVT::isVoid) {
2052    SmallVector<CCValAssign, 16> RVLocs;
2053    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
2054    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2055
2056    // Copy all of the result registers out of their specified physreg.
2057    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
2058      // For this move we copy into two registers and then move into the
2059      // double fp reg we want.
2060      MVT DestVT = RVLocs[0].getValVT();
2061      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
2062      unsigned ResultReg = createResultReg(DstRC);
2063      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2064                              TII.get(ARM::VMOVDRR), ResultReg)
2065                      .addReg(RVLocs[0].getLocReg())
2066                      .addReg(RVLocs[1].getLocReg()));
2067
2068      UsedRegs.push_back(RVLocs[0].getLocReg());
2069      UsedRegs.push_back(RVLocs[1].getLocReg());
2070
2071      // Finally update the result.
2072      UpdateValueMap(I, ResultReg);
2073    } else {
2074      assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
2075      MVT CopyVT = RVLocs[0].getValVT();
2076
2077      // Special handling for extended integers.
2078      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
2079        CopyVT = MVT::i32;
2080
2081      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
2082
2083      unsigned ResultReg = createResultReg(DstRC);
2084      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2085              ResultReg).addReg(RVLocs[0].getLocReg());
2086      UsedRegs.push_back(RVLocs[0].getLocReg());
2087
2088      // Finally update the result.
2089      UpdateValueMap(I, ResultReg);
2090    }
2091  }
2092
2093  return true;
2094}
2095
2096bool ARMFastISel::SelectRet(const Instruction *I) {
2097  const ReturnInst *Ret = cast<ReturnInst>(I);
2098  const Function &F = *I->getParent()->getParent();
2099
2100  if (!FuncInfo.CanLowerReturn)
2101    return false;
2102
2103  // Build a list of return value registers.
2104  SmallVector<unsigned, 4> RetRegs;
2105
2106  CallingConv::ID CC = F.getCallingConv();
2107  if (Ret->getNumOperands() > 0) {
2108    SmallVector<ISD::OutputArg, 4> Outs;
2109    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2110
2111    // Analyze operands of the call, assigning locations to each operand.
2112    SmallVector<CCValAssign, 16> ValLocs;
2113    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
2114    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
2115                                                 F.isVarArg()));
2116
2117    const Value *RV = Ret->getOperand(0);
2118    unsigned Reg = getRegForValue(RV);
2119    if (Reg == 0)
2120      return false;
2121
2122    // Only handle a single return value for now.
2123    if (ValLocs.size() != 1)
2124      return false;
2125
2126    CCValAssign &VA = ValLocs[0];
2127
2128    // Don't bother handling odd stuff for now.
2129    if (VA.getLocInfo() != CCValAssign::Full)
2130      return false;
2131    // Only handle register returns for now.
2132    if (!VA.isRegLoc())
2133      return false;
2134
2135    unsigned SrcReg = Reg + VA.getValNo();
2136    EVT RVEVT = TLI.getValueType(RV->getType());
2137    if (!RVEVT.isSimple()) return false;
2138    MVT RVVT = RVEVT.getSimpleVT();
2139    MVT DestVT = VA.getValVT();
2140    // Special handling for extended integers.
2141    if (RVVT != DestVT) {
2142      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2143        return false;
2144
2145      assert(DestVT == MVT::i32 && "ARM should always ext to i32");
2146
2147      // Perform extension if flagged as either zext or sext.  Otherwise, do
2148      // nothing.
2149      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
2150        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
2151        if (SrcReg == 0) return false;
2152      }
2153    }
2154
2155    // Make the copy.
2156    unsigned DstReg = VA.getLocReg();
2157    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
2158    // Avoid a cross-class copy. This is very unlikely.
2159    if (!SrcRC->contains(DstReg))
2160      return false;
2161    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2162            DstReg).addReg(SrcReg);
2163
2164    // Add register to return instruction.
2165    RetRegs.push_back(VA.getLocReg());
2166  }
2167
2168  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
2169  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2170                                    TII.get(RetOpc));
2171  AddOptionalDefs(MIB);
2172  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2173    MIB.addReg(RetRegs[i], RegState::Implicit);
2174  return true;
2175}
2176
2177unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
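  // Calls through a register (indirect and long calls) use BLX / tBLXr;
  // direct calls use BL / tBL.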
2178  if (UseReg)
2179    return isThumb2 ? ARM::tBLXr : ARM::BLX;
2180  else
2181    return isThumb2 ? ARM::tBL : ARM::BL;
2182}
2183
2184unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
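  // Wrap the libcall name in an external GlobalVariable so its address can be
  // materialized with the usual global-value machinery.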
2185  GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
2186                                       GlobalValue::ExternalLinkage, 0, Name);
2187  EVT LCREVT = TLI.getValueType(GV->getType());
2188  if (!LCREVT.isSimple()) return 0;
2189  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
2190}
2191
// A quick function that will emit a call for a named libcall, using the
// operands of the Instruction I as the arguments. We can assume that we
2194// can emit a call for any libcall we can produce. This is an abridged version
2195// of the full call infrastructure since we won't need to worry about things
2196// like computed function pointers or strange arguments at call sites.
2197// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2198// with X86.
2199bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
2200  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
2201
2202  // Handle *simple* calls for now.
2203  Type *RetTy = I->getType();
2204  MVT RetVT;
2205  if (RetTy->isVoidTy())
2206    RetVT = MVT::isVoid;
2207  else if (!isTypeLegal(RetTy, RetVT))
2208    return false;
2209
2210  // Can't handle non-double multi-reg retvals.
2211  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
2212    SmallVector<CCValAssign, 16> RVLocs;
2213    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
2214    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
2215    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2216      return false;
2217  }
2218
2219  // Set up the argument vectors.
2220  SmallVector<Value*, 8> Args;
2221  SmallVector<unsigned, 8> ArgRegs;
2222  SmallVector<MVT, 8> ArgVTs;
2223  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2224  Args.reserve(I->getNumOperands());
2225  ArgRegs.reserve(I->getNumOperands());
2226  ArgVTs.reserve(I->getNumOperands());
2227  ArgFlags.reserve(I->getNumOperands());
2228  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
2229    Value *Op = I->getOperand(i);
2230    unsigned Arg = getRegForValue(Op);
2231    if (Arg == 0) return false;
2232
2233    Type *ArgTy = Op->getType();
2234    MVT ArgVT;
2235    if (!isTypeLegal(ArgTy, ArgVT)) return false;
2236
2237    ISD::ArgFlagsTy Flags;
2238    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
2239    Flags.setOrigAlign(OriginalAlignment);
2240
2241    Args.push_back(Op);
2242    ArgRegs.push_back(Arg);
2243    ArgVTs.push_back(ArgVT);
2244    ArgFlags.push_back(Flags);
2245  }
2246
2247  // Handle the arguments now that we've gotten them.
2248  SmallVector<unsigned, 4> RegArgs;
2249  unsigned NumBytes;
2250  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2251                       RegArgs, CC, NumBytes, false))
2252    return false;
2253
2254  unsigned CalleeReg = 0;
2255  if (EnableARMLongCalls) {
2256    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
2257    if (CalleeReg == 0) return false;
2258  }
2259
2260  // Issue the call.
2261  unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
2262  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2263                                    DL, TII.get(CallOpc));
2264  // BL / BLX don't take a predicate, but tBL / tBLX do.
2265  if (isThumb2)
2266    AddDefaultPred(MIB);
2267  if (EnableARMLongCalls)
2268    MIB.addReg(CalleeReg);
2269  else
2270    MIB.addExternalSymbol(TLI.getLibcallName(Call));
2271
2272  // Add implicit physical register uses to the call.
2273  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2274    MIB.addReg(RegArgs[i], RegState::Implicit);
2275
2276  // Add a register mask with the call-preserved registers.
2277  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2278  MIB.addRegMask(TRI.getCallPreservedMask(CC));
2279
2280  // Finish off the call including any return values.
2281  SmallVector<unsigned, 4> UsedRegs;
2282  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
2283
2284  // Set all unused physreg defs as dead.
2285  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2286
2287  return true;
2288}
2289
2290bool ARMFastISel::SelectCall(const Instruction *I,
2291                             const char *IntrMemName = 0) {
2292  const CallInst *CI = cast<CallInst>(I);
2293  const Value *Callee = CI->getCalledValue();
2294
2295  // Can't handle inline asm.
2296  if (isa<InlineAsm>(Callee)) return false;
2297
2298  // Allow SelectionDAG isel to handle tail calls.
2299  if (CI->isTailCall()) return false;
2300
2301  // Check the calling convention.
2302  ImmutableCallSite CS(CI);
2303  CallingConv::ID CC = CS.getCallingConv();
2304
2305  // TODO: Avoid some calling conventions?
2306
2307  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
2308  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
2309  bool isVarArg = FTy->isVarArg();
2310
2311  // Handle *simple* calls for now.
2312  Type *RetTy = I->getType();
2313  MVT RetVT;
2314  if (RetTy->isVoidTy())
2315    RetVT = MVT::isVoid;
2316  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
2317           RetVT != MVT::i8  && RetVT != MVT::i1)
2318    return false;
2319
2320  // Can't handle non-double multi-reg retvals.
2321  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
2322      RetVT != MVT::i16 && RetVT != MVT::i32) {
2323    SmallVector<CCValAssign, 16> RVLocs;
2324    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
2325    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2326    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2327      return false;
2328  }
2329
2330  // Set up the argument vectors.
2331  SmallVector<Value*, 8> Args;
2332  SmallVector<unsigned, 8> ArgRegs;
2333  SmallVector<MVT, 8> ArgVTs;
2334  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2335  unsigned arg_size = CS.arg_size();
2336  Args.reserve(arg_size);
2337  ArgRegs.reserve(arg_size);
2338  ArgVTs.reserve(arg_size);
2339  ArgFlags.reserve(arg_size);
2340  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
2341       i != e; ++i) {
2342    // If we're lowering a memory intrinsic instead of a regular call, skip the
2343    // last two arguments, which shouldn't be passed to the underlying function.
2344    if (IntrMemName && e-i <= 2)
2345      break;
2346
2347    ISD::ArgFlagsTy Flags;
2348    unsigned AttrInd = i - CS.arg_begin() + 1;
2349    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
2350      Flags.setSExt();
2351    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
2352      Flags.setZExt();
2353
2354    // FIXME: Only handle *easy* calls for now.
2355    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
2356        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
2357        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
2358        CS.paramHasAttr(AttrInd, Attribute::ByVal))
2359      return false;
2360
2361    Type *ArgTy = (*i)->getType();
2362    MVT ArgVT;
2363    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
2364        ArgVT != MVT::i1)
2365      return false;
2366
2367    unsigned Arg = getRegForValue(*i);
2368    if (Arg == 0)
2369      return false;
2370
2371    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
2372    Flags.setOrigAlign(OriginalAlignment);
2373
2374    Args.push_back(*i);
2375    ArgRegs.push_back(Arg);
2376    ArgVTs.push_back(ArgVT);
2377    ArgFlags.push_back(Flags);
2378  }
2379
2380  // Handle the arguments now that we've gotten them.
2381  SmallVector<unsigned, 4> RegArgs;
2382  unsigned NumBytes;
2383  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2384                       RegArgs, CC, NumBytes, isVarArg))
2385    return false;
2386
2387  bool UseReg = false;
2388  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
2389  if (!GV || EnableARMLongCalls) UseReg = true;
2390
2391  unsigned CalleeReg = 0;
2392  if (UseReg) {
2393    if (IntrMemName)
2394      CalleeReg = getLibcallReg(IntrMemName);
2395    else
2396      CalleeReg = getRegForValue(Callee);
2397
2398    if (CalleeReg == 0) return false;
2399  }
2400
2401  // Issue the call.
2402  unsigned CallOpc = ARMSelectCallOp(UseReg);
2403  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2404                                    DL, TII.get(CallOpc));
2405
2406  // ARM calls don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
2408    AddDefaultPred(MIB);
2409  if (UseReg)
2410    MIB.addReg(CalleeReg);
2411  else if (!IntrMemName)
2412    MIB.addGlobalAddress(GV, 0, 0);
2413  else
2414    MIB.addExternalSymbol(IntrMemName, 0);
2415
2416  // Add implicit physical register uses to the call.
2417  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2418    MIB.addReg(RegArgs[i], RegState::Implicit);
2419
2420  // Add a register mask with the call-preserved registers.
2421  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2422  MIB.addRegMask(TRI.getCallPreservedMask(CC));
2423
2424  // Finish off the call including any return values.
2425  SmallVector<unsigned, 4> UsedRegs;
2426  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
2427    return false;
2428
2429  // Set all unused physreg defs as dead.
2430  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2431
2432  return true;
2433}
2434
2435bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2436  return Len <= 16;
2437}
2438
2439bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
2440                                        uint64_t Len, unsigned Alignment) {
2441  // Make sure we don't bloat code by inlining very large memcpy's.
2442  if (!ARMIsMemCpySmall(Len))
2443    return false;
2444
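  // Copy in the widest chunks that the remaining length and the alignment
  // allow: i32, then i16, then i8.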
2445  while (Len) {
2446    MVT VT;
2447    if (!Alignment || Alignment >= 4) {
2448      if (Len >= 4)
2449        VT = MVT::i32;
2450      else if (Len >= 2)
2451        VT = MVT::i16;
2452      else {
2453        assert (Len == 1 && "Expected a length of 1!");
2454        VT = MVT::i8;
2455      }
2456    } else {
2457      // Bound based on alignment.
2458      if (Len >= 2 && Alignment == 2)
2459        VT = MVT::i16;
2460      else {
2461        VT = MVT::i8;
2462      }
2463    }
2464
2465    bool RV;
2466    unsigned ResultReg;
2467    RV = ARMEmitLoad(VT, ResultReg, Src);
2468    assert (RV == true && "Should be able to handle this load.");
2469    RV = ARMEmitStore(VT, ResultReg, Dest);
2470    assert (RV == true && "Should be able to handle this store.");
2471    (void)RV;
2472
2473    unsigned Size = VT.getSizeInBits()/8;
2474    Len -= Size;
2475    Dest.Offset += Size;
2476    Src.Offset += Size;
2477  }
2478
2479  return true;
2480}
2481
2482bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
2483  // FIXME: Handle more intrinsics.
2484  switch (I.getIntrinsicID()) {
2485  default: return false;
2486  case Intrinsic::frameaddress: {
2487    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2488    MFI->setFrameAddressIsTaken(true);
2489
2490    unsigned LdrOpc;
2491    const TargetRegisterClass *RC;
2492    if (isThumb2) {
2493      LdrOpc =  ARM::t2LDRi12;
2494      RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
2495    } else {
2496      LdrOpc =  ARM::LDRi12;
2497      RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
2498    }
2499
2500    const ARMBaseRegisterInfo *RegInfo =
2501          static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
2502    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2503    unsigned SrcReg = FramePtr;
2504
    // Repeatedly load the frame address to walk up the frame chain:
    // ldr r0, [fp]
    // ldr r0, [r0]
    // ldr r0, [r0]
    // ...
2510    unsigned DestReg;
2511    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
2512    while (Depth--) {
2513      DestReg = createResultReg(RC);
2514      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2515                              TII.get(LdrOpc), DestReg)
2516                      .addReg(SrcReg).addImm(0));
2517      SrcReg = DestReg;
2518    }
2519    UpdateValueMap(&I, SrcReg);
2520    return true;
2521  }
2522  case Intrinsic::memcpy:
2523  case Intrinsic::memmove: {
2524    const MemTransferInst &MTI = cast<MemTransferInst>(I);
2525    // Don't handle volatile.
2526    if (MTI.isVolatile())
2527      return false;
2528
    // Check that this is a memcpy before calling ComputeAddress; we don't
    // inline memmove, so computing addresses for it would only emit dead code.
2531    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
2532    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
2533      // Small memcpy's are common enough that we want to do them without a call
2534      // if possible.
2535      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
2536      if (ARMIsMemCpySmall(Len)) {
2537        Address Dest, Src;
2538        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
2539            !ARMComputeAddress(MTI.getRawSource(), Src))
2540          return false;
2541        unsigned Alignment = MTI.getAlignment();
2542        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2543          return true;
2544      }
2545    }
2546
2547    if (!MTI.getLength()->getType()->isIntegerTy(32))
2548      return false;
2549
2550    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
2551      return false;
2552
2553    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
2554    return SelectCall(&I, IntrMemName);
2555  }
2556  case Intrinsic::memset: {
2557    const MemSetInst &MSI = cast<MemSetInst>(I);
2558    // Don't handle volatile.
2559    if (MSI.isVolatile())
2560      return false;
2561
2562    if (!MSI.getLength()->getType()->isIntegerTy(32))
2563      return false;
2564
2565    if (MSI.getDestAddressSpace() > 255)
2566      return false;
2567
2568    return SelectCall(&I, "memset");
2569  }
2570  case Intrinsic::trap: {
2571    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(
2572      Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
2573    return true;
2574  }
2575  }
2576}
2577
2578bool ARMFastISel::SelectTrunc(const Instruction *I) {
2579  // The high bits for a type smaller than the register size are assumed to be
2580  // undefined.
2581  Value *Op = I->getOperand(0);
2582
2583  EVT SrcVT, DestVT;
2584  SrcVT = TLI.getValueType(Op->getType(), true);
2585  DestVT = TLI.getValueType(I->getType(), true);
2586
2587  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2588    return false;
2589  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2590    return false;
2591
2592  unsigned SrcReg = getRegForValue(Op);
2593  if (!SrcReg) return false;
2594
2595  // Because the high bits are undefined, a truncate doesn't generate
2596  // any code.
2597  UpdateValueMap(I, SrcReg);
2598  return true;
2599}
2600
2601unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2602                                    bool isZExt) {
2603  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
2604    return 0;
2605
2606  unsigned Opc;
2607  bool isBoolZext = false;
2608  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
2609  switch (SrcVT.SimpleTy) {
2610  default: return 0;
2611  case MVT::i16:
2612    if (!Subtarget->hasV6Ops()) return 0;
2613    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
2614    if (isZExt)
2615      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
2616    else
2617      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
2618    break;
2619  case MVT::i8:
2620    if (!Subtarget->hasV6Ops()) return 0;
2621    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
2622    if (isZExt)
2623      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
2624    else
2625      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
2626    break;
2627  case MVT::i1:
2628    if (isZExt) {
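      // Zero-extend a boolean by masking the source register with 1.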
2629      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
2630      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
2631      isBoolZext = true;
2632      break;
2633    }
2634    return 0;
2635  }
2636
2637  unsigned ResultReg = createResultReg(RC);
2638  MachineInstrBuilder MIB;
2639  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
2640        .addReg(SrcReg);
2641  if (isBoolZext)
2642    MIB.addImm(1);
2643  else
2644    MIB.addImm(0);
2645  AddOptionalDefs(MIB);
2646  return ResultReg;
2647}
2648
2649bool ARMFastISel::SelectIntExt(const Instruction *I) {
2650  // On ARM, in general, integer casts don't involve legal types; this code
2651  // handles promotable integers.
2652  Type *DestTy = I->getType();
2653  Value *Src = I->getOperand(0);
2654  Type *SrcTy = Src->getType();
2655
2656  bool isZExt = isa<ZExtInst>(I);
2657  unsigned SrcReg = getRegForValue(Src);
2658  if (!SrcReg) return false;
2659
2660  EVT SrcEVT, DestEVT;
2661  SrcEVT = TLI.getValueType(SrcTy, true);
2662  DestEVT = TLI.getValueType(DestTy, true);
2663  if (!SrcEVT.isSimple()) return false;
2664  if (!DestEVT.isSimple()) return false;
2665
2666  MVT SrcVT = SrcEVT.getSimpleVT();
2667  MVT DestVT = DestEVT.getSimpleVT();
2668  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2669  if (ResultReg == 0) return false;
2670  UpdateValueMap(I, ResultReg);
2671  return true;
2672}
2673
2674bool ARMFastISel::SelectShift(const Instruction *I,
2675                              ARM_AM::ShiftOpc ShiftTy) {
  // Thumb2 mode is handled by the target-independent selector
  // or SelectionDAG ISel.
2678  if (isThumb2)
2679    return false;
2680
2681  // Only handle i32 now.
2682  EVT DestVT = TLI.getValueType(I->getType(), true);
2683  if (DestVT != MVT::i32)
2684    return false;
2685
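  // Default to MOVsr, which shifts by a register amount; if the shift amount
  // is a suitable constant we switch to MOVsi below and encode it directly.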
2686  unsigned Opc = ARM::MOVsr;
2687  unsigned ShiftImm;
2688  Value *Src2Value = I->getOperand(1);
2689  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
2690    ShiftImm = CI->getZExtValue();
2691
2692    // Fall back to selection DAG isel if the shift amount
2693    // is zero or greater than the width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
2695      return false;
2696
2697    Opc = ARM::MOVsi;
2698  }
2699
2700  Value *Src1Value = I->getOperand(0);
2701  unsigned Reg1 = getRegForValue(Src1Value);
2702  if (Reg1 == 0) return false;
2703
2704  unsigned Reg2 = 0;
2705  if (Opc == ARM::MOVsr) {
2706    Reg2 = getRegForValue(Src2Value);
2707    if (Reg2 == 0) return false;
2708  }
2709
2710  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  if (ResultReg == 0) return false;
2712
2713  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2714                                    TII.get(Opc), ResultReg)
2715                            .addReg(Reg1);
2716
2717  if (Opc == ARM::MOVsi)
2718    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
2719  else if (Opc == ARM::MOVsr) {
2720    MIB.addReg(Reg2);
2721    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
2722  }
2723
2724  AddOptionalDefs(MIB);
2725  UpdateValueMap(I, ResultReg);
2726  return true;
2727}
2728
2729// TODO: SoftFP support.
2730bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
2731
2732  switch (I->getOpcode()) {
2733    case Instruction::Load:
2734      return SelectLoad(I);
2735    case Instruction::Store:
2736      return SelectStore(I);
2737    case Instruction::Br:
2738      return SelectBranch(I);
2739    case Instruction::IndirectBr:
2740      return SelectIndirectBr(I);
2741    case Instruction::ICmp:
2742    case Instruction::FCmp:
2743      return SelectCmp(I);
2744    case Instruction::FPExt:
2745      return SelectFPExt(I);
2746    case Instruction::FPTrunc:
2747      return SelectFPTrunc(I);
2748    case Instruction::SIToFP:
2749      return SelectIToFP(I, /*isSigned*/ true);
2750    case Instruction::UIToFP:
2751      return SelectIToFP(I, /*isSigned*/ false);
2752    case Instruction::FPToSI:
2753      return SelectFPToI(I, /*isSigned*/ true);
2754    case Instruction::FPToUI:
2755      return SelectFPToI(I, /*isSigned*/ false);
2756    case Instruction::Add:
2757      return SelectBinaryIntOp(I, ISD::ADD);
2758    case Instruction::Or:
2759      return SelectBinaryIntOp(I, ISD::OR);
2760    case Instruction::Sub:
2761      return SelectBinaryIntOp(I, ISD::SUB);
2762    case Instruction::FAdd:
2763      return SelectBinaryFPOp(I, ISD::FADD);
2764    case Instruction::FSub:
2765      return SelectBinaryFPOp(I, ISD::FSUB);
2766    case Instruction::FMul:
2767      return SelectBinaryFPOp(I, ISD::FMUL);
2768    case Instruction::SDiv:
2769      return SelectDiv(I, /*isSigned*/ true);
2770    case Instruction::UDiv:
2771      return SelectDiv(I, /*isSigned*/ false);
2772    case Instruction::SRem:
2773      return SelectRem(I, /*isSigned*/ true);
2774    case Instruction::URem:
2775      return SelectRem(I, /*isSigned*/ false);
2776    case Instruction::Call:
2777      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2778        return SelectIntrinsicCall(*II);
2779      return SelectCall(I);
2780    case Instruction::Select:
2781      return SelectSelect(I);
2782    case Instruction::Ret:
2783      return SelectRet(I);
2784    case Instruction::Trunc:
2785      return SelectTrunc(I);
2786    case Instruction::ZExt:
2787    case Instruction::SExt:
2788      return SelectIntExt(I);
2789    case Instruction::Shl:
2790      return SelectShift(I, ARM_AM::lsl);
2791    case Instruction::LShr:
2792      return SelectShift(I, ARM_AM::lsr);
2793    case Instruction::AShr:
2794      return SelectShift(I, ARM_AM::asr);
2795    default: break;
2796  }
2797  return false;
2798}
2799
2800/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
2801/// vreg is being provided by the specified load instruction.  If possible,
2802/// try to fold the load as an operand to the instruction, returning true if
2803/// successful.
2804bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
2805                                const LoadInst *LI) {
2806  // Verify we have a legal type before going any further.
2807  MVT VT;
2808  if (!isLoadTypeLegal(LI->getType(), VT))
2809    return false;
2810
2811  // Combine load followed by zero- or sign-extend.
2812  // ldrb r1, [r0]       ldrb r1, [r0]
2813  // uxtb r2, r1     =>
2814  // mov  r3, r2         mov  r3, r1
2815  bool isZExt = true;
2816  switch(MI->getOpcode()) {
2817    default: return false;
2818    case ARM::SXTH:
2819    case ARM::t2SXTH:
2820      isZExt = false;
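      // Intentional fall-through.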
2821    case ARM::UXTH:
2822    case ARM::t2UXTH:
2823      if (VT != MVT::i16)
2824        return false;
2825    break;
2826    case ARM::SXTB:
2827    case ARM::t2SXTB:
2828      isZExt = false;
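      // Intentional fall-through.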
2829    case ARM::UXTB:
2830    case ARM::t2UXTB:
2831      if (VT != MVT::i8)
2832        return false;
2833    break;
2834  }
2835  // See if we can handle this address.
2836  Address Addr;
2837  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
2838
2839  unsigned ResultReg = MI->getOperand(0).getReg();
2840  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
2841    return false;
2842  MI->eraseFromParent();
2843  return true;
2844}
2845
2846unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
2847                                     unsigned Align, MVT VT) {
2848  bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
2849  ARMConstantPoolConstant *CPV =
2850    ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
2851  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
2852
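  // With GOTOFF the global's address is the global base register plus the
  // loaded constant-pool value (ADD); otherwise the constant pool holds a GOT
  // slot offset and the address is loaded through the GOT (LDR).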
2853  unsigned Opc;
2854  unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
2855  // Load value.
2856  if (isThumb2) {
2857    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2858                            TII.get(ARM::t2LDRpci), DestReg1)
2859                    .addConstantPoolIndex(Idx));
2860    Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
2861  } else {
2862    // The extra immediate is for addrmode2.
2863    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2864                            DL, TII.get(ARM::LDRcp), DestReg1)
2865                    .addConstantPoolIndex(Idx).addImm(0));
2866    Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
2867  }
2868
2869  unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
2870  if (GlobalBaseReg == 0) {
2871    GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
2872    AFI->setGlobalBaseReg(GlobalBaseReg);
2873  }
2874
2875  unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
2876  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2877                                    DL, TII.get(Opc), DestReg2)
2878                            .addReg(DestReg1)
2879                            .addReg(GlobalBaseReg);
2880  if (!UseGOTOFF)
2881    MIB.addImm(0);
2882  AddOptionalDefs(MIB);
2883
2884  return DestReg2;
2885}
2886
2887bool ARMFastISel::FastLowerArguments() {
2888  if (!FuncInfo.CanLowerReturn)
2889    return false;
2890
2891  const Function *F = FuncInfo.Fn;
2892  if (F->isVarArg())
2893    return false;
2894
2895  CallingConv::ID CC = F->getCallingConv();
2896  switch (CC) {
2897  default:
2898    return false;
2899  case CallingConv::Fast:
2900  case CallingConv::C:
2901  case CallingConv::ARM_AAPCS_VFP:
2902  case CallingConv::ARM_AAPCS:
2903  case CallingConv::ARM_APCS:
2904    break;
2905  }
2906
  // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0 - r3.
2909  unsigned Idx = 1;
2910  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
2911       I != E; ++I, ++Idx) {
2912    if (Idx > 4)
2913      return false;
2914
2915    if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2916        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2917        F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
2918      return false;
2919
2920    Type *ArgTy = I->getType();
2921    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
2922      return false;
2923
2924    EVT ArgVT = TLI.getValueType(ArgTy);
2925    if (!ArgVT.isSimple()) return false;
2926    switch (ArgVT.getSimpleVT().SimpleTy) {
2927    case MVT::i8:
2928    case MVT::i16:
2929    case MVT::i32:
2930      break;
2931    default:
2932      return false;
2933    }
2934  }
2935
2936
2937  static const uint16_t GPRArgRegs[] = {
2938    ARM::R0, ARM::R1, ARM::R2, ARM::R3
2939  };
2940
2941  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
2942  Idx = 0;
2943  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
2944       I != E; ++I, ++Idx) {
2945    if (I->use_empty())
2946      continue;
2947    unsigned SrcReg = GPRArgRegs[Idx];
2948    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2949    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2950    // Without this, EmitLiveInCopies may eliminate the livein if its only
2951    // use is a bitcast (which isn't turned into an instruction).
2952    unsigned ResultReg = createResultReg(RC);
2953    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2954            ResultReg).addReg(DstReg, getKillRegState(true));
2955    UpdateValueMap(I, ResultReg);
2956  }
2957
2958  return true;
2959}
2960
2961namespace llvm {
2962  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
2963                                const TargetLibraryInfo *libInfo) {
2964    // Completely untested on non-iOS.
2965    const TargetMachine &TM = funcInfo.MF->getTarget();
2966
    // iOS and non-Thumb1 only for now.
2968    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
2969    if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
2970      return new ARMFastISel(funcInfo, libInfo);
2971    return 0;
2972  }
2973}
2974