ARMFastISel.cpp revision 36b7beb42921c428fc9f5b5a9cc9feb7fe7dd4b3
//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool>
DisableARMFastISel("disable-arm-fast-isel",
                    cl::desc("Turn off experimental ARM fast-isel support"),
                    cl::init(false), cl::Hidden);

extern cl::opt<bool> EnableARMLongCalls;

namespace {

  // All possible address modes, plus some.
  struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;

    // Innocuous defaults for our address.
    Address()
     : BaseType(RegBase), Offset(0) {
       Base.Reg = 0;
     }
  };

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      unsigned Op2, bool Op2IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);
    virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm1, uint64_t Imm2);

    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend-specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
    virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                               const LoadInst *LI);

  #include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isZExt);
    bool SelectFPToSI(const Instruction *I);
    bool SelectSDiv(const Instruction *I);
    bool SelectSRem(const Instruction *I);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                     unsigned Alignment = 0, bool isZExt = true,
                     bool allocReg = true);

    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
                      unsigned Alignment = 0);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
    unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(const GlobalValue *GV);

    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(EVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              unsigned Flags, bool useAM3);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If we're a thumb2 function or not a NEON function, we were handled
  // via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
       AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    if (MCID.OpInfo[i].isPredicate())
      return true;

  return false;
}

// If the machine instruction is predicable, go ahead and add the predicate
// operands; if it needs default CC operands, add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for a description of why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? Or are we NEON in ARM mode and have a predicate
  // operand? If so, we know we're not predicable, but add the predicate
  // operand anyway.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate?  CPSR is true iff the instruction's
  // optional def is CPSR. All other optional defs in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

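// Each FastEmitInst_* variant below mirrors the generic FastISel emitter of
// the same name, with one ARM-specific twist: every emitted instruction is
// run through AddOptionalDefs so default predicate and CC operands get
// appended where needed. When the instruction has no explicit def, the
// result is copied out of its first implicit def into ResultReg instead.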
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                   TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addReg(Op2, Op2IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addReg(Op1, Op1IsKill * RegState::Kill)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                   .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(TargetOpcode::COPY), ResultReg)
                   .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm1, uint64_t Imm2) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(TargetOpcode::COPY), ResultReg)
                 .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}

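// Materialize an integer constant into a register: a single MOVi16 when the
// value fits in 16 bits on v6t2+, MVN for encodable negative values, and
// otherwise a 32-bit constant-pool load.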
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {

  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ImmReg)
                    .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), ImmReg)
                      .addImm(Imm));
      return ImmReg;
    }
  }

  // Load from constant pool.  For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}

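// Materialize the address of a GlobalValue: prefer movw/movt where the
// subtarget and relocation model allow it, otherwise go through the constant
// pool; indirect symbols then need an extra load to resolve the address.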
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();

  // TODO: Need more magic for ARM PIC.
  if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // Use movw+movt when possible, it avoids constant pool entries.
  // Darwin targets don't support movt with Reloc::Static, see
  // ARMTargetLowering::LowerGlobalAddressDarwin.  Other targets only support
  // static movt relocations.
  if (Subtarget->useMovt() &&
      Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
    unsigned Opc;
    switch (RelocM) {
    case Reloc::PIC_:
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
      break;
    case Reloc::DynamicNoPIC:
      Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
      break;
    default:
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
      break;
    }
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg).addGlobalAddress(GV));
  } else {
    // MachineConstantPool wants an explicit alignment.
    unsigned Align = TD.getPrefTypeAlignment(GV->getType());
    if (Align == 0) {
      // TODO: Figure out if this is correct.
      Align = TD.getTypeAllocSize(GV->getType());
    }

    // Grab index.
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
      (Subtarget->isThumb() ? 4 : 8);
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
        .addConstantPoolIndex(Idx);
      if (RelocM == Reloc::PIC_)
        MIB.addImm(Id);
    } else {
      // The extra immediate is for addrmode2.
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                    DestReg)
        .addConstantPoolIndex(Idx)
        .addImm(0);
    }
    AddOptionalDefs(MIB);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
                    NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}

unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);

unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(SI->second)
                            .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

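// Fold Addr.Offset into the addressing mode if it fits the load/store
// encoding for this type; otherwise compute reg+offset into a fresh register
// and zero the offset.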
void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");

  bool needsLowering = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Unhandled load/store type!");
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a frame index and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass :
                              ARM::GPRRegisterClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(Addr.Base.FI)
                            .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}

void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       unsigned Flags, bool useAM3) {
  // The addrmode5 output depends on the SelectionDAG addressing dividing the
  // offset by 4, which it multiplies back later. Do the same here.
  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
      VT.getSimpleVT().SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO =
          FuncInfo.MF->getMachineMemOperand(
                                  MachinePointerInfo::getFixedStack(FI, Offset),
                                  Flags,
                                  MFI.getObjectSize(FI),
                                  MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}

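// Emit a load of VT from Addr into ResultReg. The opcode is chosen by type
// and by Thumb2 vs. ARM mode; unaligned f32 loads are done as an i32 load
// followed by a VMOVSR into the FP register.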
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i16:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i32:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = ARM::GPRRegisterClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned loads need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load.  Now we must move from the GPR to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
    return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

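// Emit a store of SrcReg to Addr. i1 values are masked down to a single bit
// first; unaligned f32 stores are moved to a GPR with VMOVRS and stored as
// i32.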
bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass :
                                               ARM::GPRRegisterClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned stores need special handling.  Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
          return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
    return false;
  return true;
}

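// Map an IR comparison predicate onto an ARM condition code. ARMCC::AL is
// used as a "not handled" marker for the predicates that would need two
// compares.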
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

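// Select a conditional branch. Three special cases are handled: a compare
// feeding the branch from the same block, a truncate of a loadable type, and
// a constant condition. Otherwise the i1 condition register is tested
// against 1 with TST.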
bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {

      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    FastEmitBranch(Target, DL);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}

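// Emit a compare of Src1Value against Src2Value. Small immediates are
// encoded directly into the compare (CMP/CMN, or VCMPEZ for +0.0), sub-i32
// integer operands are extended to i32 first, and FP compares are followed
// by FMSTAT to move the result into a register usable for branches.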
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcVT = TLI.getValueType(Ty, true);
  if (!SrcVT.isSimple()) return false;

  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      if (Imm < 0) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.getSimpleVT().SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
      break;
    case MVT::f64:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
    // Intentional fall-through.
    case MVT::i32:
      if (isThumb2) {
        if (!UseImm)
          CmpOpc = ARM::t2CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
      } else {
        if (!UseImm)
          CmpOpc = ARM::CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
      }
      break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0) return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0) return false;
  }

  // We have i1, i8, or i16; we need to either zero extend or sign extend.
1433  if (needsExt) {
1434    unsigned ResultReg;
1435    ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1436    if (ResultReg == 0) return false;
1437    SrcReg1 = ResultReg;
1438    if (!UseImm) {
1439      ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1440      if (ResultReg == 0) return false;
1441      SrcReg2 = ResultReg;
1442    }
1443  }
1444
1445  if (!UseImm) {
1446    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1447                            TII.get(CmpOpc))
1448                    .addReg(SrcReg1).addReg(SrcReg2));
1449  } else {
1450    MachineInstrBuilder MIB;
1451    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
1452      .addReg(SrcReg1);
1453
1454    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1455    if (isICmp)
1456      MIB.addImm(Imm);
1457    AddOptionalDefs(MIB);
1458  }
1459
1460  // For floating point we need to move the result to a comparison register
1461  // that we can then use for branches.
1462  if (Ty->isFloatTy() || Ty->isDoubleTy())
1463    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1464                            TII.get(ARM::FMSTAT)));
1465  return true;
1466}
1467
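/// SelectCmp - Materialize the boolean result of a compare. For example,
/// "icmp eq i32 %a, %b" becomes roughly:
///   cmp   rA, rB        @ vcmpe + fmstat for floating-point compares
///   mov   rD, #0
///   moveq rD, #1        @ so rD = (a == b) ? 1 : 0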
1468bool ARMFastISel::SelectCmp(const Instruction *I) {
1469  const CmpInst *CI = cast<CmpInst>(I);
1470  Type *Ty = CI->getOperand(0)->getType();
1471
1472  // Get the compare predicate.
1473  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1474
1475  // We may not handle every CC for now.
1476  if (ARMPred == ARMCC::AL) return false;
1477
1478  // Emit the compare.
1479  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1480    return false;
1481
1482  // Now set a register based on the comparison. Explicitly set the predicates
1483  // here.
1484  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1485  TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass
1486                                    : ARM::GPRRegisterClass;
1487  unsigned DestReg = createResultReg(RC);
1488  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1489  unsigned ZeroReg = TargetMaterializeConstant(Zero);
1490  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
1491  unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR;
1492  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
1493          .addReg(ZeroReg).addImm(1)
1494          .addImm(ARMPred).addReg(CondReg);
1495
1496  UpdateValueMap(I, DestReg);
1497  return true;
1498}
1499
1500bool ARMFastISel::SelectFPExt(const Instruction *I) {
1501  // Make sure we have VFP and that we're extending float to double.
1502  if (!Subtarget->hasVFP2()) return false;
1503
1504  Value *V = I->getOperand(0);
1505  if (!I->getType()->isDoubleTy() ||
1506      !V->getType()->isFloatTy()) return false;
1507
1508  unsigned Op = getRegForValue(V);
1509  if (Op == 0) return false;
1510
1511  unsigned Result = createResultReg(ARM::DPRRegisterClass);
1512  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1513                          TII.get(ARM::VCVTDS), Result)
1514                  .addReg(Op));
1515  UpdateValueMap(I, Result);
1516  return true;
1517}
1518
1519bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1520  // Make sure we have VFP and that we're truncating double to float.
1521  if (!Subtarget->hasVFP2()) return false;
1522
1523  Value *V = I->getOperand(0);
1524  if (!(I->getType()->isFloatTy() &&
1525        V->getType()->isDoubleTy())) return false;
1526
1527  unsigned Op = getRegForValue(V);
1528  if (Op == 0) return false;
1529
1530  unsigned Result = createResultReg(ARM::SPRRegisterClass);
1531  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1532                          TII.get(ARM::VCVTSD), Result)
1533                  .addReg(Op));
1534  UpdateValueMap(I, Result);
1535  return true;
1536}
1537
1538bool ARMFastISel::SelectIToFP(const Instruction *I, bool isZExt) {
1539  // Make sure we have VFP.
1540  if (!Subtarget->hasVFP2()) return false;
1541
1542  MVT DstVT;
1543  Type *Ty = I->getType();
1544  if (!isTypeLegal(Ty, DstVT))
1545    return false;
1546
1547  Value *Src = I->getOperand(0);
1548  EVT SrcVT = TLI.getValueType(Src->getType(), true);
1549  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
1550    return false;
1551
1552  unsigned SrcReg = getRegForValue(Src);
1553  if (SrcReg == 0) return false;
1554
1555  // Handle sign-extension.
1556  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
1557    EVT DestVT = MVT::i32;
1558    unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
1559    if (ResultReg == 0) return false;
1560    SrcReg = ResultReg;
1561  }
1562
1563  // The conversion routine works only on fp-regs, but the operand above
1564  // was an integer, so move it into an fp register if possible.
1565  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
1566  if (FP == 0) return false;
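  // FP now holds the raw integer bits in an SPR (ARMMoveToFPReg emits a
  // VMOVSR); the VSITOS/VUITOS-style conversion below then converts those
  // bits within the fp register file.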
1567
1568  unsigned Opc;
1569  if (Ty->isFloatTy()) Opc = isZExt ? ARM::VUITOS : ARM::VSITOS;
1570  else if (Ty->isDoubleTy()) Opc = isZExt ? ARM::VUITOD : ARM::VSITOD;
1571  else return false;
1572
1573  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
1574  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
1575                          ResultReg)
1576                  .addReg(FP));
1577  UpdateValueMap(I, ResultReg);
1578  return true;
1579}
1580
1581bool ARMFastISel::SelectFPToSI(const Instruction *I) {
1582  // Make sure we have VFP.
1583  if (!Subtarget->hasVFP2()) return false;
1584
1585  MVT DstVT;
1586  Type *RetTy = I->getType();
1587  if (!isTypeLegal(RetTy, DstVT))
1588    return false;
1589
1590  unsigned Op = getRegForValue(I->getOperand(0));
1591  if (Op == 0) return false;
1592
1593  unsigned Opc;
1594  Type *OpTy = I->getOperand(0)->getType();
1595  if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
1596  else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
1597  else return false;
1598
1599  // f64->s32 or f32->s32 both need an intermediate f32 reg.
1600  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1601  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
1602                          ResultReg)
1603                  .addReg(Op));
1604
1605  // This result needs to be in an integer register, but the conversion only
1606  // takes place in fp-regs.
1607  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1608  if (IntReg == 0) return false;
1609
1610  UpdateValueMap(I, IntReg);
1611  return true;
1612}
1613
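/// SelectSelect - Lower "select i1 %c, i32 %a, i32 %b" to a compare against
/// zero plus a predicated move, roughly:
///   cmp   rC, #0
///   movne rD, rA        @ rD tied to rB, the false value
/// or, when %b is an encodeable constant:
///   cmp   rC, #0
///   moveq rD, #imm      @ rD tied to rA, the true value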
1614bool ARMFastISel::SelectSelect(const Instruction *I) {
1615  MVT VT;
1616  if (!isTypeLegal(I->getType(), VT))
1617    return false;
1618
1619  // Operands must be register-sized (i32) for the register moves used below.
1620  if (VT != MVT::i32) return false;
1621  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
1622
1623  unsigned CondReg = getRegForValue(I->getOperand(0));
1624  if (CondReg == 0) return false;
1625  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1626  if (Op1Reg == 0) return false;
1627
1628  // Check to see if we can use an immediate in the conditional move.
1629  int Imm = 0;
1630  bool UseImm = false;
1631  bool isNegativeImm = false;
1632  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
1633    assert(VT == MVT::i32 && "Expecting an i32.");
1634    Imm = (int)ConstInt->getValue().getZExtValue();
1635    if (Imm < 0) {
1636      isNegativeImm = true;
1637      Imm = ~Imm;
1638    }
1639    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1640      (ARM_AM::getSOImmVal(Imm) != -1);
1641  }
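  // If the constant was negative, Imm now holds its bitwise complement; the
  // MVNCCi form chosen below writes ~Imm, recovering the original value.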
1642
1643  unsigned Op2Reg = 0;
1644  if (!UseImm) {
1645    Op2Reg = getRegForValue(I->getOperand(2));
1646    if (Op2Reg == 0) return false;
1647  }
1648
1649  unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
1650  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
1651                  .addReg(CondReg).addImm(0));
1652
1653  unsigned MovCCOpc;
1654  if (!UseImm) {
1655    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
1656  } else {
1657    if (!isNegativeImm) {
1658      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1659    } else {
1660      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
1661    }
1662  }
1663  unsigned ResultReg = createResultReg(RC);
1664  if (!UseImm)
1665    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
1666    .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
1667  else
1668    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
1669    .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
1670  UpdateValueMap(I, ResultReg);
1671  return true;
1672}
1673
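/// SelectSDiv - Without hardware divide, an sdiv becomes a runtime call;
/// e.g. an i32 sdiv typically lowers to a call to __divsi3 (or __aeabi_idiv
/// with an AEABI runtime), with the exact symbol name coming from
/// TLI.getLibcallName in ARMEmitLibcall.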
1674bool ARMFastISel::SelectSDiv(const Instruction *I) {
1675  MVT VT;
1676  Type *Ty = I->getType();
1677  if (!isTypeLegal(Ty, VT))
1678    return false;
1679
1680  // If we have hardware integer divide support, this should already have
1681  // been selected automatically. If we get here anyway, return false and
1682  // let SelectionDAG pick it up later.
1683  if (Subtarget->hasDivide()) return false;
1684
1685  // Otherwise emit a libcall.
1686  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1687  if (VT == MVT::i8)
1688    LC = RTLIB::SDIV_I8;
1689  else if (VT == MVT::i16)
1690    LC = RTLIB::SDIV_I16;
1691  else if (VT == MVT::i32)
1692    LC = RTLIB::SDIV_I32;
1693  else if (VT == MVT::i64)
1694    LC = RTLIB::SDIV_I64;
1695  else if (VT == MVT::i128)
1696    LC = RTLIB::SDIV_I128;
1697  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1698
1699  return ARMEmitLibcall(I, LC);
1700}
1701
1702bool ARMFastISel::SelectSRem(const Instruction *I) {
1703  MVT VT;
1704  Type *Ty = I->getType();
1705  if (!isTypeLegal(Ty, VT))
1706    return false;
1707
1708  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1709  if (VT == MVT::i8)
1710    LC = RTLIB::SREM_I8;
1711  else if (VT == MVT::i16)
1712    LC = RTLIB::SREM_I16;
1713  else if (VT == MVT::i32)
1714    LC = RTLIB::SREM_I32;
1715  else if (VT == MVT::i64)
1716    LC = RTLIB::SREM_I64;
1717  else if (VT == MVT::i128)
1718    LC = RTLIB::SREM_I128;
1719  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1720
1721  return ARMEmitLibcall(I, LC);
1722}
1723
1724bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
1725  EVT VT  = TLI.getValueType(I->getType(), true);
1726
1727  // We can get here when we would like to use NEON for our fp operations
1728  // but cannot select the NEON form. Just use the VFP instructions if we
1729  // have them.
1730  // FIXME: It'd be nice to use NEON instructions.
1731  Type *Ty = I->getType();
1732  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1733  if (isFloat && !Subtarget->hasVFP2())
1734    return false;
1735
1736  unsigned Opc;
1737  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1738  switch (ISDOpcode) {
1739    default: return false;
1740    case ISD::FADD:
1741      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1742      break;
1743    case ISD::FSUB:
1744      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1745      break;
1746    case ISD::FMUL:
1747      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1748      break;
1749  }
1750  unsigned Op1 = getRegForValue(I->getOperand(0));
1751  if (Op1 == 0) return false;
1752
1753  unsigned Op2 = getRegForValue(I->getOperand(1));
1754  if (Op2 == 0) return false;
1755
1756  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
1757  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1758                          TII.get(Opc), ResultReg)
1759                  .addReg(Op1).addReg(Op2));
1760  UpdateValueMap(I, ResultReg);
1761  return true;
1762}
1763
1764// Call Handling Code
1765
1766// This is largely taken directly from CCAssignFnForNode - we don't support
1767// varargs in FastISel so that part has been removed.
1768// TODO: We may not support all of this.
1769CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
1770  switch (CC) {
1771  default:
1772    llvm_unreachable("Unsupported calling convention");
1773  case CallingConv::Fast:
1774    // Ignore fastcc. Silence compiler warnings.
1775    (void)RetFastCC_ARM_APCS;
1776    (void)FastCC_ARM_APCS;
1777    // Fallthrough
1778  case CallingConv::C:
1779    // Use target triple & subtarget features to do actual dispatch.
1780    if (Subtarget->isAAPCS_ABI()) {
1781      if (Subtarget->hasVFP2() &&
1782          TM.Options.FloatABIType == FloatABI::Hard)
1783        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1784      else
1785        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1786    } else
1787      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1788  case CallingConv::ARM_AAPCS_VFP:
1789    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1790  case CallingConv::ARM_AAPCS:
1791    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1792  case CallingConv::ARM_APCS:
1793    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1794  }
1795}
1796
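// For example, under the soft-float AAPCS an f64 argument travels in a GPR
// pair such as r0/r1, which is why the custom-lowering path in
// ProcessCallArgs splits doubles apart with VMOVRRD.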
1797bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1798                                  SmallVectorImpl<unsigned> &ArgRegs,
1799                                  SmallVectorImpl<MVT> &ArgVTs,
1800                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1801                                  SmallVectorImpl<unsigned> &RegArgs,
1802                                  CallingConv::ID CC,
1803                                  unsigned &NumBytes) {
1804  SmallVector<CCValAssign, 16> ArgLocs;
1805  CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
1806  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
1807
1808  // Get a count of how many bytes are to be pushed on the stack.
1809  NumBytes = CCInfo.getNextStackOffset();
1810
1811  // Issue CALLSEQ_START
1812  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1813  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1814                          TII.get(AdjStackDown))
1815                  .addImm(NumBytes));
1816
1817  // Process the args.
1818  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1819    CCValAssign &VA = ArgLocs[i];
1820    unsigned Arg = ArgRegs[VA.getValNo()];
1821    MVT ArgVT = ArgVTs[VA.getValNo()];
1822
1823    // We don't handle NEON/vector parameters yet.
1824    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1825      return false;
1826
1827    // Handle arg promotion, etc.
1828    switch (VA.getLocInfo()) {
1829      case CCValAssign::Full: break;
1830      case CCValAssign::SExt: {
1831        MVT DestVT = VA.getLocVT();
1832        unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
1833                                           /*isZExt*/false);
1834        assert(ResultReg != 0 && "Failed to emit a sext");
1835        Arg = ResultReg;
1836        ArgVT = DestVT;
1837        break;
1838      }
1839      case CCValAssign::AExt:
1840        // Intentional fall-through.  Handle AExt and ZExt.
1841      case CCValAssign::ZExt: {
1842        MVT DestVT = VA.getLocVT();
1843        unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
1844                                           /*isZExt*/true);
1845        assert(ResultReg != 0 && "Failed to emit a zext");
1846        Arg = ResultReg;
1847        ArgVT = DestVT;
1848        break;
1849      }
1850      case CCValAssign::BCvt: {
1851        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
1852                                 /*TODO: Kill=*/false);
1853        assert(BC != 0 && "Failed to emit a bitcast!");
1854        Arg = BC;
1855        ArgVT = VA.getLocVT();
1856        break;
1857      }
1858      default: llvm_unreachable("Unknown arg promotion!");
1859    }
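    // The common promotion case above is an i8 or i16 argument widened to an
    // i32 location.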
1860
1861    // Now copy/store arg to correct locations.
1862    if (VA.isRegLoc() && !VA.needsCustom()) {
1863      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1864              VA.getLocReg())
1865        .addReg(Arg);
1866      RegArgs.push_back(VA.getLocReg());
1867    } else if (VA.needsCustom()) {
1868      // TODO: We need custom lowering for vector (v2f64) args.
1869      if (VA.getLocVT() != MVT::f64) return false;
1870
1871      CCValAssign &NextVA = ArgLocs[++i];
1872
1873      // TODO: Only handle register args for now.
1874      if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
1875
1876      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1877                              TII.get(ARM::VMOVRRD), VA.getLocReg())
1878                      .addReg(NextVA.getLocReg(), RegState::Define)
1879                      .addReg(Arg));
1880      RegArgs.push_back(VA.getLocReg());
1881      RegArgs.push_back(NextVA.getLocReg());
1882    } else {
1883      assert(VA.isMemLoc());
1884      // Need to store on the stack.
1885      Address Addr;
1886      Addr.BaseType = Address::RegBase;
1887      Addr.Base.Reg = ARM::SP;
1888      Addr.Offset = VA.getLocMemOffset();
1889
1890      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
1891    }
1892  }
1893  return true;
1894}
1895
1896bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
1897                             const Instruction *I, CallingConv::ID CC,
1898                             unsigned &NumBytes) {
1899  // Issue CALLSEQ_END
1900  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1901  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1902                          TII.get(AdjStackUp))
1903                  .addImm(NumBytes).addImm(0));
1904
1905  // Now the return value.
1906  if (RetVT != MVT::isVoid) {
1907    SmallVector<CCValAssign, 16> RVLocs;
1908    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
1909    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
1910
1911    // Copy all of the result registers out of their specified physreg.
1912    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
1913      // For this move we copy into two registers and then move into the
1914      // double fp reg we want.
1915      EVT DestVT = RVLocs[0].getValVT();
1916      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
1917      unsigned ResultReg = createResultReg(DstRC);
1918      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1919                              TII.get(ARM::VMOVDRR), ResultReg)
1920                      .addReg(RVLocs[0].getLocReg())
1921                      .addReg(RVLocs[1].getLocReg()));
1922
1923      UsedRegs.push_back(RVLocs[0].getLocReg());
1924      UsedRegs.push_back(RVLocs[1].getLocReg());
1925
1926      // Finally update the result.
1927      UpdateValueMap(I, ResultReg);
1928    } else {
1929      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
1930      EVT CopyVT = RVLocs[0].getValVT();
1931
1932      // Special handling for extended integers.
1933      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
1934        CopyVT = MVT::i32;
1935
1936      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1937
1938      unsigned ResultReg = createResultReg(DstRC);
1939      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1940              ResultReg).addReg(RVLocs[0].getLocReg());
1941      UsedRegs.push_back(RVLocs[0].getLocReg());
1942
1943      // Finally update the result.
1944      UpdateValueMap(I, ResultReg);
1945    }
1946  }
1947
1948  return true;
1949}
1950
1951bool ARMFastISel::SelectRet(const Instruction *I) {
1952  const ReturnInst *Ret = cast<ReturnInst>(I);
1953  const Function &F = *I->getParent()->getParent();
1954
1955  if (!FuncInfo.CanLowerReturn)
1956    return false;
1957
1958  if (F.isVarArg())
1959    return false;
1960
1961  CallingConv::ID CC = F.getCallingConv();
1962  if (Ret->getNumOperands() > 0) {
1963    SmallVector<ISD::OutputArg, 4> Outs;
1964    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
1965                  Outs, TLI);
1966
1967    // Analyze operands of the call, assigning locations to each operand.
1968    SmallVector<CCValAssign, 16> ValLocs;
1969    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext());
1970    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
1971
1972    const Value *RV = Ret->getOperand(0);
1973    unsigned Reg = getRegForValue(RV);
1974    if (Reg == 0)
1975      return false;
1976
1977    // Only handle a single return value for now.
1978    if (ValLocs.size() != 1)
1979      return false;
1980
1981    CCValAssign &VA = ValLocs[0];
1982
1983    // Don't bother handling odd stuff for now.
1984    if (VA.getLocInfo() != CCValAssign::Full)
1985      return false;
1986    // Only handle register returns for now.
1987    if (!VA.isRegLoc())
1988      return false;
1989
1990    unsigned SrcReg = Reg + VA.getValNo();
1991    EVT RVVT = TLI.getValueType(RV->getType());
1992    EVT DestVT = VA.getValVT();
1993    // Special handling for extended integers.
1994    if (RVVT != DestVT) {
1995      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
1996        return false;
1997
1998      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1999        return false;
2000
2001      assert(DestVT == MVT::i32 && "ARM should always ext to i32");
2002
2003      bool isZExt = Outs[0].Flags.isZExt();
2004      unsigned ResultReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2005      if (ResultReg == 0) return false;
2006      SrcReg = ResultReg;
2007    }
2008
2009    // Make the copy.
2010    unsigned DstReg = VA.getLocReg();
2011    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
2012    // Avoid a cross-class copy. This is very unlikely.
2013    if (!SrcRC->contains(DstReg))
2014      return false;
2015    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2016            DstReg).addReg(SrcReg);
2017
2018    // Mark the register as live out of the function.
2019    MRI.addLiveOut(VA.getLocReg());
2020  }
2021
2022  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
2023  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2024                          TII.get(RetOpc)));
2025  return true;
2026}
2027
2028unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
2029
2030  // iOS needs the r9 versions of the opcodes.
2031  bool isiOS = Subtarget->isTargetIOS();
2032  if (isThumb2) {
2033    return isiOS ? ARM::tBLr9 : ARM::tBL;
2034  } else {
2035    return isiOS ? ARM::BLr9 : ARM::BL;
2036  }
2037}
2038
2039  // A quick function that emits a call to the named libcall, passing the
2040  // operands of the Instruction I as the arguments. We can assume that we
2041// can emit a call for any libcall we can produce. This is an abridged version
2042// of the full call infrastructure since we won't need to worry about things
2043// like computed function pointers or strange arguments at call sites.
2044// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2045// with X86.
2046bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
2047  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
2048
2049  // Handle *simple* calls for now.
2050  Type *RetTy = I->getType();
2051  MVT RetVT;
2052  if (RetTy->isVoidTy())
2053    RetVT = MVT::isVoid;
2054  else if (!isTypeLegal(RetTy, RetVT))
2055    return false;
2056
2057  // TODO: For now if we have long calls specified we don't handle the call.
2058  if (EnableARMLongCalls) return false;
2059
2060  // Set up the argument vectors.
2061  SmallVector<Value*, 8> Args;
2062  SmallVector<unsigned, 8> ArgRegs;
2063  SmallVector<MVT, 8> ArgVTs;
2064  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2065  Args.reserve(I->getNumOperands());
2066  ArgRegs.reserve(I->getNumOperands());
2067  ArgVTs.reserve(I->getNumOperands());
2068  ArgFlags.reserve(I->getNumOperands());
2069  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
2070    Value *Op = I->getOperand(i);
2071    unsigned Arg = getRegForValue(Op);
2072    if (Arg == 0) return false;
2073
2074    Type *ArgTy = Op->getType();
2075    MVT ArgVT;
2076    if (!isTypeLegal(ArgTy, ArgVT)) return false;
2077
2078    ISD::ArgFlagsTy Flags;
2079    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
2080    Flags.setOrigAlign(OriginalAlignment);
2081
2082    Args.push_back(Op);
2083    ArgRegs.push_back(Arg);
2084    ArgVTs.push_back(ArgVT);
2085    ArgFlags.push_back(Flags);
2086  }
2087
2088  // Handle the arguments now that we've gotten them.
2089  SmallVector<unsigned, 4> RegArgs;
2090  unsigned NumBytes;
2091  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
2092    return false;
2093
2094  // Issue the call, BLr9 for iOS, BL otherwise.
2095  // TODO: Turn this into the table of arm call ops.
2096  MachineInstrBuilder MIB;
2097  unsigned CallOpc = ARMSelectCallOp(NULL);
2098  if (isThumb2)
2099    // Explicitly adding the predicate here.
2100    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2101                         TII.get(CallOpc)))
2102                         .addExternalSymbol(TLI.getLibcallName(Call));
2103  else
2104    // Explicitly adding the predicate here.
2105    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2106                         TII.get(CallOpc))
2107          .addExternalSymbol(TLI.getLibcallName(Call)));
2108
2109  // Add implicit physical register uses to the call.
2110  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2111    MIB.addReg(RegArgs[i]);
2112
2113  // Finish off the call including any return values.
2114  SmallVector<unsigned, 4> UsedRegs;
2115  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
2116
2117  // Set all unused physreg defs as dead.
2118  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2119
2120  return true;
2121}
2122
2123bool ARMFastISel::SelectCall(const Instruction *I,
2124                             const char *IntrMemName = 0) {
2125  const CallInst *CI = cast<CallInst>(I);
2126  const Value *Callee = CI->getCalledValue();
2127
2128  // Can't handle inline asm.
2129  if (isa<InlineAsm>(Callee)) return false;
2130
2131  // Only handle global variable Callees.
2132  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
2133  if (!GV)
2134    return false;
2135
2136  // Check the calling convention.
2137  ImmutableCallSite CS(CI);
2138  CallingConv::ID CC = CS.getCallingConv();
2139
2140  // TODO: Avoid some calling conventions?
2141
2142  // Let SDISel handle vararg functions.
2143  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
2144  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
2145  if (FTy->isVarArg())
2146    return false;
2147
2148  // Handle *simple* calls for now.
2149  Type *RetTy = I->getType();
2150  MVT RetVT;
2151  if (RetTy->isVoidTy())
2152    RetVT = MVT::isVoid;
2153  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
2154           RetVT != MVT::i8  && RetVT != MVT::i1)
2155    return false;
2156
2157  // TODO: For now if we have long calls specified we don't handle the call.
2158  if (EnableARMLongCalls) return false;
2159
2160  // Set up the argument vectors.
2161  SmallVector<Value*, 8> Args;
2162  SmallVector<unsigned, 8> ArgRegs;
2163  SmallVector<MVT, 8> ArgVTs;
2164  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2165  Args.reserve(CS.arg_size());
2166  ArgRegs.reserve(CS.arg_size());
2167  ArgVTs.reserve(CS.arg_size());
2168  ArgFlags.reserve(CS.arg_size());
2169  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
2170       i != e; ++i) {
2171    // If we're lowering a memory intrinsic instead of a regular call, skip the
2172    // last two arguments, which shouldn't be passed to the underlying function.
2173    if (IntrMemName && e-i <= 2)
2174      break;
2175
2176    ISD::ArgFlagsTy Flags;
2177    unsigned AttrInd = i - CS.arg_begin() + 1;
2178    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
2179      Flags.setSExt();
2180    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
2181      Flags.setZExt();
2182
2183    // FIXME: Only handle *easy* calls for now.
2184    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
2185        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
2186        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
2187        CS.paramHasAttr(AttrInd, Attribute::ByVal))
2188      return false;
2189
2190    Type *ArgTy = (*i)->getType();
2191    MVT ArgVT;
2192    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
2193        ArgVT != MVT::i1)
2194      return false;
2195
2196    unsigned Arg = getRegForValue(*i);
2197    if (Arg == 0)
2198      return false;
2199
2200    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
2201    Flags.setOrigAlign(OriginalAlignment);
2202
2203    Args.push_back(*i);
2204    ArgRegs.push_back(Arg);
2205    ArgVTs.push_back(ArgVT);
2206    ArgFlags.push_back(Flags);
2207  }
2208
2209  // Handle the arguments now that we've gotten them.
2210  SmallVector<unsigned, 4> RegArgs;
2211  unsigned NumBytes;
2212  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
2213    return false;
2214
2215  // Issue the call, BLr9 for iOS, BL otherwise.
2216  // TODO: Turn this into the table of arm call ops.
2217  MachineInstrBuilder MIB;
2218  unsigned CallOpc = ARMSelectCallOp(GV);
2219  // Explicitly add the default predicate to the call in each branch below.
2220  if (isThumb2) {
2222    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2223                                 TII.get(CallOpc)));
2224    if (!IntrMemName)
2225      MIB.addGlobalAddress(GV, 0, 0);
2226    else
2227      MIB.addExternalSymbol(IntrMemName, 0);
2228  } else {
2229    if (!IntrMemName)
2231      MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2232                                   TII.get(CallOpc))
2233            .addGlobalAddress(GV, 0, 0));
2234    else
2235      MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2236                                   TII.get(CallOpc))
2237            .addExternalSymbol(IntrMemName, 0));
2238  }
2239
2240  // Add implicit physical register uses to the call.
2241  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2242    MIB.addReg(RegArgs[i]);
2243
2244  // Finish off the call including any return values.
2245  SmallVector<unsigned, 4> UsedRegs;
2246  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
2247
2248  // Set all unused physreg defs as dead.
2249  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2250
2251  return true;
2252}
2253
2254bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2255  return Len <= 16;
2256}
2257
2258bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) {
2259  // Make sure we don't bloat code by inlining very large memcpy's.
2260  if (!ARMIsMemCpySmall(Len))
2261    return false;
2262
2263  // We don't care about alignment here since we just emit integer accesses.
2264  while (Len) {
2265    MVT VT;
2266    if (Len >= 4)
2267      VT = MVT::i32;
2268    else if (Len >= 2)
2269      VT = MVT::i16;
2270    else {
2271      assert(Len == 1);
2272      VT = MVT::i8;
2273    }
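    // E.g. a 7-byte copy is emitted as one i32, one i16, and one i8 chunk.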
2274
2275    bool RV;
2276    unsigned ResultReg;
2277    RV = ARMEmitLoad(VT, ResultReg, Src);
2278    assert(RV && "Should be able to handle this load.");
2279    RV = ARMEmitStore(VT, ResultReg, Dest);
2280    assert(RV && "Should be able to handle this store.");
2281
2282    unsigned Size = VT.getSizeInBits()/8;
2283    Len -= Size;
2284    Dest.Offset += Size;
2285    Src.Offset += Size;
2286  }
2287
2288  return true;
2289}
2290
2291bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
2292  // FIXME: Handle more intrinsics.
2293  switch (I.getIntrinsicID()) {
2294  default: return false;
2295  case Intrinsic::memcpy:
2296  case Intrinsic::memmove: {
2297    const MemTransferInst &MTI = cast<MemTransferInst>(I);
2298    // Don't handle volatile.
2299    if (MTI.isVolatile())
2300      return false;
2301
2302    // Disable inlining for memmove before calls to ARMComputeAddress.
2303    // Otherwise, we would emit dead code since we don't yet handle memmoves.
2304    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
2305    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
2306      // Small memcpys are common enough that we want to do them without a
2307      // call if possible.
2308      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
2309      if (ARMIsMemCpySmall(Len)) {
2310        Address Dest, Src;
2311        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
2312            !ARMComputeAddress(MTI.getRawSource(), Src))
2313          return false;
2314        if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
2315          return true;
2316      }
2317    }
2318
2319    if (!MTI.getLength()->getType()->isIntegerTy(32))
2320      return false;
2321
2322    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
2323      return false;
2324
2325    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
2326    return SelectCall(&I, IntrMemName);
2327  }
2328  case Intrinsic::memset: {
2329    const MemSetInst &MSI = cast<MemSetInst>(I);
2330    // Don't handle volatile.
2331    if (MSI.isVolatile())
2332      return false;
2333
2334    if (!MSI.getLength()->getType()->isIntegerTy(32))
2335      return false;
2336
2337    if (MSI.getDestAddressSpace() > 255)
2338      return false;
2339
2340    return SelectCall(&I, "memset");
2341  }
2342  }
2343}
2344
2345bool ARMFastISel::SelectTrunc(const Instruction *I) {
2346  // The high bits for a type smaller than the register size are assumed to be
2347  // undefined.
2348  Value *Op = I->getOperand(0);
2349
2350  EVT SrcVT, DestVT;
2351  SrcVT = TLI.getValueType(Op->getType(), true);
2352  DestVT = TLI.getValueType(I->getType(), true);
2353
2354  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2355    return false;
2356  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2357    return false;
2358
2359  unsigned SrcReg = getRegForValue(Op);
2360  if (!SrcReg) return false;
2361
2362  // Because the high bits are undefined, a truncate doesn't generate
2363  // any code.
2364  UpdateValueMap(I, SrcReg);
2365  return true;
2366}
2367
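/// ARMEmitIntExt - Emit an integer extend: an i8 -> i32 zero-extend becomes
/// UXTB (t2UXTB in Thumb2) and a sign-extend becomes SXTB, both requiring
/// ARMv6; an i1 zero-extend is an AND against 1 instead.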
2368unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
2369                                    bool isZExt) {
2370  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
2371    return 0;
2372
2373  unsigned Opc;
2374  bool isBoolZext = false;
2375  if (!SrcVT.isSimple()) return 0;
2376  switch (SrcVT.getSimpleVT().SimpleTy) {
2377  default: return 0;
2378  case MVT::i16:
2379    if (!Subtarget->hasV6Ops()) return 0;
2380    if (isZExt)
2381      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
2382    else
2383      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
2384    break;
2385  case MVT::i8:
2386    if (!Subtarget->hasV6Ops()) return 0;
2387    if (isZExt)
2388      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
2389    else
2390      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
2391    break;
2392  case MVT::i1:
2393    if (isZExt) {
2394      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
2395      isBoolZext = true;
2396      break;
2397    }
2398    return 0;
2399  }
2400
2401  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
2402  MachineInstrBuilder MIB;
2403  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
2404        .addReg(SrcReg);
2405  if (isBoolZext)
2406    MIB.addImm(1);
2407  else
2408    MIB.addImm(0);
2409  AddOptionalDefs(MIB);
2410  return ResultReg;
2411}
2412
2413bool ARMFastISel::SelectIntExt(const Instruction *I) {
2414  // On ARM, in general, integer casts don't involve legal types; this code
2415  // handles promotable integers.
2416  Type *DestTy = I->getType();
2417  Value *Src = I->getOperand(0);
2418  Type *SrcTy = Src->getType();
2419
2420  EVT SrcVT, DestVT;
2421  SrcVT = TLI.getValueType(SrcTy, true);
2422  DestVT = TLI.getValueType(DestTy, true);
2423
2424  bool isZExt = isa<ZExtInst>(I);
2425  unsigned SrcReg = getRegForValue(Src);
2426  if (!SrcReg) return false;
2427
2428  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2429  if (ResultReg == 0) return false;
2430  UpdateValueMap(I, ResultReg);
2431  return true;
2432}
2433
2434// TODO: SoftFP support.
2435bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
2436
2437  switch (I->getOpcode()) {
2438    case Instruction::Load:
2439      return SelectLoad(I);
2440    case Instruction::Store:
2441      return SelectStore(I);
2442    case Instruction::Br:
2443      return SelectBranch(I);
2444    case Instruction::ICmp:
2445    case Instruction::FCmp:
2446      return SelectCmp(I);
2447    case Instruction::FPExt:
2448      return SelectFPExt(I);
2449    case Instruction::FPTrunc:
2450      return SelectFPTrunc(I);
2451    case Instruction::SIToFP:
2452      return SelectIToFP(I, /*isZExt*/ false);
2453    case Instruction::UIToFP:
2454      return SelectIToFP(I, /*isZExt*/ true);
2455    case Instruction::FPToSI:
2456      return SelectFPToSI(I);
2457    case Instruction::FAdd:
2458      return SelectBinaryOp(I, ISD::FADD);
2459    case Instruction::FSub:
2460      return SelectBinaryOp(I, ISD::FSUB);
2461    case Instruction::FMul:
2462      return SelectBinaryOp(I, ISD::FMUL);
2463    case Instruction::SDiv:
2464      return SelectSDiv(I);
2465    case Instruction::SRem:
2466      return SelectSRem(I);
2467    case Instruction::Call:
2468      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2469        return SelectIntrinsicCall(*II);
2470      return SelectCall(I);
2471    case Instruction::Select:
2472      return SelectSelect(I);
2473    case Instruction::Ret:
2474      return SelectRet(I);
2475    case Instruction::Trunc:
2476      return SelectTrunc(I);
2477    case Instruction::ZExt:
2478    case Instruction::SExt:
2479      return SelectIntExt(I);
2480    default: break;
2481  }
2482  return false;
2483}
2484
2485/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
2486/// vreg is being provided by the specified load instruction.  If possible,
2487/// try to fold the load as an operand to the instruction, returning true if
2488/// successful.
2489bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
2490                                const LoadInst *LI) {
2491  // Verify we have a legal type before going any further.
2492  MVT VT;
2493  if (!isLoadTypeLegal(LI->getType(), VT))
2494    return false;
2495
2496  // Combine load followed by zero- or sign-extend.
2497  //   ldrb r1, [r0]            ldrb r1, [r0]
2498  //   uxtb r2, r1        =>    (extend folded into the load)
2499  //   mov  r3, r2              mov  r3, r1
2500  bool isZExt = true;
2501  switch (MI->getOpcode()) {
2502    default: return false;
2503    case ARM::SXTH:
2504    case ARM::t2SXTH:
2505      isZExt = false;
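      // Intentional fall-through.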
2506    case ARM::UXTH:
2507    case ARM::t2UXTH:
2508      if (VT != MVT::i16)
2509        return false;
2510      break;
2511    case ARM::SXTB:
2512    case ARM::t2SXTB:
2513      isZExt = false;
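      // Intentional fall-through.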
2514    case ARM::UXTB:
2515    case ARM::t2UXTB:
2516      if (VT != MVT::i8)
2517        return false;
2518      break;
2519  }
2520  // See if we can handle this address.
2521  Address Addr;
2522  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
2523
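  // Re-emit the load with the desired extension kind directly into the
  // extend's result register, then delete the now-dead extend instruction.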
2524  unsigned ResultReg = MI->getOperand(0).getReg();
2525  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
2526    return false;
2527  MI->eraseFromParent();
2528  return true;
2529}
2530
2531namespace llvm {
2532  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
2533    // Completely untested on non-iOS.
2534    const TargetMachine &TM = funcInfo.MF->getTarget();
2535
2536    // iOS only for now; Thumb1-only subtargets are not supported.
2537    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
2538    if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() &&
2539        !DisableARMFastISel)
2540      return new ARMFastISel(funcInfo);
2541    return 0;
2542  }
2543}
2544