1//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the ARM-specific support for the FastISel class. Some
11// of the target-specific code is generated by tablegen in the file
12// ARMGenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "ARM.h"
17#include "ARMBaseRegisterInfo.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMISelLowering.h"
21#include "ARMMachineFunctionInfo.h"
22#include "ARMSubtarget.h"
23#include "MCTargetDesc/ARMAddressingModes.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/CodeGen/Analysis.h"
26#include "llvm/CodeGen/FastISel.h"
27#include "llvm/CodeGen/FunctionLoweringInfo.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineMemOperand.h"
32#include "llvm/CodeGen/MachineModuleInfo.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/IR/CallSite.h"
35#include "llvm/IR/CallingConv.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GetElementPtrTypeIterator.h"
39#include "llvm/IR/GlobalVariable.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/Module.h"
43#include "llvm/IR/Operator.h"
44#include "llvm/Support/CommandLine.h"
45#include "llvm/Support/ErrorHandling.h"
46#include "llvm/Target/TargetInstrInfo.h"
47#include "llvm/Target/TargetLowering.h"
48#include "llvm/Target/TargetMachine.h"
49#include "llvm/Target/TargetOptions.h"
50using namespace llvm;
51
52extern cl::opt<bool> EnableARMLongCalls;
53
54namespace {
55
56  // All possible address modes, plus some.
57  typedef struct Address {
58    enum {
59      RegBase,
60      FrameIndexBase
61    } BaseType;
62
63    union {
64      unsigned Reg;
65      int FI;
66    } Base;
67
68    int Offset;
69
70    // Innocuous defaults for our address.
71    Address()
72     : BaseType(RegBase), Offset(0) {
73       Base.Reg = 0;
74     }
75  } Address;
76
77class ARMFastISel final : public FastISel {
78
79  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
80  /// make the right decision when generating code for different targets.
81  const ARMSubtarget *Subtarget;
82  Module &M;
83  const TargetMachine &TM;
84  const TargetInstrInfo &TII;
85  const TargetLowering &TLI;
86  ARMFunctionInfo *AFI;
87
88  // Convenience variables to avoid some queries.
89  bool isThumb2;
90  LLVMContext *Context;
91
92  public:
93    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
94                         const TargetLibraryInfo *libInfo)
95    : FastISel(funcInfo, libInfo),
96      M(const_cast<Module&>(*funcInfo.Fn->getParent())),
97      TM(funcInfo.MF->getTarget()),
98      TII(*TM.getInstrInfo()),
99      TLI(*TM.getTargetLowering()) {
100      Subtarget = &TM.getSubtarget<ARMSubtarget>();
101      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
102      isThumb2 = AFI->isThumbFunction();
103      Context = &funcInfo.Fn->getContext();
104    }
105
106    // Code from FastISel.cpp.
107  private:
108    unsigned FastEmitInst_r(unsigned MachineInstOpcode,
109                            const TargetRegisterClass *RC,
110                            unsigned Op0, bool Op0IsKill);
111    unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
112                             const TargetRegisterClass *RC,
113                             unsigned Op0, bool Op0IsKill,
114                             unsigned Op1, bool Op1IsKill);
115    unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
116                              const TargetRegisterClass *RC,
117                              unsigned Op0, bool Op0IsKill,
118                              unsigned Op1, bool Op1IsKill,
119                              unsigned Op2, bool Op2IsKill);
120    unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
121                             const TargetRegisterClass *RC,
122                             unsigned Op0, bool Op0IsKill,
123                             uint64_t Imm);
124    unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
125                              const TargetRegisterClass *RC,
126                              unsigned Op0, bool Op0IsKill,
127                              unsigned Op1, bool Op1IsKill,
128                              uint64_t Imm);
129    unsigned FastEmitInst_i(unsigned MachineInstOpcode,
130                            const TargetRegisterClass *RC,
131                            uint64_t Imm);
132
133    // Backend specific FastISel code.
134  private:
135    bool TargetSelectInstruction(const Instruction *I) override;
136    unsigned TargetMaterializeConstant(const Constant *C) override;
137    unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
138    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
139                             const LoadInst *LI) override;
140    bool FastLowerArguments() override;
141  private:
142  #include "ARMGenFastISel.inc"
143
144    // Instruction selection routines.
145  private:
146    bool SelectLoad(const Instruction *I);
147    bool SelectStore(const Instruction *I);
148    bool SelectBranch(const Instruction *I);
149    bool SelectIndirectBr(const Instruction *I);
150    bool SelectCmp(const Instruction *I);
151    bool SelectFPExt(const Instruction *I);
152    bool SelectFPTrunc(const Instruction *I);
153    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
154    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
155    bool SelectIToFP(const Instruction *I, bool isSigned);
156    bool SelectFPToI(const Instruction *I, bool isSigned);
157    bool SelectDiv(const Instruction *I, bool isSigned);
158    bool SelectRem(const Instruction *I, bool isSigned);
159    bool SelectCall(const Instruction *I, const char *IntrMemName);
160    bool SelectIntrinsicCall(const IntrinsicInst &I);
161    bool SelectSelect(const Instruction *I);
162    bool SelectRet(const Instruction *I);
163    bool SelectTrunc(const Instruction *I);
164    bool SelectIntExt(const Instruction *I);
165    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
166
167    // Utility routines.
168  private:
169    bool isTypeLegal(Type *Ty, MVT &VT);
170    bool isLoadTypeLegal(Type *Ty, MVT &VT);
171    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
172                    bool isZExt);
173    bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
174                     unsigned Alignment = 0, bool isZExt = true,
175                     bool allocReg = true);
176    bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
177                      unsigned Alignment = 0);
178    bool ARMComputeAddress(const Value *Obj, Address &Addr);
179    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
180    bool ARMIsMemCpySmall(uint64_t Len);
181    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
182                               unsigned Alignment);
183    unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
184    unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
185    unsigned ARMMaterializeInt(const Constant *C, MVT VT);
186    unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
187    unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
188    unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
189    unsigned ARMSelectCallOp(bool UseReg);
190    unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
191
192    const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
193
194    // Call handling routines.
195  private:
196    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
197                                  bool Return,
198                                  bool isVarArg);
199    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
200                         SmallVectorImpl<unsigned> &ArgRegs,
201                         SmallVectorImpl<MVT> &ArgVTs,
202                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
203                         SmallVectorImpl<unsigned> &RegArgs,
204                         CallingConv::ID CC,
205                         unsigned &NumBytes,
206                         bool isVarArg);
207    unsigned getLibcallReg(const Twine &Name);
208    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
209                    const Instruction *I, CallingConv::ID CC,
210                    unsigned &NumBytes, bool isVarArg);
211    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
212
213    // OptionalDef handling routines.
214  private:
215    bool isARMNEONPred(const MachineInstr *MI);
216    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
217    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
218    void AddLoadStoreOperands(MVT VT, Address &Addr,
219                              const MachineInstrBuilder &MIB,
220                              unsigned Flags, bool useAM3);
221};
222
223} // end anonymous namespace
224
225#include "ARMGenCallingConv.inc"
226
227// DefinesOptionalPredicate - This is different from DefinesPredicate in that
228// we don't care about implicit defs here, just places we'll need to add a
229// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
230bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
231  if (!MI->hasOptionalDef())
232    return false;
233
234  // Look to see if our OptionalDef is defining CPSR or CCR.
235  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
236    const MachineOperand &MO = MI->getOperand(i);
237    if (!MO.isReg() || !MO.isDef()) continue;
238    if (MO.getReg() == ARM::CPSR)
239      *CPSR = true;
240  }
241  return true;
242}
243
244bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
245  const MCInstrDesc &MCID = MI->getDesc();
246
247  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
248  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
249       AFI->isThumb2Function())
250    return MI->isPredicable();
251
252  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
253    if (MCID.OpInfo[i].isPredicate())
254      return true;
255
256  return false;
257}
258
259// If the machine is predicable go ahead and add the predicate operands, if
260// it needs default CC operands add those.
261// TODO: If we want to support thumb1 then we'll need to deal with optional
262// CPSR defs that need to be added before the remaining operands. See s_cc_out
263// for descriptions why.
264const MachineInstrBuilder &
265ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
266  MachineInstr *MI = &*MIB;
267
268  // Do we use a predicate? or...
269  // Are we NEON in ARM mode and have a predicate operand? If so, I know
270  // we're not predicable but add it anyways.
271  if (isARMNEONPred(MI))
272    AddDefaultPred(MIB);
273
274  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
275  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
276  bool CPSR = false;
277  if (DefinesOptionalPredicate(MI, &CPSR)) {
278    if (CPSR)
279      AddDefaultT1CC(MIB);
280    else
281      AddDefaultCC(MIB);
282  }
283  return MIB;
284}
285
286unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
287                                     const TargetRegisterClass *RC,
288                                     unsigned Op0, bool Op0IsKill) {
289  unsigned ResultReg = createResultReg(RC);
290  const MCInstrDesc &II = TII.get(MachineInstOpcode);
291
292  // Make sure the input operand is sufficiently constrained to be legal
293  // for this instruction.
294  Op0 = constrainOperandRegClass(II, Op0, 1);
295  if (II.getNumDefs() >= 1) {
296    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
297                            ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
298  } else {
299    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
300                   .addReg(Op0, Op0IsKill * RegState::Kill));
301    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
302                   TII.get(TargetOpcode::COPY), ResultReg)
303                   .addReg(II.ImplicitDefs[0]));
304  }
305  return ResultReg;
306}
307
308unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
309                                      const TargetRegisterClass *RC,
310                                      unsigned Op0, bool Op0IsKill,
311                                      unsigned Op1, bool Op1IsKill) {
312  unsigned ResultReg = createResultReg(RC);
313  const MCInstrDesc &II = TII.get(MachineInstOpcode);
314
315  // Make sure the input operands are sufficiently constrained to be legal
316  // for this instruction.
317  Op0 = constrainOperandRegClass(II, Op0, 1);
318  Op1 = constrainOperandRegClass(II, Op1, 2);
319
320  if (II.getNumDefs() >= 1) {
321    AddOptionalDefs(
322        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
323            .addReg(Op0, Op0IsKill * RegState::Kill)
324            .addReg(Op1, Op1IsKill * RegState::Kill));
325  } else {
326    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
327                   .addReg(Op0, Op0IsKill * RegState::Kill)
328                   .addReg(Op1, Op1IsKill * RegState::Kill));
329    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
330                           TII.get(TargetOpcode::COPY), ResultReg)
331                   .addReg(II.ImplicitDefs[0]));
332  }
333  return ResultReg;
334}
335
336unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
337                                       const TargetRegisterClass *RC,
338                                       unsigned Op0, bool Op0IsKill,
339                                       unsigned Op1, bool Op1IsKill,
340                                       unsigned Op2, bool Op2IsKill) {
341  unsigned ResultReg = createResultReg(RC);
342  const MCInstrDesc &II = TII.get(MachineInstOpcode);
343
344  // Make sure the input operands are sufficiently constrained to be legal
345  // for this instruction.
346  Op0 = constrainOperandRegClass(II, Op0, 1);
347  Op1 = constrainOperandRegClass(II, Op1, 2);
348  Op2 = constrainOperandRegClass(II, Op1, 3);
349
350  if (II.getNumDefs() >= 1) {
351    AddOptionalDefs(
352        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
353            .addReg(Op0, Op0IsKill * RegState::Kill)
354            .addReg(Op1, Op1IsKill * RegState::Kill)
355            .addReg(Op2, Op2IsKill * RegState::Kill));
356  } else {
357    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
358                   .addReg(Op0, Op0IsKill * RegState::Kill)
359                   .addReg(Op1, Op1IsKill * RegState::Kill)
360                   .addReg(Op2, Op2IsKill * RegState::Kill));
361    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
362                           TII.get(TargetOpcode::COPY), ResultReg)
363                   .addReg(II.ImplicitDefs[0]));
364  }
365  return ResultReg;
366}
367
368unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
369                                      const TargetRegisterClass *RC,
370                                      unsigned Op0, bool Op0IsKill,
371                                      uint64_t Imm) {
372  unsigned ResultReg = createResultReg(RC);
373  const MCInstrDesc &II = TII.get(MachineInstOpcode);
374
375  // Make sure the input operand is sufficiently constrained to be legal
376  // for this instruction.
377  Op0 = constrainOperandRegClass(II, Op0, 1);
378  if (II.getNumDefs() >= 1) {
379    AddOptionalDefs(
380        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
381            .addReg(Op0, Op0IsKill * RegState::Kill)
382            .addImm(Imm));
383  } else {
384    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
385                   .addReg(Op0, Op0IsKill * RegState::Kill)
386                   .addImm(Imm));
387    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
388                           TII.get(TargetOpcode::COPY), ResultReg)
389                   .addReg(II.ImplicitDefs[0]));
390  }
391  return ResultReg;
392}
393
394unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
395                                       const TargetRegisterClass *RC,
396                                       unsigned Op0, bool Op0IsKill,
397                                       unsigned Op1, bool Op1IsKill,
398                                       uint64_t Imm) {
399  unsigned ResultReg = createResultReg(RC);
400  const MCInstrDesc &II = TII.get(MachineInstOpcode);
401
402  // Make sure the input operands are sufficiently constrained to be legal
403  // for this instruction.
404  Op0 = constrainOperandRegClass(II, Op0, 1);
405  Op1 = constrainOperandRegClass(II, Op1, 2);
406  if (II.getNumDefs() >= 1) {
407    AddOptionalDefs(
408        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
409            .addReg(Op0, Op0IsKill * RegState::Kill)
410            .addReg(Op1, Op1IsKill * RegState::Kill)
411            .addImm(Imm));
412  } else {
413    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
414                   .addReg(Op0, Op0IsKill * RegState::Kill)
415                   .addReg(Op1, Op1IsKill * RegState::Kill)
416                   .addImm(Imm));
417    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
418                           TII.get(TargetOpcode::COPY), ResultReg)
419                   .addReg(II.ImplicitDefs[0]));
420  }
421  return ResultReg;
422}
423
424unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
425                                     const TargetRegisterClass *RC,
426                                     uint64_t Imm) {
427  unsigned ResultReg = createResultReg(RC);
428  const MCInstrDesc &II = TII.get(MachineInstOpcode);
429
430  if (II.getNumDefs() >= 1) {
431    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
432                            ResultReg).addImm(Imm));
433  } else {
434    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
435                   .addImm(Imm));
436    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
437                           TII.get(TargetOpcode::COPY), ResultReg)
438                   .addReg(II.ImplicitDefs[0]));
439  }
440  return ResultReg;
441}
442
443// TODO: Don't worry about 64-bit now, but when this is fixed remove the
444// checks from the various callers.
445unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
446  if (VT == MVT::f64) return 0;
447
448  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
449  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
450                          TII.get(ARM::VMOVSR), MoveReg)
451                  .addReg(SrcReg));
452  return MoveReg;
453}
454
455unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
456  if (VT == MVT::i64) return 0;
457
458  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
459  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
460                          TII.get(ARM::VMOVRS), MoveReg)
461                  .addReg(SrcReg));
462  return MoveReg;
463}
464
465// For double width floating point we need to materialize two constants
466// (the high and the low) into integer registers then use a move to get
467// the combined constant into an FP reg.
468unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
469  const APFloat Val = CFP->getValueAPF();
470  bool is64bit = VT == MVT::f64;
471
472  // This checks to see if we can use VFP3 instructions to materialize
473  // a constant, otherwise we have to go through the constant pool.
474  if (TLI.isFPImmLegal(Val, VT)) {
475    int Imm;
476    unsigned Opc;
477    if (is64bit) {
478      Imm = ARM_AM::getFP64Imm(Val);
479      Opc = ARM::FCONSTD;
480    } else {
481      Imm = ARM_AM::getFP32Imm(Val);
482      Opc = ARM::FCONSTS;
483    }
484    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
485    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
486                            TII.get(Opc), DestReg).addImm(Imm));
487    return DestReg;
488  }
489
490  // Require VFP2 for loading fp constants.
491  if (!Subtarget->hasVFP2()) return false;
492
493  // MachineConstantPool wants an explicit alignment.
494  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
495  if (Align == 0) {
496    // TODO: Figure out if this is correct.
497    Align = DL.getTypeAllocSize(CFP->getType());
498  }
499  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
500  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
501  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
502
503  // The extra reg is for addrmode5.
504  AddOptionalDefs(
505      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
506          .addConstantPoolIndex(Idx)
507          .addReg(0));
508  return DestReg;
509}
510
511unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
512
513  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
514    return false;
515
516  // If we can do this in a single instruction without a constant pool entry
517  // do so now.
518  const ConstantInt *CI = cast<ConstantInt>(C);
519  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
520    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
521    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
522      &ARM::GPRRegClass;
523    unsigned ImmReg = createResultReg(RC);
524    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
525                            TII.get(Opc), ImmReg)
526                    .addImm(CI->getZExtValue()));
527    return ImmReg;
528  }
529
530  // Use MVN to emit negative constants.
531  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
532    unsigned Imm = (unsigned)~(CI->getSExtValue());
533    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
534      (ARM_AM::getSOImmVal(Imm) != -1);
535    if (UseImm) {
536      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
537      unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
538      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
539                              TII.get(Opc), ImmReg)
540                      .addImm(Imm));
541      return ImmReg;
542    }
543  }
544
545  // Load from constant pool.  For now 32-bit only.
546  if (VT != MVT::i32)
547    return false;
548
549  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
550
551  // MachineConstantPool wants an explicit alignment.
552  unsigned Align = DL.getPrefTypeAlignment(C->getType());
553  if (Align == 0) {
554    // TODO: Figure out if this is correct.
555    Align = DL.getTypeAllocSize(C->getType());
556  }
557  unsigned Idx = MCP.getConstantPoolIndex(C, Align);
558
559  if (isThumb2)
560    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
561                            TII.get(ARM::t2LDRpci), DestReg)
562                    .addConstantPoolIndex(Idx));
563  else {
564    // The extra immediate is for addrmode2.
565    DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
566    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
567                            TII.get(ARM::LDRcp), DestReg)
568                    .addConstantPoolIndex(Idx)
569                    .addImm(0));
570  }
571
572  return DestReg;
573}
574
575unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
576  // For now 32-bit only.
577  if (VT != MVT::i32) return 0;
578
579  Reloc::Model RelocM = TM.getRelocationModel();
580  bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
581  const TargetRegisterClass *RC = isThumb2 ?
582    (const TargetRegisterClass*)&ARM::rGPRRegClass :
583    (const TargetRegisterClass*)&ARM::GPRRegClass;
584  unsigned DestReg = createResultReg(RC);
585
586  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
587  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
588  bool IsThreadLocal = GVar && GVar->isThreadLocal();
589  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
590
591  // Use movw+movt when possible, it avoids constant pool entries.
592  // Non-darwin targets only support static movt relocations in FastISel.
593  if (Subtarget->useMovt(*FuncInfo.MF) &&
594      (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
595    unsigned Opc;
596    unsigned char TF = 0;
597    if (Subtarget->isTargetMachO())
598      TF = ARMII::MO_NONLAZY;
599
600    switch (RelocM) {
601    case Reloc::PIC_:
602      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
603      break;
604    default:
605      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
606      break;
607    }
608    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
609                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
610  } else {
611    // MachineConstantPool wants an explicit alignment.
612    unsigned Align = DL.getPrefTypeAlignment(GV->getType());
613    if (Align == 0) {
614      // TODO: Figure out if this is correct.
615      Align = DL.getTypeAllocSize(GV->getType());
616    }
617
618    if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
619      return ARMLowerPICELF(GV, Align, VT);
620
621    // Grab index.
622    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
623      (Subtarget->isThumb() ? 4 : 8);
624    unsigned Id = AFI->createPICLabelUId();
625    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
626                                                                ARMCP::CPValue,
627                                                                PCAdj);
628    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
629
630    // Load value.
631    MachineInstrBuilder MIB;
632    if (isThumb2) {
633      unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
634      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
635                    DestReg).addConstantPoolIndex(Idx);
636      if (RelocM == Reloc::PIC_)
637        MIB.addImm(Id);
638      AddOptionalDefs(MIB);
639    } else {
640      // The extra immediate is for addrmode2.
641      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
642      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
643                    TII.get(ARM::LDRcp), DestReg)
644                .addConstantPoolIndex(Idx)
645                .addImm(0);
646      AddOptionalDefs(MIB);
647
648      if (RelocM == Reloc::PIC_) {
649        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
650        unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
651
652        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
653                                          DbgLoc, TII.get(Opc), NewDestReg)
654                                  .addReg(DestReg)
655                                  .addImm(Id);
656        AddOptionalDefs(MIB);
657        return NewDestReg;
658      }
659    }
660  }
661
662  if (IsIndirect) {
663    MachineInstrBuilder MIB;
664    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
665    if (isThumb2)
666      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
667                    TII.get(ARM::t2LDRi12), NewDestReg)
668            .addReg(DestReg)
669            .addImm(0);
670    else
671      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
672                    TII.get(ARM::LDRi12), NewDestReg)
673                .addReg(DestReg)
674                .addImm(0);
675    DestReg = NewDestReg;
676    AddOptionalDefs(MIB);
677  }
678
679  return DestReg;
680}
681
682unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
683  EVT CEVT = TLI.getValueType(C->getType(), true);
684
685  // Only handle simple types.
686  if (!CEVT.isSimple()) return 0;
687  MVT VT = CEVT.getSimpleVT();
688
689  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
690    return ARMMaterializeFP(CFP, VT);
691  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
692    return ARMMaterializeGV(GV, VT);
693  else if (isa<ConstantInt>(C))
694    return ARMMaterializeInt(C, VT);
695
696  return 0;
697}
698
699// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
700
701unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
702  // Don't handle dynamic allocas.
703  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
704
705  MVT VT;
706  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
707
708  DenseMap<const AllocaInst*, int>::iterator SI =
709    FuncInfo.StaticAllocaMap.find(AI);
710
711  // This will get lowered later into the correct offsets and registers
712  // via rewriteXFrameIndex.
713  if (SI != FuncInfo.StaticAllocaMap.end()) {
714    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
715    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
716    unsigned ResultReg = createResultReg(RC);
717    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
718
719    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
720                            TII.get(Opc), ResultReg)
721                            .addFrameIndex(SI->second)
722                            .addImm(0));
723    return ResultReg;
724  }
725
726  return 0;
727}
728
729bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
730  EVT evt = TLI.getValueType(Ty, true);
731
732  // Only handle simple types.
733  if (evt == MVT::Other || !evt.isSimple()) return false;
734  VT = evt.getSimpleVT();
735
736  // Handle all legal types, i.e. a register that will directly hold this
737  // value.
738  return TLI.isTypeLegal(VT);
739}
740
741bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
742  if (isTypeLegal(Ty, VT)) return true;
743
744  // If this is a type than can be sign or zero-extended to a basic operation
745  // go ahead and accept it now.
746  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
747    return true;
748
749  return false;
750}
751
752// Computes the address to get to an object.
753bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
754  // Some boilerplate from the X86 FastISel.
755  const User *U = nullptr;
756  unsigned Opcode = Instruction::UserOp1;
757  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
758    // Don't walk into other basic blocks unless the object is an alloca from
759    // another block, otherwise it may not have a virtual register assigned.
760    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
761        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
762      Opcode = I->getOpcode();
763      U = I;
764    }
765  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
766    Opcode = C->getOpcode();
767    U = C;
768  }
769
770  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
771    if (Ty->getAddressSpace() > 255)
772      // Fast instruction selection doesn't support the special
773      // address spaces.
774      return false;
775
776  switch (Opcode) {
777    default:
778    break;
779    case Instruction::BitCast:
780      // Look through bitcasts.
781      return ARMComputeAddress(U->getOperand(0), Addr);
782    case Instruction::IntToPtr:
783      // Look past no-op inttoptrs.
784      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
785        return ARMComputeAddress(U->getOperand(0), Addr);
786      break;
787    case Instruction::PtrToInt:
788      // Look past no-op ptrtoints.
789      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
790        return ARMComputeAddress(U->getOperand(0), Addr);
791      break;
792    case Instruction::GetElementPtr: {
793      Address SavedAddr = Addr;
794      int TmpOffset = Addr.Offset;
795
796      // Iterate through the GEP folding the constants into offsets where
797      // we can.
798      gep_type_iterator GTI = gep_type_begin(U);
799      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
800           i != e; ++i, ++GTI) {
801        const Value *Op = *i;
802        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
803          const StructLayout *SL = DL.getStructLayout(STy);
804          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
805          TmpOffset += SL->getElementOffset(Idx);
806        } else {
807          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
808          for (;;) {
809            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
810              // Constant-offset addressing.
811              TmpOffset += CI->getSExtValue() * S;
812              break;
813            }
814            if (canFoldAddIntoGEP(U, Op)) {
815              // A compatible add with a constant operand. Fold the constant.
816              ConstantInt *CI =
817              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
818              TmpOffset += CI->getSExtValue() * S;
819              // Iterate on the other operand.
820              Op = cast<AddOperator>(Op)->getOperand(0);
821              continue;
822            }
823            // Unsupported
824            goto unsupported_gep;
825          }
826        }
827      }
828
829      // Try to grab the base operand now.
830      Addr.Offset = TmpOffset;
831      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
832
833      // We failed, restore everything and try the other options.
834      Addr = SavedAddr;
835
836      unsupported_gep:
837      break;
838    }
839    case Instruction::Alloca: {
840      const AllocaInst *AI = cast<AllocaInst>(Obj);
841      DenseMap<const AllocaInst*, int>::iterator SI =
842        FuncInfo.StaticAllocaMap.find(AI);
843      if (SI != FuncInfo.StaticAllocaMap.end()) {
844        Addr.BaseType = Address::FrameIndexBase;
845        Addr.Base.FI = SI->second;
846        return true;
847      }
848      break;
849    }
850  }
851
852  // Try to get this in a register if nothing else has worked.
853  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
854  return Addr.Base.Reg != 0;
855}
856
857void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
858  bool needsLowering = false;
859  switch (VT.SimpleTy) {
860    default: llvm_unreachable("Unhandled load/store type!");
861    case MVT::i1:
862    case MVT::i8:
863    case MVT::i16:
864    case MVT::i32:
865      if (!useAM3) {
866        // Integer loads/stores handle 12-bit offsets.
867        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
868        // Handle negative offsets.
869        if (needsLowering && isThumb2)
870          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
871                            Addr.Offset > -256);
872      } else {
873        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
874        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
875      }
876      break;
877    case MVT::f32:
878    case MVT::f64:
879      // Floating point operands handle 8-bit offsets.
880      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
881      break;
882  }
883
884  // If this is a stack pointer and the offset needs to be simplified then
885  // put the alloca address into a register, set the base type back to
886  // register and continue. This should almost never happen.
887  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
888    const TargetRegisterClass *RC = isThumb2 ?
889      (const TargetRegisterClass*)&ARM::tGPRRegClass :
890      (const TargetRegisterClass*)&ARM::GPRRegClass;
891    unsigned ResultReg = createResultReg(RC);
892    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
893    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
894                            TII.get(Opc), ResultReg)
895                            .addFrameIndex(Addr.Base.FI)
896                            .addImm(0));
897    Addr.Base.Reg = ResultReg;
898    Addr.BaseType = Address::RegBase;
899  }
900
901  // Since the offset is too large for the load/store instruction
902  // get the reg+offset into a register.
903  if (needsLowering) {
904    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
905                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
906    Addr.Offset = 0;
907  }
908}
909
910void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
911                                       const MachineInstrBuilder &MIB,
912                                       unsigned Flags, bool useAM3) {
913  // addrmode5 output depends on the selection dag addressing dividing the
914  // offset by 4 that it then later multiplies. Do this here as well.
915  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
916    Addr.Offset /= 4;
917
918  // Frame base works a bit differently. Handle it separately.
919  if (Addr.BaseType == Address::FrameIndexBase) {
920    int FI = Addr.Base.FI;
921    int Offset = Addr.Offset;
922    MachineMemOperand *MMO =
923          FuncInfo.MF->getMachineMemOperand(
924                                  MachinePointerInfo::getFixedStack(FI, Offset),
925                                  Flags,
926                                  MFI.getObjectSize(FI),
927                                  MFI.getObjectAlignment(FI));
928    // Now add the rest of the operands.
929    MIB.addFrameIndex(FI);
930
931    // ARM halfword load/stores and signed byte loads need an additional
932    // operand.
933    if (useAM3) {
934      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
935      MIB.addReg(0);
936      MIB.addImm(Imm);
937    } else {
938      MIB.addImm(Addr.Offset);
939    }
940    MIB.addMemOperand(MMO);
941  } else {
942    // Now add the rest of the operands.
943    MIB.addReg(Addr.Base.Reg);
944
945    // ARM halfword load/stores and signed byte loads need an additional
946    // operand.
947    if (useAM3) {
948      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
949      MIB.addReg(0);
950      MIB.addImm(Imm);
951    } else {
952      MIB.addImm(Addr.Offset);
953    }
954  }
955  AddOptionalDefs(MIB);
956}
957
958bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
959                              unsigned Alignment, bool isZExt, bool allocReg) {
960  unsigned Opc;
961  bool useAM3 = false;
962  bool needVMOV = false;
963  const TargetRegisterClass *RC;
964  switch (VT.SimpleTy) {
965    // This is mostly going to be Neon/vector support.
966    default: return false;
967    case MVT::i1:
968    case MVT::i8:
969      if (isThumb2) {
970        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
971          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
972        else
973          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
974      } else {
975        if (isZExt) {
976          Opc = ARM::LDRBi12;
977        } else {
978          Opc = ARM::LDRSB;
979          useAM3 = true;
980        }
981      }
982      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
983      break;
984    case MVT::i16:
985      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
986        return false;
987
988      if (isThumb2) {
989        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
990          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
991        else
992          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
993      } else {
994        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
995        useAM3 = true;
996      }
997      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
998      break;
999    case MVT::i32:
1000      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
1001        return false;
1002
1003      if (isThumb2) {
1004        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1005          Opc = ARM::t2LDRi8;
1006        else
1007          Opc = ARM::t2LDRi12;
1008      } else {
1009        Opc = ARM::LDRi12;
1010      }
1011      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
1012      break;
1013    case MVT::f32:
1014      if (!Subtarget->hasVFP2()) return false;
1015      // Unaligned loads need special handling. Floats require word-alignment.
1016      if (Alignment && Alignment < 4) {
1017        needVMOV = true;
1018        VT = MVT::i32;
1019        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
1020        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
1021      } else {
1022        Opc = ARM::VLDRS;
1023        RC = TLI.getRegClassFor(VT);
1024      }
1025      break;
1026    case MVT::f64:
1027      if (!Subtarget->hasVFP2()) return false;
1028      // FIXME: Unaligned loads need special handling.  Doublewords require
1029      // word-alignment.
1030      if (Alignment && Alignment < 4)
1031        return false;
1032
1033      Opc = ARM::VLDRD;
1034      RC = TLI.getRegClassFor(VT);
1035      break;
1036  }
1037  // Simplify this down to something we can handle.
1038  ARMSimplifyAddress(Addr, VT, useAM3);
1039
1040  // Create the base instruction, then add the operands.
1041  if (allocReg)
1042    ResultReg = createResultReg(RC);
1043  assert (ResultReg > 255 && "Expected an allocated virtual register.");
1044  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1045                                    TII.get(Opc), ResultReg);
1046  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
1047
1048  // If we had an unaligned load of a float we've converted it to an regular
1049  // load.  Now we must move from the GRP to the FP register.
1050  if (needVMOV) {
1051    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1052    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1053                            TII.get(ARM::VMOVSR), MoveReg)
1054                    .addReg(ResultReg));
1055    ResultReg = MoveReg;
1056  }
1057  return true;
1058}
1059
1060bool ARMFastISel::SelectLoad(const Instruction *I) {
1061  // Atomic loads need special handling.
1062  if (cast<LoadInst>(I)->isAtomic())
1063    return false;
1064
1065  // Verify we have a legal type before going any further.
1066  MVT VT;
1067  if (!isLoadTypeLegal(I->getType(), VT))
1068    return false;
1069
1070  // See if we can handle this address.
1071  Address Addr;
1072  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
1073
1074  unsigned ResultReg;
1075  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
1076    return false;
1077  UpdateValueMap(I, ResultReg);
1078  return true;
1079}
1080
1081bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
1082                               unsigned Alignment) {
1083  unsigned StrOpc;
1084  bool useAM3 = false;
1085  switch (VT.SimpleTy) {
1086    // This is mostly going to be Neon/vector support.
1087    default: return false;
1088    case MVT::i1: {
1089      unsigned Res = createResultReg(isThumb2 ?
1090        (const TargetRegisterClass*)&ARM::tGPRRegClass :
1091        (const TargetRegisterClass*)&ARM::GPRRegClass);
1092      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
1093      SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
1094      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1095                              TII.get(Opc), Res)
1096                      .addReg(SrcReg).addImm(1));
1097      SrcReg = Res;
1098    } // Fallthrough here.
1099    case MVT::i8:
1100      if (isThumb2) {
1101        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1102          StrOpc = ARM::t2STRBi8;
1103        else
1104          StrOpc = ARM::t2STRBi12;
1105      } else {
1106        StrOpc = ARM::STRBi12;
1107      }
1108      break;
1109    case MVT::i16:
1110      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
1111        return false;
1112
1113      if (isThumb2) {
1114        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1115          StrOpc = ARM::t2STRHi8;
1116        else
1117          StrOpc = ARM::t2STRHi12;
1118      } else {
1119        StrOpc = ARM::STRH;
1120        useAM3 = true;
1121      }
1122      break;
1123    case MVT::i32:
1124      if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
1125        return false;
1126
1127      if (isThumb2) {
1128        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
1129          StrOpc = ARM::t2STRi8;
1130        else
1131          StrOpc = ARM::t2STRi12;
1132      } else {
1133        StrOpc = ARM::STRi12;
1134      }
1135      break;
1136    case MVT::f32:
1137      if (!Subtarget->hasVFP2()) return false;
1138      // Unaligned stores need special handling. Floats require word-alignment.
1139      if (Alignment && Alignment < 4) {
1140        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
1141        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1142                                TII.get(ARM::VMOVRS), MoveReg)
1143                        .addReg(SrcReg));
1144        SrcReg = MoveReg;
1145        VT = MVT::i32;
1146        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
1147      } else {
1148        StrOpc = ARM::VSTRS;
1149      }
1150      break;
1151    case MVT::f64:
1152      if (!Subtarget->hasVFP2()) return false;
1153      // FIXME: Unaligned stores need special handling.  Doublewords require
1154      // word-alignment.
1155      if (Alignment && Alignment < 4)
1156          return false;
1157
1158      StrOpc = ARM::VSTRD;
1159      break;
1160  }
1161  // Simplify this down to something we can handle.
1162  ARMSimplifyAddress(Addr, VT, useAM3);
1163
1164  // Create the base instruction, then add the operands.
1165  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
1166  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1167                                    TII.get(StrOpc))
1168                            .addReg(SrcReg);
1169  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
1170  return true;
1171}
1172
1173bool ARMFastISel::SelectStore(const Instruction *I) {
1174  Value *Op0 = I->getOperand(0);
1175  unsigned SrcReg = 0;
1176
1177  // Atomic stores need special handling.
1178  if (cast<StoreInst>(I)->isAtomic())
1179    return false;
1180
1181  // Verify we have a legal type before going any further.
1182  MVT VT;
1183  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
1184    return false;
1185
1186  // Get the value to be stored into a register.
1187  SrcReg = getRegForValue(Op0);
1188  if (SrcReg == 0) return false;
1189
1190  // See if we can handle this address.
1191  Address Addr;
1192  if (!ARMComputeAddress(I->getOperand(1), Addr))
1193    return false;
1194
1195  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
1196    return false;
1197  return true;
1198}
1199
1200static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1201  switch (Pred) {
1202    // Needs two compares...
1203    case CmpInst::FCMP_ONE:
1204    case CmpInst::FCMP_UEQ:
1205    default:
1206      // AL is our "false" for now. The other two need more compares.
1207      return ARMCC::AL;
1208    case CmpInst::ICMP_EQ:
1209    case CmpInst::FCMP_OEQ:
1210      return ARMCC::EQ;
1211    case CmpInst::ICMP_SGT:
1212    case CmpInst::FCMP_OGT:
1213      return ARMCC::GT;
1214    case CmpInst::ICMP_SGE:
1215    case CmpInst::FCMP_OGE:
1216      return ARMCC::GE;
1217    case CmpInst::ICMP_UGT:
1218    case CmpInst::FCMP_UGT:
1219      return ARMCC::HI;
1220    case CmpInst::FCMP_OLT:
1221      return ARMCC::MI;
1222    case CmpInst::ICMP_ULE:
1223    case CmpInst::FCMP_OLE:
1224      return ARMCC::LS;
1225    case CmpInst::FCMP_ORD:
1226      return ARMCC::VC;
1227    case CmpInst::FCMP_UNO:
1228      return ARMCC::VS;
1229    case CmpInst::FCMP_UGE:
1230      return ARMCC::PL;
1231    case CmpInst::ICMP_SLT:
1232    case CmpInst::FCMP_ULT:
1233      return ARMCC::LT;
1234    case CmpInst::ICMP_SLE:
1235    case CmpInst::FCMP_ULE:
1236      return ARMCC::LE;
1237    case CmpInst::FCMP_UNE:
1238    case CmpInst::ICMP_NE:
1239      return ARMCC::NE;
1240    case CmpInst::ICMP_UGE:
1241      return ARMCC::HS;
1242    case CmpInst::ICMP_ULT:
1243      return ARMCC::LO;
1244  }
1245}
1246
1247bool ARMFastISel::SelectBranch(const Instruction *I) {
1248  const BranchInst *BI = cast<BranchInst>(I);
1249  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1250  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1251
1252  // Simple branch support.
1253
1254  // If we can, avoid recomputing the compare - redoing it could lead to wonky
1255  // behavior.
1256  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1257    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1258
1259      // Get the compare predicate.
1260      // Try to take advantage of fallthrough opportunities.
1261      CmpInst::Predicate Predicate = CI->getPredicate();
1262      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1263        std::swap(TBB, FBB);
1264        Predicate = CmpInst::getInversePredicate(Predicate);
1265      }
1266
1267      ARMCC::CondCodes ARMPred = getComparePred(Predicate);
1268
1269      // We may not handle every CC for now.
1270      if (ARMPred == ARMCC::AL) return false;
1271
1272      // Emit the compare.
1273      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1274        return false;
1275
1276      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1277      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1278      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
1279      FastEmitBranch(FBB, DbgLoc);
1280      FuncInfo.MBB->addSuccessor(TBB);
1281      return true;
1282    }
1283  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1284    MVT SourceVT;
1285    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1286        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
1287      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1288      unsigned OpReg = getRegForValue(TI->getOperand(0));
1289      OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
1290      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1291                              TII.get(TstOpc))
1292                      .addReg(OpReg).addImm(1));
1293
1294      unsigned CCMode = ARMCC::NE;
1295      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1296        std::swap(TBB, FBB);
1297        CCMode = ARMCC::EQ;
1298      }
1299
1300      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1301      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1302      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1303
1304      FastEmitBranch(FBB, DbgLoc);
1305      FuncInfo.MBB->addSuccessor(TBB);
1306      return true;
1307    }
1308  } else if (const ConstantInt *CI =
1309             dyn_cast<ConstantInt>(BI->getCondition())) {
1310    uint64_t Imm = CI->getZExtValue();
1311    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1312    FastEmitBranch(Target, DbgLoc);
1313    return true;
1314  }
1315
1316  unsigned CmpReg = getRegForValue(BI->getCondition());
1317  if (CmpReg == 0) return false;
1318
1319  // We've been divorced from our compare!  Our block was split, and
1320  // now our compare lives in a predecessor block.  We musn't
1321  // re-compare here, as the children of the compare aren't guaranteed
1322  // live across the block boundary (we *could* check for this).
1323  // Regardless, the compare has been done in the predecessor block,
1324  // and it left a value for us in a virtual register.  Ergo, we test
1325  // the one-bit value left in the virtual register.
1326  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1327  CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
1328  AddOptionalDefs(
1329      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
1330          .addReg(CmpReg)
1331          .addImm(1));
1332
1333  unsigned CCMode = ARMCC::NE;
1334  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1335    std::swap(TBB, FBB);
1336    CCMode = ARMCC::EQ;
1337  }
1338
1339  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1340  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
1341                  .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
1342  FastEmitBranch(FBB, DbgLoc);
1343  FuncInfo.MBB->addSuccessor(TBB);
1344  return true;
1345}
1346
1347bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
1348  unsigned AddrReg = getRegForValue(I->getOperand(0));
1349  if (AddrReg == 0) return false;
1350
1351  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
1352  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1353                          TII.get(Opc)).addReg(AddrReg));
1354
1355  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1356  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
1357    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
1358
1359  return true;
1360}
1361
1362bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
1363                             bool isZExt) {
1364  Type *Ty = Src1Value->getType();
1365  EVT SrcEVT = TLI.getValueType(Ty, true);
1366  if (!SrcEVT.isSimple()) return false;
1367  MVT SrcVT = SrcEVT.getSimpleVT();
1368
1369  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
1370  if (isFloat && !Subtarget->hasVFP2())
1371    return false;
1372
1373  // Check to see if the 2nd operand is a constant that we can encode directly
1374  // in the compare.
1375  int Imm = 0;
1376  bool UseImm = false;
1377  bool isNegativeImm = false;
1378  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
1379  // Thus, Src1Value may be a ConstantInt, but we're missing it.
1380  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
1381    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
1382        SrcVT == MVT::i1) {
1383      const APInt &CIVal = ConstInt->getValue();
1384      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
1385      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
1386      // then a cmn, because there is no way to represent 2147483648 as a
1387      // signed 32-bit int.
1388      if (Imm < 0 && Imm != (int)0x80000000) {
1389        isNegativeImm = true;
1390        Imm = -Imm;
1391      }
1392      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1393        (ARM_AM::getSOImmVal(Imm) != -1);
1394    }
1395  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
1396    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
1397      if (ConstFP->isZero() && !ConstFP->isNegative())
1398        UseImm = true;
1399  }
1400
1401  unsigned CmpOpc;
1402  bool isICmp = true;
1403  bool needsExt = false;
1404  switch (SrcVT.SimpleTy) {
1405    default: return false;
1406    // TODO: Verify compares.
1407    case MVT::f32:
1408      isICmp = false;
1409      CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
1410      break;
1411    case MVT::f64:
1412      isICmp = false;
1413      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
1414      break;
1415    case MVT::i1:
1416    case MVT::i8:
1417    case MVT::i16:
1418      needsExt = true;
1419    // Intentional fall-through.
1420    case MVT::i32:
1421      if (isThumb2) {
1422        if (!UseImm)
1423          CmpOpc = ARM::t2CMPrr;
1424        else
1425          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
1426      } else {
1427        if (!UseImm)
1428          CmpOpc = ARM::CMPrr;
1429        else
1430          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
1431      }
1432      break;
1433  }
1434
1435  unsigned SrcReg1 = getRegForValue(Src1Value);
1436  if (SrcReg1 == 0) return false;
1437
1438  unsigned SrcReg2 = 0;
1439  if (!UseImm) {
1440    SrcReg2 = getRegForValue(Src2Value);
1441    if (SrcReg2 == 0) return false;
1442  }
1443
1444  // We have i1, i8, or i16, we need to either zero extend or sign extend.
1445  if (needsExt) {
1446    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
1447    if (SrcReg1 == 0) return false;
1448    if (!UseImm) {
1449      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
1450      if (SrcReg2 == 0) return false;
1451    }
1452  }
1453
1454  const MCInstrDesc &II = TII.get(CmpOpc);
1455  SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
1456  if (!UseImm) {
1457    SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
1458    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1459                    .addReg(SrcReg1).addReg(SrcReg2));
1460  } else {
1461    MachineInstrBuilder MIB;
1462    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1463      .addReg(SrcReg1);
1464
1465    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1466    if (isICmp)
1467      MIB.addImm(Imm);
1468    AddOptionalDefs(MIB);
1469  }
1470
1471  // For floating point we need to move the result to a comparison register
1472  // that we can then use for branches.
1473  if (Ty->isFloatTy() || Ty->isDoubleTy())
1474    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1475                            TII.get(ARM::FMSTAT)));
1476  return true;
1477}
1478
1479bool ARMFastISel::SelectCmp(const Instruction *I) {
1480  const CmpInst *CI = cast<CmpInst>(I);
1481
1482  // Get the compare predicate.
1483  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
1484
1485  // We may not handle every CC for now.
1486  if (ARMPred == ARMCC::AL) return false;
1487
1488  // Emit the compare.
1489  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1490    return false;
1491
1492  // Now set a register based on the comparison. Explicitly set the predicates
1493  // here.
1494  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1495  const TargetRegisterClass *RC = isThumb2 ?
1496    (const TargetRegisterClass*)&ARM::rGPRRegClass :
1497    (const TargetRegisterClass*)&ARM::GPRRegClass;
1498  unsigned DestReg = createResultReg(RC);
1499  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
1500  unsigned ZeroReg = TargetMaterializeConstant(Zero);
1501  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1502  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
1503          .addReg(ZeroReg).addImm(1)
1504          .addImm(ARMPred).addReg(ARM::CPSR);
1505
1506  UpdateValueMap(I, DestReg);
1507  return true;
1508}
1509
1510bool ARMFastISel::SelectFPExt(const Instruction *I) {
1511  // Make sure we have VFP and that we're extending float to double.
1512  if (!Subtarget->hasVFP2()) return false;
1513
1514  Value *V = I->getOperand(0);
1515  if (!I->getType()->isDoubleTy() ||
1516      !V->getType()->isFloatTy()) return false;
1517
1518  unsigned Op = getRegForValue(V);
1519  if (Op == 0) return false;
1520
1521  unsigned Result = createResultReg(&ARM::DPRRegClass);
1522  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1523                          TII.get(ARM::VCVTDS), Result)
1524                  .addReg(Op));
1525  UpdateValueMap(I, Result);
1526  return true;
1527}
1528
1529bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1530  // Make sure we have VFP and that we're truncating double to float.
1531  if (!Subtarget->hasVFP2()) return false;
1532
1533  Value *V = I->getOperand(0);
1534  if (!(I->getType()->isFloatTy() &&
1535        V->getType()->isDoubleTy())) return false;
1536
1537  unsigned Op = getRegForValue(V);
1538  if (Op == 0) return false;
1539
1540  unsigned Result = createResultReg(&ARM::SPRRegClass);
1541  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1542                          TII.get(ARM::VCVTSD), Result)
1543                  .addReg(Op));
1544  UpdateValueMap(I, Result);
1545  return true;
1546}
1547
1548bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
1549  // Make sure we have VFP.
1550  if (!Subtarget->hasVFP2()) return false;
1551
1552  MVT DstVT;
1553  Type *Ty = I->getType();
1554  if (!isTypeLegal(Ty, DstVT))
1555    return false;
1556
1557  Value *Src = I->getOperand(0);
1558  EVT SrcEVT = TLI.getValueType(Src->getType(), true);
1559  if (!SrcEVT.isSimple())
1560    return false;
1561  MVT SrcVT = SrcEVT.getSimpleVT();
1562  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
1563    return false;
1564
1565  unsigned SrcReg = getRegForValue(Src);
1566  if (SrcReg == 0) return false;
1567
1568  // Handle sign-extension.
1569  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
1570    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
1571                                       /*isZExt*/!isSigned);
1572    if (SrcReg == 0) return false;
1573  }
1574
1575  // The conversion routine works on fp-reg to fp-reg and the operand above
1576  // was an integer, move it to the fp registers if possible.
1577  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
1578  if (FP == 0) return false;
1579
1580  unsigned Opc;
1581  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
1582  else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
1583  else return false;
1584
1585  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
1586  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1587                          TII.get(Opc), ResultReg).addReg(FP));
1588  UpdateValueMap(I, ResultReg);
1589  return true;
1590}
1591
1592bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
1593  // Make sure we have VFP.
1594  if (!Subtarget->hasVFP2()) return false;
1595
1596  MVT DstVT;
1597  Type *RetTy = I->getType();
1598  if (!isTypeLegal(RetTy, DstVT))
1599    return false;
1600
1601  unsigned Op = getRegForValue(I->getOperand(0));
1602  if (Op == 0) return false;
1603
1604  unsigned Opc;
1605  Type *OpTy = I->getOperand(0)->getType();
1606  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
1607  else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
1608  else return false;
1609
1610  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
1611  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
1612  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1613                          TII.get(Opc), ResultReg).addReg(Op));
1614
1615  // This result needs to be in an integer register, but the conversion only
1616  // takes place in fp-regs.
1617  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
1618  if (IntReg == 0) return false;
1619
1620  UpdateValueMap(I, IntReg);
1621  return true;
1622}
1623
1624bool ARMFastISel::SelectSelect(const Instruction *I) {
1625  MVT VT;
1626  if (!isTypeLegal(I->getType(), VT))
1627    return false;
1628
1629  // Things need to be register sized for register moves.
1630  if (VT != MVT::i32) return false;
1631
1632  unsigned CondReg = getRegForValue(I->getOperand(0));
1633  if (CondReg == 0) return false;
1634  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1635  if (Op1Reg == 0) return false;
1636
1637  // Check to see if we can use an immediate in the conditional move.
1638  int Imm = 0;
1639  bool UseImm = false;
1640  bool isNegativeImm = false;
1641  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
1642    assert (VT == MVT::i32 && "Expecting an i32.");
1643    Imm = (int)ConstInt->getValue().getZExtValue();
1644    if (Imm < 0) {
1645      isNegativeImm = true;
1646      Imm = ~Imm;
1647    }
1648    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
1649      (ARM_AM::getSOImmVal(Imm) != -1);
1650  }
1651
1652  unsigned Op2Reg = 0;
1653  if (!UseImm) {
1654    Op2Reg = getRegForValue(I->getOperand(2));
1655    if (Op2Reg == 0) return false;
1656  }
1657
1658  unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
1659  CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
1660  AddOptionalDefs(
1661      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
1662          .addReg(CondReg)
1663          .addImm(0));
1664
1665  unsigned MovCCOpc;
1666  const TargetRegisterClass *RC;
1667  if (!UseImm) {
1668    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
1669    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
1670  } else {
1671    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
1672    if (!isNegativeImm)
1673      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1674    else
1675      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
1676  }
1677  unsigned ResultReg = createResultReg(RC);
1678  if (!UseImm) {
1679    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
1680    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
1681    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1682            ResultReg)
1683        .addReg(Op2Reg)
1684        .addReg(Op1Reg)
1685        .addImm(ARMCC::NE)
1686        .addReg(ARM::CPSR);
1687  } else {
1688    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
1689    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
1690            ResultReg)
1691        .addReg(Op1Reg)
1692        .addImm(Imm)
1693        .addImm(ARMCC::EQ)
1694        .addReg(ARM::CPSR);
1695  }
1696  UpdateValueMap(I, ResultReg);
1697  return true;
1698}
1699
1700bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1701  MVT VT;
1702  Type *Ty = I->getType();
1703  if (!isTypeLegal(Ty, VT))
1704    return false;
1705
1706  // If we have integer div support we should have selected this automagically.
1707  // In case we have a real miss go ahead and return false and we'll pick
1708  // it up later.
1709  if (Subtarget->hasDivide()) return false;
1710
1711  // Otherwise emit a libcall.
1712  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1713  if (VT == MVT::i8)
1714    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1715  else if (VT == MVT::i16)
1716    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1717  else if (VT == MVT::i32)
1718    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1719  else if (VT == MVT::i64)
1720    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1721  else if (VT == MVT::i128)
1722    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1723  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1724
1725  return ARMEmitLibcall(I, LC);
1726}
1727
1728bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1729  MVT VT;
1730  Type *Ty = I->getType();
1731  if (!isTypeLegal(Ty, VT))
1732    return false;
1733
1734  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1735  if (VT == MVT::i8)
1736    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1737  else if (VT == MVT::i16)
1738    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1739  else if (VT == MVT::i32)
1740    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1741  else if (VT == MVT::i64)
1742    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1743  else if (VT == MVT::i128)
1744    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1745  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1746
1747  return ARMEmitLibcall(I, LC);
1748}
1749
1750bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1751  EVT DestVT  = TLI.getValueType(I->getType(), true);
1752
1753  // We can get here in the case when we have a binary operation on a non-legal
1754  // type and the target independent selector doesn't know how to handle it.
1755  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
1756    return false;
1757
1758  unsigned Opc;
1759  switch (ISDOpcode) {
1760    default: return false;
1761    case ISD::ADD:
1762      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
1763      break;
1764    case ISD::OR:
1765      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
1766      break;
1767    case ISD::SUB:
1768      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
1769      break;
1770  }
1771
1772  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1773  if (SrcReg1 == 0) return false;
1774
1775  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
1776  // in the instruction, rather then materializing the value in a register.
1777  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1778  if (SrcReg2 == 0) return false;
1779
1780  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
1781  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
1782  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
1783  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1784                          TII.get(Opc), ResultReg)
1785                  .addReg(SrcReg1).addReg(SrcReg2));
1786  UpdateValueMap(I, ResultReg);
1787  return true;
1788}
1789
1790bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
1791  EVT FPVT = TLI.getValueType(I->getType(), true);
1792  if (!FPVT.isSimple()) return false;
1793  MVT VT = FPVT.getSimpleVT();
1794
1795  // We can get here in the case when we want to use NEON for our fp
1796  // operations, but can't figure out how to. Just use the vfp instructions
1797  // if we have them.
1798  // FIXME: It'd be nice to use NEON instructions.
1799  Type *Ty = I->getType();
1800  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
1801  if (isFloat && !Subtarget->hasVFP2())
1802    return false;
1803
1804  unsigned Opc;
1805  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
1806  switch (ISDOpcode) {
1807    default: return false;
1808    case ISD::FADD:
1809      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1810      break;
1811    case ISD::FSUB:
1812      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1813      break;
1814    case ISD::FMUL:
1815      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1816      break;
1817  }
1818  unsigned Op1 = getRegForValue(I->getOperand(0));
1819  if (Op1 == 0) return false;
1820
1821  unsigned Op2 = getRegForValue(I->getOperand(1));
1822  if (Op2 == 0) return false;
1823
1824  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
1825  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1826                          TII.get(Opc), ResultReg)
1827                  .addReg(Op1).addReg(Op2));
1828  UpdateValueMap(I, ResultReg);
1829  return true;
1830}
1831
1832// Call Handling Code
1833
1834// This is largely taken directly from CCAssignFnForNode
1835// TODO: We may not support all of this.
1836CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
1837                                           bool Return,
1838                                           bool isVarArg) {
1839  switch (CC) {
1840  default:
1841    llvm_unreachable("Unsupported calling convention");
1842  case CallingConv::Fast:
1843    if (Subtarget->hasVFP2() && !isVarArg) {
1844      if (!Subtarget->isAAPCS_ABI())
1845        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1846      // For AAPCS ABI targets, just use VFP variant of the calling convention.
1847      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1848    }
1849    // Fallthrough
1850  case CallingConv::C:
1851    // Use target triple & subtarget features to do actual dispatch.
1852    if (Subtarget->isAAPCS_ABI()) {
1853      if (Subtarget->hasVFP2() &&
1854          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
1855        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1856      else
1857        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1858    } else
1859        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1860  case CallingConv::ARM_AAPCS_VFP:
1861    if (!isVarArg)
1862      return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1863    // Fall through to soft float variant, variadic functions don't
1864    // use hard floating point ABI.
1865  case CallingConv::ARM_AAPCS:
1866    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1867  case CallingConv::ARM_APCS:
1868    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1869  case CallingConv::GHC:
1870    if (Return)
1871      llvm_unreachable("Can't return in GHC call convention");
1872    else
1873      return CC_ARM_APCS_GHC;
1874  }
1875}
1876
1877bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1878                                  SmallVectorImpl<unsigned> &ArgRegs,
1879                                  SmallVectorImpl<MVT> &ArgVTs,
1880                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1881                                  SmallVectorImpl<unsigned> &RegArgs,
1882                                  CallingConv::ID CC,
1883                                  unsigned &NumBytes,
1884                                  bool isVarArg) {
1885  SmallVector<CCValAssign, 16> ArgLocs;
1886  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
1887  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
1888                             CCAssignFnForCall(CC, false, isVarArg));
1889
1890  // Check that we can handle all of the arguments. If we can't, then bail out
1891  // now before we add code to the MBB.
1892  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1893    CCValAssign &VA = ArgLocs[i];
1894    MVT ArgVT = ArgVTs[VA.getValNo()];
1895
1896    // We don't handle NEON/vector parameters yet.
1897    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1898      return false;
1899
1900    // Now copy/store arg to correct locations.
1901    if (VA.isRegLoc() && !VA.needsCustom()) {
1902      continue;
1903    } else if (VA.needsCustom()) {
1904      // TODO: We need custom lowering for vector (v2f64) args.
1905      if (VA.getLocVT() != MVT::f64 ||
1906          // TODO: Only handle register args for now.
1907          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
1908        return false;
1909    } else {
1910      switch (ArgVT.SimpleTy) {
1911      default:
1912        return false;
1913      case MVT::i1:
1914      case MVT::i8:
1915      case MVT::i16:
1916      case MVT::i32:
1917        break;
1918      case MVT::f32:
1919        if (!Subtarget->hasVFP2())
1920          return false;
1921        break;
1922      case MVT::f64:
1923        if (!Subtarget->hasVFP2())
1924          return false;
1925        break;
1926      }
1927    }
1928  }
1929
1930  // At the point, we are able to handle the call's arguments in fast isel.
1931
1932  // Get a count of how many bytes are to be pushed on the stack.
1933  NumBytes = CCInfo.getNextStackOffset();
1934
1935  // Issue CALLSEQ_START
1936  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1937  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1938                          TII.get(AdjStackDown))
1939                  .addImm(NumBytes));
1940
1941  // Process the args.
1942  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1943    CCValAssign &VA = ArgLocs[i];
1944    unsigned Arg = ArgRegs[VA.getValNo()];
1945    MVT ArgVT = ArgVTs[VA.getValNo()];
1946
1947    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
1948           "We don't handle NEON/vector parameters yet.");
1949
1950    // Handle arg promotion, etc.
1951    switch (VA.getLocInfo()) {
1952      case CCValAssign::Full: break;
1953      case CCValAssign::SExt: {
1954        MVT DestVT = VA.getLocVT();
1955        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
1956        assert (Arg != 0 && "Failed to emit a sext");
1957        ArgVT = DestVT;
1958        break;
1959      }
1960      case CCValAssign::AExt:
1961        // Intentional fall-through.  Handle AExt and ZExt.
1962      case CCValAssign::ZExt: {
1963        MVT DestVT = VA.getLocVT();
1964        Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
1965        assert (Arg != 0 && "Failed to emit a zext");
1966        ArgVT = DestVT;
1967        break;
1968      }
1969      case CCValAssign::BCvt: {
1970        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
1971                                 /*TODO: Kill=*/false);
1972        assert(BC != 0 && "Failed to emit a bitcast!");
1973        Arg = BC;
1974        ArgVT = VA.getLocVT();
1975        break;
1976      }
1977      default: llvm_unreachable("Unknown arg promotion!");
1978    }
1979
1980    // Now copy/store arg to correct locations.
1981    if (VA.isRegLoc() && !VA.needsCustom()) {
1982      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1983              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
1984      RegArgs.push_back(VA.getLocReg());
1985    } else if (VA.needsCustom()) {
1986      // TODO: We need custom lowering for vector (v2f64) args.
1987      assert(VA.getLocVT() == MVT::f64 &&
1988             "Custom lowering for v2f64 args not available");
1989
1990      CCValAssign &NextVA = ArgLocs[++i];
1991
1992      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1993             "We only handle register args!");
1994
1995      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1996                              TII.get(ARM::VMOVRRD), VA.getLocReg())
1997                      .addReg(NextVA.getLocReg(), RegState::Define)
1998                      .addReg(Arg));
1999      RegArgs.push_back(VA.getLocReg());
2000      RegArgs.push_back(NextVA.getLocReg());
2001    } else {
2002      assert(VA.isMemLoc());
2003      // Need to store on the stack.
2004      Address Addr;
2005      Addr.BaseType = Address::RegBase;
2006      Addr.Base.Reg = ARM::SP;
2007      Addr.Offset = VA.getLocMemOffset();
2008
2009      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
2010      assert(EmitRet && "Could not emit a store for argument!");
2011    }
2012  }
2013
2014  return true;
2015}
2016
2017bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
2018                             const Instruction *I, CallingConv::ID CC,
2019                             unsigned &NumBytes, bool isVarArg) {
2020  // Issue CALLSEQ_END
2021  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2022  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2023                          TII.get(AdjStackUp))
2024                  .addImm(NumBytes).addImm(0));
2025
2026  // Now the return value.
2027  if (RetVT != MVT::isVoid) {
2028    SmallVector<CCValAssign, 16> RVLocs;
2029    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
2030    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2031
2032    // Copy all of the result registers out of their specified physreg.
2033    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
2034      // For this move we copy into two registers and then move into the
2035      // double fp reg we want.
2036      MVT DestVT = RVLocs[0].getValVT();
2037      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
2038      unsigned ResultReg = createResultReg(DstRC);
2039      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2040                              TII.get(ARM::VMOVDRR), ResultReg)
2041                      .addReg(RVLocs[0].getLocReg())
2042                      .addReg(RVLocs[1].getLocReg()));
2043
2044      UsedRegs.push_back(RVLocs[0].getLocReg());
2045      UsedRegs.push_back(RVLocs[1].getLocReg());
2046
2047      // Finally update the result.
2048      UpdateValueMap(I, ResultReg);
2049    } else {
2050      assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
2051      MVT CopyVT = RVLocs[0].getValVT();
2052
2053      // Special handling for extended integers.
2054      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
2055        CopyVT = MVT::i32;
2056
2057      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
2058
2059      unsigned ResultReg = createResultReg(DstRC);
2060      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2061              TII.get(TargetOpcode::COPY),
2062              ResultReg).addReg(RVLocs[0].getLocReg());
2063      UsedRegs.push_back(RVLocs[0].getLocReg());
2064
2065      // Finally update the result.
2066      UpdateValueMap(I, ResultReg);
2067    }
2068  }
2069
2070  return true;
2071}
2072
2073bool ARMFastISel::SelectRet(const Instruction *I) {
2074  const ReturnInst *Ret = cast<ReturnInst>(I);
2075  const Function &F = *I->getParent()->getParent();
2076
2077  if (!FuncInfo.CanLowerReturn)
2078    return false;
2079
2080  // Build a list of return value registers.
2081  SmallVector<unsigned, 4> RetRegs;
2082
2083  CallingConv::ID CC = F.getCallingConv();
2084  if (Ret->getNumOperands() > 0) {
2085    SmallVector<ISD::OutputArg, 4> Outs;
2086    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2087
2088    // Analyze operands of the call, assigning locations to each operand.
2089    SmallVector<CCValAssign, 16> ValLocs;
2090    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
2091    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
2092                                                 F.isVarArg()));
2093
2094    const Value *RV = Ret->getOperand(0);
2095    unsigned Reg = getRegForValue(RV);
2096    if (Reg == 0)
2097      return false;
2098
2099    // Only handle a single return value for now.
2100    if (ValLocs.size() != 1)
2101      return false;
2102
2103    CCValAssign &VA = ValLocs[0];
2104
2105    // Don't bother handling odd stuff for now.
2106    if (VA.getLocInfo() != CCValAssign::Full)
2107      return false;
2108    // Only handle register returns for now.
2109    if (!VA.isRegLoc())
2110      return false;
2111
2112    unsigned SrcReg = Reg + VA.getValNo();
2113    EVT RVEVT = TLI.getValueType(RV->getType());
2114    if (!RVEVT.isSimple()) return false;
2115    MVT RVVT = RVEVT.getSimpleVT();
2116    MVT DestVT = VA.getValVT();
2117    // Special handling for extended integers.
2118    if (RVVT != DestVT) {
2119      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2120        return false;
2121
2122      assert(DestVT == MVT::i32 && "ARM should always ext to i32");
2123
2124      // Perform extension if flagged as either zext or sext.  Otherwise, do
2125      // nothing.
2126      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
2127        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
2128        if (SrcReg == 0) return false;
2129      }
2130    }
2131
2132    // Make the copy.
2133    unsigned DstReg = VA.getLocReg();
2134    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
2135    // Avoid a cross-class copy. This is very unlikely.
2136    if (!SrcRC->contains(DstReg))
2137      return false;
2138    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2139            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
2140
2141    // Add register to return instruction.
2142    RetRegs.push_back(VA.getLocReg());
2143  }
2144
2145  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
2146  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2147                                    TII.get(RetOpc));
2148  AddOptionalDefs(MIB);
2149  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2150    MIB.addReg(RetRegs[i], RegState::Implicit);
2151  return true;
2152}
2153
2154unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2155  if (UseReg)
2156    return isThumb2 ? ARM::tBLXr : ARM::BLX;
2157  else
2158    return isThumb2 ? ARM::tBL : ARM::BL;
2159}
2160
2161unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
2162  // Manually compute the global's type to avoid building it when unnecessary.
2163  Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
2164  EVT LCREVT = TLI.getValueType(GVTy);
2165  if (!LCREVT.isSimple()) return 0;
2166
2167  GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
2168                                       GlobalValue::ExternalLinkage, nullptr,
2169                                       Name);
2170  assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
2171  return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
2172}
2173
2174// A quick function that will emit a call for a named libcall in F with the
2175// vector of passed arguments for the Instruction in I. We can assume that we
2176// can emit a call for any libcall we can produce. This is an abridged version
2177// of the full call infrastructure since we won't need to worry about things
2178// like computed function pointers or strange arguments at call sites.
2179// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2180// with X86.
2181bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
2182  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
2183
2184  // Handle *simple* calls for now.
2185  Type *RetTy = I->getType();
2186  MVT RetVT;
2187  if (RetTy->isVoidTy())
2188    RetVT = MVT::isVoid;
2189  else if (!isTypeLegal(RetTy, RetVT))
2190    return false;
2191
2192  // Can't handle non-double multi-reg retvals.
2193  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
2194    SmallVector<CCValAssign, 16> RVLocs;
2195    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
2196    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
2197    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2198      return false;
2199  }
2200
2201  // Set up the argument vectors.
2202  SmallVector<Value*, 8> Args;
2203  SmallVector<unsigned, 8> ArgRegs;
2204  SmallVector<MVT, 8> ArgVTs;
2205  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2206  Args.reserve(I->getNumOperands());
2207  ArgRegs.reserve(I->getNumOperands());
2208  ArgVTs.reserve(I->getNumOperands());
2209  ArgFlags.reserve(I->getNumOperands());
2210  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
2211    Value *Op = I->getOperand(i);
2212    unsigned Arg = getRegForValue(Op);
2213    if (Arg == 0) return false;
2214
2215    Type *ArgTy = Op->getType();
2216    MVT ArgVT;
2217    if (!isTypeLegal(ArgTy, ArgVT)) return false;
2218
2219    ISD::ArgFlagsTy Flags;
2220    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
2221    Flags.setOrigAlign(OriginalAlignment);
2222
2223    Args.push_back(Op);
2224    ArgRegs.push_back(Arg);
2225    ArgVTs.push_back(ArgVT);
2226    ArgFlags.push_back(Flags);
2227  }
2228
2229  // Handle the arguments now that we've gotten them.
2230  SmallVector<unsigned, 4> RegArgs;
2231  unsigned NumBytes;
2232  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2233                       RegArgs, CC, NumBytes, false))
2234    return false;
2235
2236  unsigned CalleeReg = 0;
2237  if (EnableARMLongCalls) {
2238    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
2239    if (CalleeReg == 0) return false;
2240  }
2241
2242  // Issue the call.
2243  unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
2244  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2245                                    DbgLoc, TII.get(CallOpc));
2246  // BL / BLX don't take a predicate, but tBL / tBLX do.
2247  if (isThumb2)
2248    AddDefaultPred(MIB);
2249  if (EnableARMLongCalls)
2250    MIB.addReg(CalleeReg);
2251  else
2252    MIB.addExternalSymbol(TLI.getLibcallName(Call));
2253
2254  // Add implicit physical register uses to the call.
2255  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2256    MIB.addReg(RegArgs[i], RegState::Implicit);
2257
2258  // Add a register mask with the call-preserved registers.
2259  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2260  MIB.addRegMask(TRI.getCallPreservedMask(CC));
2261
2262  // Finish off the call including any return values.
2263  SmallVector<unsigned, 4> UsedRegs;
2264  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
2265
2266  // Set all unused physreg defs as dead.
2267  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2268
2269  return true;
2270}
2271
2272bool ARMFastISel::SelectCall(const Instruction *I,
2273                             const char *IntrMemName = nullptr) {
2274  const CallInst *CI = cast<CallInst>(I);
2275  const Value *Callee = CI->getCalledValue();
2276
2277  // Can't handle inline asm.
2278  if (isa<InlineAsm>(Callee)) return false;
2279
2280  // Allow SelectionDAG isel to handle tail calls.
2281  if (CI->isTailCall()) return false;
2282
2283  // Check the calling convention.
2284  ImmutableCallSite CS(CI);
2285  CallingConv::ID CC = CS.getCallingConv();
2286
2287  // TODO: Avoid some calling conventions?
2288
2289  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
2290  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
2291  bool isVarArg = FTy->isVarArg();
2292
2293  // Handle *simple* calls for now.
2294  Type *RetTy = I->getType();
2295  MVT RetVT;
2296  if (RetTy->isVoidTy())
2297    RetVT = MVT::isVoid;
2298  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
2299           RetVT != MVT::i8  && RetVT != MVT::i1)
2300    return false;
2301
2302  // Can't handle non-double multi-reg retvals.
2303  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
2304      RetVT != MVT::i16 && RetVT != MVT::i32) {
2305    SmallVector<CCValAssign, 16> RVLocs;
2306    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
2307    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
2308    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
2309      return false;
2310  }
2311
2312  // Set up the argument vectors.
2313  SmallVector<Value*, 8> Args;
2314  SmallVector<unsigned, 8> ArgRegs;
2315  SmallVector<MVT, 8> ArgVTs;
2316  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
2317  unsigned arg_size = CS.arg_size();
2318  Args.reserve(arg_size);
2319  ArgRegs.reserve(arg_size);
2320  ArgVTs.reserve(arg_size);
2321  ArgFlags.reserve(arg_size);
2322  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
2323       i != e; ++i) {
2324    // If we're lowering a memory intrinsic instead of a regular call, skip the
2325    // last two arguments, which shouldn't be passed to the underlying function.
2326    if (IntrMemName && e-i <= 2)
2327      break;
2328
2329    ISD::ArgFlagsTy Flags;
2330    unsigned AttrInd = i - CS.arg_begin() + 1;
2331    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
2332      Flags.setSExt();
2333    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
2334      Flags.setZExt();
2335
2336    // FIXME: Only handle *easy* calls for now.
2337    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
2338        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
2339        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
2340        CS.paramHasAttr(AttrInd, Attribute::ByVal))
2341      return false;
2342
2343    Type *ArgTy = (*i)->getType();
2344    MVT ArgVT;
2345    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
2346        ArgVT != MVT::i1)
2347      return false;
2348
2349    unsigned Arg = getRegForValue(*i);
2350    if (Arg == 0)
2351      return false;
2352
2353    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
2354    Flags.setOrigAlign(OriginalAlignment);
2355
2356    Args.push_back(*i);
2357    ArgRegs.push_back(Arg);
2358    ArgVTs.push_back(ArgVT);
2359    ArgFlags.push_back(Flags);
2360  }
2361
2362  // Handle the arguments now that we've gotten them.
2363  SmallVector<unsigned, 4> RegArgs;
2364  unsigned NumBytes;
2365  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2366                       RegArgs, CC, NumBytes, isVarArg))
2367    return false;
2368
2369  bool UseReg = false;
2370  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
2371  if (!GV || EnableARMLongCalls) UseReg = true;
2372
2373  unsigned CalleeReg = 0;
2374  if (UseReg) {
2375    if (IntrMemName)
2376      CalleeReg = getLibcallReg(IntrMemName);
2377    else
2378      CalleeReg = getRegForValue(Callee);
2379
2380    if (CalleeReg == 0) return false;
2381  }
2382
2383  // Issue the call.
2384  unsigned CallOpc = ARMSelectCallOp(UseReg);
2385  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2386                                    DbgLoc, TII.get(CallOpc));
2387
2388  unsigned char OpFlags = 0;
2389
2390  // Add MO_PLT for global address or external symbol in the PIC relocation
2391  // model.
2392  if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
2393    OpFlags = ARMII::MO_PLT;
2394
2395  // ARM calls don't take a predicate, but tBL / tBLX do.
2396  if(isThumb2)
2397    AddDefaultPred(MIB);
2398  if (UseReg)
2399    MIB.addReg(CalleeReg);
2400  else if (!IntrMemName)
2401    MIB.addGlobalAddress(GV, 0, OpFlags);
2402  else
2403    MIB.addExternalSymbol(IntrMemName, OpFlags);
2404
2405  // Add implicit physical register uses to the call.
2406  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2407    MIB.addReg(RegArgs[i], RegState::Implicit);
2408
2409  // Add a register mask with the call-preserved registers.
2410  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2411  MIB.addRegMask(TRI.getCallPreservedMask(CC));
2412
2413  // Finish off the call including any return values.
2414  SmallVector<unsigned, 4> UsedRegs;
2415  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
2416    return false;
2417
2418  // Set all unused physreg defs as dead.
2419  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2420
2421  return true;
2422}
2423
2424bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2425  return Len <= 16;
2426}
2427
2428bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
2429                                        uint64_t Len, unsigned Alignment) {
2430  // Make sure we don't bloat code by inlining very large memcpy's.
2431  if (!ARMIsMemCpySmall(Len))
2432    return false;
2433
2434  while (Len) {
2435    MVT VT;
2436    if (!Alignment || Alignment >= 4) {
2437      if (Len >= 4)
2438        VT = MVT::i32;
2439      else if (Len >= 2)
2440        VT = MVT::i16;
2441      else {
2442        assert (Len == 1 && "Expected a length of 1!");
2443        VT = MVT::i8;
2444      }
2445    } else {
2446      // Bound based on alignment.
2447      if (Len >= 2 && Alignment == 2)
2448        VT = MVT::i16;
2449      else {
2450        VT = MVT::i8;
2451      }
2452    }
2453
2454    bool RV;
2455    unsigned ResultReg;
2456    RV = ARMEmitLoad(VT, ResultReg, Src);
2457    assert (RV == true && "Should be able to handle this load.");
2458    RV = ARMEmitStore(VT, ResultReg, Dest);
2459    assert (RV == true && "Should be able to handle this store.");
2460    (void)RV;
2461
2462    unsigned Size = VT.getSizeInBits()/8;
2463    Len -= Size;
2464    Dest.Offset += Size;
2465    Src.Offset += Size;
2466  }
2467
2468  return true;
2469}
2470
2471bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
2472  // FIXME: Handle more intrinsics.
2473  switch (I.getIntrinsicID()) {
2474  default: return false;
2475  case Intrinsic::frameaddress: {
2476    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2477    MFI->setFrameAddressIsTaken(true);
2478
2479    unsigned LdrOpc;
2480    const TargetRegisterClass *RC;
2481    if (isThumb2) {
2482      LdrOpc =  ARM::t2LDRi12;
2483      RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
2484    } else {
2485      LdrOpc =  ARM::LDRi12;
2486      RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
2487    }
2488
2489    const ARMBaseRegisterInfo *RegInfo =
2490          static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
2491    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2492    unsigned SrcReg = FramePtr;
2493
2494    // Recursively load frame address
2495    // ldr r0 [fp]
2496    // ldr r0 [r0]
2497    // ldr r0 [r0]
2498    // ...
2499    unsigned DestReg;
2500    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
2501    while (Depth--) {
2502      DestReg = createResultReg(RC);
2503      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2504                              TII.get(LdrOpc), DestReg)
2505                      .addReg(SrcReg).addImm(0));
2506      SrcReg = DestReg;
2507    }
2508    UpdateValueMap(&I, SrcReg);
2509    return true;
2510  }
2511  case Intrinsic::memcpy:
2512  case Intrinsic::memmove: {
2513    const MemTransferInst &MTI = cast<MemTransferInst>(I);
2514    // Don't handle volatile.
2515    if (MTI.isVolatile())
2516      return false;
2517
2518    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2519    // we would emit dead code because we don't currently handle memmoves.
2520    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
2521    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
2522      // Small memcpy's are common enough that we want to do them without a call
2523      // if possible.
2524      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
2525      if (ARMIsMemCpySmall(Len)) {
2526        Address Dest, Src;
2527        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
2528            !ARMComputeAddress(MTI.getRawSource(), Src))
2529          return false;
2530        unsigned Alignment = MTI.getAlignment();
2531        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2532          return true;
2533      }
2534    }
2535
2536    if (!MTI.getLength()->getType()->isIntegerTy(32))
2537      return false;
2538
2539    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
2540      return false;
2541
2542    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
2543    return SelectCall(&I, IntrMemName);
2544  }
2545  case Intrinsic::memset: {
2546    const MemSetInst &MSI = cast<MemSetInst>(I);
2547    // Don't handle volatile.
2548    if (MSI.isVolatile())
2549      return false;
2550
2551    if (!MSI.getLength()->getType()->isIntegerTy(32))
2552      return false;
2553
2554    if (MSI.getDestAddressSpace() > 255)
2555      return false;
2556
2557    return SelectCall(&I, "memset");
2558  }
2559  case Intrinsic::trap: {
2560    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
2561      Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
2562    return true;
2563  }
2564  }
2565}
2566
2567bool ARMFastISel::SelectTrunc(const Instruction *I) {
2568  // The high bits for a type smaller than the register size are assumed to be
2569  // undefined.
2570  Value *Op = I->getOperand(0);
2571
2572  EVT SrcVT, DestVT;
2573  SrcVT = TLI.getValueType(Op->getType(), true);
2574  DestVT = TLI.getValueType(I->getType(), true);
2575
2576  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2577    return false;
2578  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2579    return false;
2580
2581  unsigned SrcReg = getRegForValue(Op);
2582  if (!SrcReg) return false;
2583
2584  // Because the high bits are undefined, a truncate doesn't generate
2585  // any code.
2586  UpdateValueMap(I, SrcReg);
2587  return true;
2588}
2589
2590unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2591                                    bool isZExt) {
2592  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
2593    return 0;
2594  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
2595    return 0;
2596
2597  // Table of which combinations can be emitted as a single instruction,
2598  // and which will require two.
2599  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
2600    //            ARM                     Thumb
2601    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
2602    //    ext:     s  z      s  z          s  z      s  z
2603    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
2604    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
2605    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
2606  };
2607
2608  // Target registers for:
2609  //  - For ARM can never be PC.
2610  //  - For 16-bit Thumb are restricted to lower 8 registers.
2611  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
2612  static const TargetRegisterClass *RCTbl[2][2] = {
2613    // Instructions: Two                     Single
2614    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
2615    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
2616  };
2617
2618  // Table governing the instruction(s) to be emitted.
2619  static const struct InstructionTable {
2620    uint32_t Opc   : 16;
2621    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
2622    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
2623    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
2624  } IT[2][2][3][2] = {
2625    { // Two instructions (first is left shift, second is in this table).
2626      { // ARM                Opc           S  Shift             Imm
2627        /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  31 },
2628        /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  31 } },
2629        /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  24 },
2630        /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  24 } },
2631        /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  16 },
2632        /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  16 } }
2633      },
2634      { // Thumb              Opc           S  Shift             Imm
2635        /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  31 },
2636        /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  31 } },
2637        /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  24 },
2638        /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  24 } },
2639        /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  16 },
2640        /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  16 } }
2641      }
2642    },
2643    { // Single instruction.
2644      { // ARM                Opc           S  Shift             Imm
2645        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
2646        /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,   1 } },
2647        /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,   0 },
2648        /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
2649        /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,   0 },
2650        /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,   0 } }
2651      },
2652      { // Thumb              Opc           S  Shift             Imm
2653        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
2654        /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,   1 } },
2655        /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,   0 },
2656        /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
2657        /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,   0 },
2658        /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,   0 } }
2659      }
2660    }
2661  };
2662
2663  unsigned SrcBits = SrcVT.getSizeInBits();
2664  unsigned DestBits = DestVT.getSizeInBits();
2665  (void) DestBits;
2666  assert((SrcBits < DestBits) && "can only extend to larger types");
2667  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
2668         "other sizes unimplemented");
2669  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
2670         "other sizes unimplemented");
2671
2672  bool hasV6Ops = Subtarget->hasV6Ops();
2673  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
2674  assert((Bitness < 3) && "sanity-check table bounds");
2675
2676  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
2677  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
2678  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
2679  unsigned Opc = ITP->Opc;
2680  assert(ARM::KILL != Opc && "Invalid table entry");
2681  unsigned hasS = ITP->hasS;
2682  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
2683  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
2684         "only MOVsi has shift operand addressing mode");
2685  unsigned Imm = ITP->Imm;
2686
2687  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
2688  bool setsCPSR = &ARM::tGPRRegClass == RC;
2689  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
2690  unsigned ResultReg;
2691  // MOVsi encodes shift and immediate in shift operand addressing mode.
2692  // The following condition has the same value when emitting two
2693  // instruction sequences: both are shifts.
2694  bool ImmIsSO = (Shift != ARM_AM::no_shift);
2695
2696  // Either one or two instructions are emitted.
2697  // They're always of the form:
2698  //   dst = in OP imm
2699  // CPSR is set only by 16-bit Thumb instructions.
2700  // Predicate, if any, is AL.
2701  // S bit, if available, is always 0.
2702  // When two are emitted the first's result will feed as the second's input,
2703  // that value is then dead.
2704  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
2705  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
2706    ResultReg = createResultReg(RC);
2707    bool isLsl = (0 == Instr) && !isSingleInstr;
2708    unsigned Opcode = isLsl ? LSLOpc : Opc;
2709    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
2710    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
2711    bool isKill = 1 == Instr;
2712    MachineInstrBuilder MIB = BuildMI(
2713        *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
2714    if (setsCPSR)
2715      MIB.addReg(ARM::CPSR, RegState::Define);
2716    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
2717    AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
2718    if (hasS)
2719      AddDefaultCC(MIB);
2720    // Second instruction consumes the first's result.
2721    SrcReg = ResultReg;
2722  }
2723
2724  return ResultReg;
2725}
2726
2727bool ARMFastISel::SelectIntExt(const Instruction *I) {
2728  // On ARM, in general, integer casts don't involve legal types; this code
2729  // handles promotable integers.
2730  Type *DestTy = I->getType();
2731  Value *Src = I->getOperand(0);
2732  Type *SrcTy = Src->getType();
2733
2734  bool isZExt = isa<ZExtInst>(I);
2735  unsigned SrcReg = getRegForValue(Src);
2736  if (!SrcReg) return false;
2737
2738  EVT SrcEVT, DestEVT;
2739  SrcEVT = TLI.getValueType(SrcTy, true);
2740  DestEVT = TLI.getValueType(DestTy, true);
2741  if (!SrcEVT.isSimple()) return false;
2742  if (!DestEVT.isSimple()) return false;
2743
2744  MVT SrcVT = SrcEVT.getSimpleVT();
2745  MVT DestVT = DestEVT.getSimpleVT();
2746  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2747  if (ResultReg == 0) return false;
2748  UpdateValueMap(I, ResultReg);
2749  return true;
2750}
2751
2752bool ARMFastISel::SelectShift(const Instruction *I,
2753                              ARM_AM::ShiftOpc ShiftTy) {
2754  // We handle thumb2 mode by target independent selector
2755  // or SelectionDAG ISel.
2756  if (isThumb2)
2757    return false;
2758
2759  // Only handle i32 now.
2760  EVT DestVT = TLI.getValueType(I->getType(), true);
2761  if (DestVT != MVT::i32)
2762    return false;
2763
2764  unsigned Opc = ARM::MOVsr;
2765  unsigned ShiftImm;
2766  Value *Src2Value = I->getOperand(1);
2767  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
2768    ShiftImm = CI->getZExtValue();
2769
2770    // Fall back to selection DAG isel if the shift amount
2771    // is zero or greater than the width of the value type.
2772    if (ShiftImm == 0 || ShiftImm >=32)
2773      return false;
2774
2775    Opc = ARM::MOVsi;
2776  }
2777
2778  Value *Src1Value = I->getOperand(0);
2779  unsigned Reg1 = getRegForValue(Src1Value);
2780  if (Reg1 == 0) return false;
2781
2782  unsigned Reg2 = 0;
2783  if (Opc == ARM::MOVsr) {
2784    Reg2 = getRegForValue(Src2Value);
2785    if (Reg2 == 0) return false;
2786  }
2787
2788  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
2789  if(ResultReg == 0) return false;
2790
2791  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2792                                    TII.get(Opc), ResultReg)
2793                            .addReg(Reg1);
2794
2795  if (Opc == ARM::MOVsi)
2796    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
2797  else if (Opc == ARM::MOVsr) {
2798    MIB.addReg(Reg2);
2799    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
2800  }
2801
2802  AddOptionalDefs(MIB);
2803  UpdateValueMap(I, ResultReg);
2804  return true;
2805}
2806
2807// TODO: SoftFP support.
2808bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
2809
2810  switch (I->getOpcode()) {
2811    case Instruction::Load:
2812      return SelectLoad(I);
2813    case Instruction::Store:
2814      return SelectStore(I);
2815    case Instruction::Br:
2816      return SelectBranch(I);
2817    case Instruction::IndirectBr:
2818      return SelectIndirectBr(I);
2819    case Instruction::ICmp:
2820    case Instruction::FCmp:
2821      return SelectCmp(I);
2822    case Instruction::FPExt:
2823      return SelectFPExt(I);
2824    case Instruction::FPTrunc:
2825      return SelectFPTrunc(I);
2826    case Instruction::SIToFP:
2827      return SelectIToFP(I, /*isSigned*/ true);
2828    case Instruction::UIToFP:
2829      return SelectIToFP(I, /*isSigned*/ false);
2830    case Instruction::FPToSI:
2831      return SelectFPToI(I, /*isSigned*/ true);
2832    case Instruction::FPToUI:
2833      return SelectFPToI(I, /*isSigned*/ false);
2834    case Instruction::Add:
2835      return SelectBinaryIntOp(I, ISD::ADD);
2836    case Instruction::Or:
2837      return SelectBinaryIntOp(I, ISD::OR);
2838    case Instruction::Sub:
2839      return SelectBinaryIntOp(I, ISD::SUB);
2840    case Instruction::FAdd:
2841      return SelectBinaryFPOp(I, ISD::FADD);
2842    case Instruction::FSub:
2843      return SelectBinaryFPOp(I, ISD::FSUB);
2844    case Instruction::FMul:
2845      return SelectBinaryFPOp(I, ISD::FMUL);
2846    case Instruction::SDiv:
2847      return SelectDiv(I, /*isSigned*/ true);
2848    case Instruction::UDiv:
2849      return SelectDiv(I, /*isSigned*/ false);
2850    case Instruction::SRem:
2851      return SelectRem(I, /*isSigned*/ true);
2852    case Instruction::URem:
2853      return SelectRem(I, /*isSigned*/ false);
2854    case Instruction::Call:
2855      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2856        return SelectIntrinsicCall(*II);
2857      return SelectCall(I);
2858    case Instruction::Select:
2859      return SelectSelect(I);
2860    case Instruction::Ret:
2861      return SelectRet(I);
2862    case Instruction::Trunc:
2863      return SelectTrunc(I);
2864    case Instruction::ZExt:
2865    case Instruction::SExt:
2866      return SelectIntExt(I);
2867    case Instruction::Shl:
2868      return SelectShift(I, ARM_AM::lsl);
2869    case Instruction::LShr:
2870      return SelectShift(I, ARM_AM::lsr);
2871    case Instruction::AShr:
2872      return SelectShift(I, ARM_AM::asr);
2873    default: break;
2874  }
2875  return false;
2876}
2877
2878namespace {
2879// This table describes sign- and zero-extend instructions which can be
2880// folded into a preceding load. All of these extends have an immediate
2881// (sometimes a mask and sometimes a shift) that's applied after
2882// extension.
2883const struct FoldableLoadExtendsStruct {
2884  uint16_t Opc[2];  // ARM, Thumb.
2885  uint8_t ExpectedImm;
2886  uint8_t isZExt     : 1;
2887  uint8_t ExpectedVT : 7;
2888} FoldableLoadExtends[] = {
2889  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
2890  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
2891  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
2892  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
2893  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
2894};
2895}
2896
2897/// \brief The specified machine instr operand is a vreg, and that
2898/// vreg is being provided by the specified load instruction.  If possible,
2899/// try to fold the load as an operand to the instruction, returning true if
2900/// successful.
2901bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2902                                      const LoadInst *LI) {
2903  // Verify we have a legal type before going any further.
2904  MVT VT;
2905  if (!isLoadTypeLegal(LI->getType(), VT))
2906    return false;
2907
2908  // Combine load followed by zero- or sign-extend.
2909  // ldrb r1, [r0]       ldrb r1, [r0]
2910  // uxtb r2, r1     =>
2911  // mov  r3, r2         mov  r3, r1
2912  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
2913    return false;
2914  const uint64_t Imm = MI->getOperand(2).getImm();
2915
2916  bool Found = false;
2917  bool isZExt;
2918  for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
2919       i != e; ++i) {
2920    if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
2921        (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
2922        MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
2923      Found = true;
2924      isZExt = FoldableLoadExtends[i].isZExt;
2925    }
2926  }
2927  if (!Found) return false;
2928
2929  // See if we can handle this address.
2930  Address Addr;
2931  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
2932
2933  unsigned ResultReg = MI->getOperand(0).getReg();
2934  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
2935    return false;
2936  MI->eraseFromParent();
2937  return true;
2938}
2939
2940unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
2941                                     unsigned Align, MVT VT) {
2942  bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
2943  ARMConstantPoolConstant *CPV =
2944    ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
2945  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
2946
2947  unsigned Opc;
2948  unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
2949  // Load value.
2950  if (isThumb2) {
2951    DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
2952    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2953                            TII.get(ARM::t2LDRpci), DestReg1)
2954                    .addConstantPoolIndex(Idx));
2955    Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
2956  } else {
2957    // The extra immediate is for addrmode2.
2958    DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
2959    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2960                            DbgLoc, TII.get(ARM::LDRcp), DestReg1)
2961                    .addConstantPoolIndex(Idx).addImm(0));
2962    Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
2963  }
2964
2965  unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
2966  if (GlobalBaseReg == 0) {
2967    GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
2968    AFI->setGlobalBaseReg(GlobalBaseReg);
2969  }
2970
2971  unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
2972  DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
2973  DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
2974  GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
2975  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
2976                                    DbgLoc, TII.get(Opc), DestReg2)
2977                            .addReg(DestReg1)
2978                            .addReg(GlobalBaseReg);
2979  if (!UseGOTOFF)
2980    MIB.addImm(0);
2981  AddOptionalDefs(MIB);
2982
2983  return DestReg2;
2984}
2985
2986bool ARMFastISel::FastLowerArguments() {
2987  if (!FuncInfo.CanLowerReturn)
2988    return false;
2989
2990  const Function *F = FuncInfo.Fn;
2991  if (F->isVarArg())
2992    return false;
2993
2994  CallingConv::ID CC = F->getCallingConv();
2995  switch (CC) {
2996  default:
2997    return false;
2998  case CallingConv::Fast:
2999  case CallingConv::C:
3000  case CallingConv::ARM_AAPCS_VFP:
3001  case CallingConv::ARM_AAPCS:
3002  case CallingConv::ARM_APCS:
3003    break;
3004  }
3005
3006  // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
3007  // which are passed in r0 - r3.
3008  unsigned Idx = 1;
3009  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
3010       I != E; ++I, ++Idx) {
3011    if (Idx > 4)
3012      return false;
3013
3014    if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
3015        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
3016        F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
3017      return false;
3018
3019    Type *ArgTy = I->getType();
3020    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3021      return false;
3022
3023    EVT ArgVT = TLI.getValueType(ArgTy);
3024    if (!ArgVT.isSimple()) return false;
3025    switch (ArgVT.getSimpleVT().SimpleTy) {
3026    case MVT::i8:
3027    case MVT::i16:
3028    case MVT::i32:
3029      break;
3030    default:
3031      return false;
3032    }
3033  }
3034
3035
3036  static const uint16_t GPRArgRegs[] = {
3037    ARM::R0, ARM::R1, ARM::R2, ARM::R3
3038  };
3039
3040  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
3041  Idx = 0;
3042  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
3043       I != E; ++I, ++Idx) {
3044    unsigned SrcReg = GPRArgRegs[Idx];
3045    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3046    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3047    // Without this, EmitLiveInCopies may eliminate the livein if its only
3048    // use is a bitcast (which isn't turned into an instruction).
3049    unsigned ResultReg = createResultReg(RC);
3050    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3051            TII.get(TargetOpcode::COPY),
3052            ResultReg).addReg(DstReg, getKillRegState(true));
3053    UpdateValueMap(I, ResultReg);
3054  }
3055
3056  return true;
3057}
3058
3059namespace llvm {
3060  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
3061                                const TargetLibraryInfo *libInfo) {
3062    const TargetMachine &TM = funcInfo.MF->getTarget();
3063
3064    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
3065    // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
3066    bool UseFastISel = false;
3067    UseFastISel |= Subtarget->isTargetMachO() && !Subtarget->isThumb1Only();
3068    UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb();
3069    UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb();
3070
3071    if (UseFastISel) {
3072      // iOS always has a FP for backtracking, force other targets
3073      // to keep their FP when doing FastISel. The emitted code is
3074      // currently superior, and in cases like test-suite's lencod
3075      // FastISel isn't quite correct when FP is eliminated.
3076      TM.Options.NoFramePointerElim = true;
3077      return new ARMFastISel(funcInfo, libInfo);
3078    }
3079    return nullptr;
3080  }
3081}
3082