1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARMISelLowering.h"
16#include "ARMCallingConv.h"
17#include "ARMConstantPoolValue.h"
18#include "ARMMachineFunctionInfo.h"
19#include "ARMPerfectShuffle.h"
20#include "ARMSubtarget.h"
21#include "ARMTargetMachine.h"
22#include "ARMTargetObjectFile.h"
23#include "MCTargetDesc/ARMAddressingModes.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/CodeGen/CallingConvLower.h"
27#include "llvm/CodeGen/IntrinsicLowering.h"
28#include "llvm/CodeGen/MachineBasicBlock.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineModuleInfo.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/IR/CallingConv.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/Instruction.h"
41#include "llvm/IR/Instructions.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/Type.h"
44#include "llvm/MC/MCSectionMachO.h"
45#include "llvm/Support/CommandLine.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MathExtras.h"
49#include "llvm/Target/TargetOptions.h"
50#include <utility>
51using namespace llvm;
52
53#define DEBUG_TYPE "arm-isel"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
57STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
58
59cl::opt<bool>
60EnableARMLongCalls("arm-long-calls", cl::Hidden,
61  cl::desc("Generate calls via indirect call instructions"),
62  cl::init(false));
63
64static cl::opt<bool>
65ARMInterworking("arm-interworking", cl::Hidden,
66  cl::desc("Enable / disable ARM interworking (for debugging only)"),
67  cl::init(true));
68
69namespace {
70  class ARMCCState : public CCState {
71  public:
72    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
73               const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
74               LLVMContext &C, ParmContext PC)
75        : CCState(CC, isVarArg, MF, TM, locs, C) {
76      assert(((PC == Call) || (PC == Prologue)) &&
77             "ARMCCState users must specify whether their context is call"
78             "or prologue generation.");
79      CallOrPrologue = PC;
80    }
81  };
82}
83
84// The APCS parameter registers.
85static const MCPhysReg GPRArgRegs[] = {
86  ARM::R0, ARM::R1, ARM::R2, ARM::R3
87};
88
89void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
90                                       MVT PromotedBitwiseVT) {
91  if (VT != PromotedLdStVT) {
92    setOperationAction(ISD::LOAD, VT, Promote);
93    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
94
95    setOperationAction(ISD::STORE, VT, Promote);
96    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
97  }
98
99  MVT ElemTy = VT.getVectorElementType();
100  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
101    setOperationAction(ISD::SETCC, VT, Custom);
102  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
103  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
104  if (ElemTy == MVT::i32) {
105    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
106    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
107    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
108    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
109  } else {
110    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
111    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
112    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
113    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
114  }
115  setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
116  setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
117  setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
118  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
119  setOperationAction(ISD::SELECT,            VT, Expand);
120  setOperationAction(ISD::SELECT_CC,         VT, Expand);
121  setOperationAction(ISD::VSELECT,           VT, Expand);
122  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
123  if (VT.isInteger()) {
124    setOperationAction(ISD::SHL, VT, Custom);
125    setOperationAction(ISD::SRA, VT, Custom);
126    setOperationAction(ISD::SRL, VT, Custom);
127  }
128
129  // Promote all bit-wise operations.
130  if (VT.isInteger() && VT != PromotedBitwiseVT) {
131    setOperationAction(ISD::AND, VT, Promote);
132    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
133    setOperationAction(ISD::OR,  VT, Promote);
134    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
135    setOperationAction(ISD::XOR, VT, Promote);
136    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
137  }
138
139  // Neon does not support vector divide/remainder operations.
140  setOperationAction(ISD::SDIV, VT, Expand);
141  setOperationAction(ISD::UDIV, VT, Expand);
142  setOperationAction(ISD::FDIV, VT, Expand);
143  setOperationAction(ISD::SREM, VT, Expand);
144  setOperationAction(ISD::UREM, VT, Expand);
145  setOperationAction(ISD::FREM, VT, Expand);
146}
147
148void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
149  addRegisterClass(VT, &ARM::DPRRegClass);
150  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
151}
152
153void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
154  addRegisterClass(VT, &ARM::DPairRegClass);
155  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
156}
157
158static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
159  if (TT.isOSBinFormatMachO())
160    return new TargetLoweringObjectFileMachO();
161  if (TT.isOSWindows())
162    return new TargetLoweringObjectFileCOFF();
163  return new ARMElfTargetObjectFile();
164}
165
166ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
167    : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
168  Subtarget = &TM.getSubtarget<ARMSubtarget>();
169  RegInfo = TM.getRegisterInfo();
170  Itins = TM.getInstrItineraryData();
171
172  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
173
174  if (Subtarget->isTargetMachO()) {
175    // Uses VFP for Thumb libfuncs if available.
176    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
177        Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
178      // Single-precision floating-point arithmetic.
179      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
180      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
181      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
182      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
183
184      // Double-precision floating-point arithmetic.
185      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
186      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
187      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
188      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
189
190      // Single-precision comparisons.
191      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
192      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
193      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
194      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
195      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
196      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
197      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
198      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
199
200      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
201      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
202      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
203      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
204      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
205      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
206      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
207      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
208
209      // Double-precision comparisons.
210      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
211      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
212      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
213      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
214      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
215      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
216      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
217      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
218
219      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
220      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
221      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
222      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
223      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
224      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
225      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
226      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
227
228      // Floating-point to integer conversions.
229      // i64 conversions are done via library routines even when generating VFP
230      // instructions, so use the same ones.
231      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
232      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
233      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
234      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
235
236      // Conversions between floating types.
237      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
238      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
239
240      // Integer to floating-point conversions.
241      // i64 conversions are done via library routines even when generating VFP
242      // instructions, so use the same ones.
243      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
244      // e.g., __floatunsidf vs. __floatunssidfvfp.
245      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
246      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
247      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
248      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
249    }
250  }
251
252  // These libcalls are not available in 32-bit.
253  setLibcallName(RTLIB::SHL_I128, nullptr);
254  setLibcallName(RTLIB::SRL_I128, nullptr);
255  setLibcallName(RTLIB::SRA_I128, nullptr);
256
257  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
258      !Subtarget->isTargetWindows()) {
259    static const struct {
260      const RTLIB::Libcall Op;
261      const char * const Name;
262      const CallingConv::ID CC;
263      const ISD::CondCode Cond;
264    } LibraryCalls[] = {
265      // Double-precision floating-point arithmetic helper functions
266      // RTABI chapter 4.1.2, Table 2
267      { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
268      { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
269      { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
270      { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
271
272      // Double-precision floating-point comparison helper functions
273      // RTABI chapter 4.1.2, Table 3
274      { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
275      { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
276      { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
277      { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
278      { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
279      { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
280      { RTLIB::UO_F64,  "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
281      { RTLIB::O_F64,   "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
282
283      // Single-precision floating-point arithmetic helper functions
284      // RTABI chapter 4.1.2, Table 4
285      { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
286      { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
287      { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
288      { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
289
290      // Single-precision floating-point comparison helper functions
291      // RTABI chapter 4.1.2, Table 5
292      { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
293      { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
294      { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
295      { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
296      { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
297      { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
298      { RTLIB::UO_F32,  "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
299      { RTLIB::O_F32,   "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
300
301      // Floating-point to integer conversions.
302      // RTABI chapter 4.1.2, Table 6
303      { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
304      { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
305      { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
306      { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
307      { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
308      { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
309      { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
310      { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
311
312      // Conversions between floating types.
313      // RTABI chapter 4.1.2, Table 7
314      { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
315      { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
316
317      // Integer to floating-point conversions.
318      // RTABI chapter 4.1.2, Table 8
319      { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
320      { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
321      { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
322      { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
323      { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
324      { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
325      { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
326      { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
327
328      // Long long helper functions
329      // RTABI chapter 4.2, Table 9
330      { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
331      { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
332      { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
333      { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
334
335      // Integer division functions
336      // RTABI chapter 4.3.1
337      { RTLIB::SDIV_I8,  "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
338      { RTLIB::SDIV_I16, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
339      { RTLIB::SDIV_I32, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340      { RTLIB::SDIV_I64, "__aeabi_ldivmod",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341      { RTLIB::UDIV_I8,  "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
342      { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
343      { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
344      { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
345
346      // Memory operations
347      // RTABI chapter 4.3.4
348      { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
349      { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
350      { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
351    };
352
353    for (const auto &LC : LibraryCalls) {
354      setLibcallName(LC.Op, LC.Name);
355      setLibcallCallingConv(LC.Op, LC.CC);
356      if (LC.Cond != ISD::SETCC_INVALID)
357        setCmpLibcallCC(LC.Op, LC.Cond);
358    }
359  }
360
361  if (Subtarget->isTargetWindows()) {
362    static const struct {
363      const RTLIB::Libcall Op;
364      const char * const Name;
365      const CallingConv::ID CC;
366    } LibraryCalls[] = {
367      { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
368      { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
369      { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
370      { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
371      { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
372      { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
373      { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
374      { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
375    };
376
377    for (const auto &LC : LibraryCalls) {
378      setLibcallName(LC.Op, LC.Name);
379      setLibcallCallingConv(LC.Op, LC.CC);
380    }
381  }
382
383  // Use divmod compiler-rt calls for iOS 5.0 and later.
384  if (Subtarget->getTargetTriple().isiOS() &&
385      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
386    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
387    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
388  }
389
390  if (Subtarget->isThumb1Only())
391    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
392  else
393    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
394  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
395      !Subtarget->isThumb1Only()) {
396    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
397    if (!Subtarget->isFPOnlySP())
398      addRegisterClass(MVT::f64, &ARM::DPRRegClass);
399
400    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
401  }
402
403  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
404       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
405    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
406         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
407      setTruncStoreAction((MVT::SimpleValueType)VT,
408                          (MVT::SimpleValueType)InnerVT, Expand);
409    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
410    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
411    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
412
413    setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
414    setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
415    setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
416    setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
417
418    setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
419  }
420
421  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
422  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
423
424  if (Subtarget->hasNEON()) {
425    addDRTypeForNEON(MVT::v2f32);
426    addDRTypeForNEON(MVT::v8i8);
427    addDRTypeForNEON(MVT::v4i16);
428    addDRTypeForNEON(MVT::v2i32);
429    addDRTypeForNEON(MVT::v1i64);
430
431    addQRTypeForNEON(MVT::v4f32);
432    addQRTypeForNEON(MVT::v2f64);
433    addQRTypeForNEON(MVT::v16i8);
434    addQRTypeForNEON(MVT::v8i16);
435    addQRTypeForNEON(MVT::v4i32);
436    addQRTypeForNEON(MVT::v2i64);
437
438    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
439    // neither Neon nor VFP support any arithmetic operations on it.
440    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
441    // supported for v4f32.
442    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
443    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
444    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
445    // FIXME: Code duplication: FDIV and FREM are expanded always, see
446    // ARMTargetLowering::addTypeForNEON method for details.
447    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
448    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
449    // FIXME: Create unittest.
450    // In another words, find a way when "copysign" appears in DAG with vector
451    // operands.
452    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
453    // FIXME: Code duplication: SETCC has custom operation action, see
454    // ARMTargetLowering::addTypeForNEON method for details.
455    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
456    // FIXME: Create unittest for FNEG and for FABS.
457    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
458    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
459    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
460    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
461    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
462    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
463    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
464    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
465    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
466    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
467    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
468    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
469    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
470    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
471    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
472    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
473    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
474    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
475    setOperationAction(ISD::FMA, MVT::v2f64, Expand);
476
477    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
478    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
479    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
480    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
481    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
482    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
483    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
484    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
485    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
486    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
487    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
488    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
489    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
490    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
491    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
492
493    // Mark v2f32 intrinsics.
494    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
495    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
496    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
497    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
498    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
499    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
500    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
501    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
502    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
503    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
504    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
505    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
506    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
507    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
508    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
509
510    // Neon does not support some operations on v1i64 and v2i64 types.
511    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
512    // Custom handling for some quad-vector types to detect VMULL.
513    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
514    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
515    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
516    // Custom handling for some vector types to avoid expensive expansions
517    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
518    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
519    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
520    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
521    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
522    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
523    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
524    // a destination type that is wider than the source, and nor does
525    // it have a FP_TO_[SU]INT instruction with a narrower destination than
526    // source.
527    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
528    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
529    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
530    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
531
532    setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
533    setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
534
535    // NEON does not have single instruction CTPOP for vectors with element
536    // types wider than 8-bits.  However, custom lowering can leverage the
537    // v8i8/v16i8 vcnt instruction.
538    setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
539    setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
540    setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
541    setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
542
543    // NEON only has FMA instructions as of VFP4.
544    if (!Subtarget->hasVFP4()) {
545      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
546      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
547    }
548
549    setTargetDAGCombine(ISD::INTRINSIC_VOID);
550    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
551    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
552    setTargetDAGCombine(ISD::SHL);
553    setTargetDAGCombine(ISD::SRL);
554    setTargetDAGCombine(ISD::SRA);
555    setTargetDAGCombine(ISD::SIGN_EXTEND);
556    setTargetDAGCombine(ISD::ZERO_EXTEND);
557    setTargetDAGCombine(ISD::ANY_EXTEND);
558    setTargetDAGCombine(ISD::SELECT_CC);
559    setTargetDAGCombine(ISD::BUILD_VECTOR);
560    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
561    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
562    setTargetDAGCombine(ISD::STORE);
563    setTargetDAGCombine(ISD::FP_TO_SINT);
564    setTargetDAGCombine(ISD::FP_TO_UINT);
565    setTargetDAGCombine(ISD::FDIV);
566
567    // It is legal to extload from v4i8 to v4i16 or v4i32.
568    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
569                  MVT::v4i16, MVT::v2i16,
570                  MVT::v2i32};
571    for (unsigned i = 0; i < 6; ++i) {
572      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
573      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
574      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
575    }
576  }
577
578  // ARM and Thumb2 support UMLAL/SMLAL.
579  if (!Subtarget->isThumb1Only())
580    setTargetDAGCombine(ISD::ADDC);
581
582
583  computeRegisterProperties();
584
585  // ARM does not have f32 extending load.
586  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
587
588  // ARM does not have i1 sign extending load.
589  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
590
591  // ARM supports all 4 flavors of integer indexed load / store.
592  if (!Subtarget->isThumb1Only()) {
593    for (unsigned im = (unsigned)ISD::PRE_INC;
594         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
595      setIndexedLoadAction(im,  MVT::i1,  Legal);
596      setIndexedLoadAction(im,  MVT::i8,  Legal);
597      setIndexedLoadAction(im,  MVT::i16, Legal);
598      setIndexedLoadAction(im,  MVT::i32, Legal);
599      setIndexedStoreAction(im, MVT::i1,  Legal);
600      setIndexedStoreAction(im, MVT::i8,  Legal);
601      setIndexedStoreAction(im, MVT::i16, Legal);
602      setIndexedStoreAction(im, MVT::i32, Legal);
603    }
604  }
605
606  setOperationAction(ISD::SADDO, MVT::i32, Custom);
607  setOperationAction(ISD::UADDO, MVT::i32, Custom);
608  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
609  setOperationAction(ISD::USUBO, MVT::i32, Custom);
610
611  // i64 operation support.
612  setOperationAction(ISD::MUL,     MVT::i64, Expand);
613  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
614  if (Subtarget->isThumb1Only()) {
615    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
616    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
617  }
618  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
619      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
620    setOperationAction(ISD::MULHS, MVT::i32, Expand);
621
622  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
623  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
624  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
625  setOperationAction(ISD::SRL,       MVT::i64, Custom);
626  setOperationAction(ISD::SRA,       MVT::i64, Custom);
627
628  if (!Subtarget->isThumb1Only()) {
629    // FIXME: We should do this for Thumb1 as well.
630    setOperationAction(ISD::ADDC,    MVT::i32, Custom);
631    setOperationAction(ISD::ADDE,    MVT::i32, Custom);
632    setOperationAction(ISD::SUBC,    MVT::i32, Custom);
633    setOperationAction(ISD::SUBE,    MVT::i32, Custom);
634  }
635
636  // ARM does not have ROTL.
637  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
638  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
639  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
640  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
641    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
642
643  // These just redirect to CTTZ and CTLZ on ARM.
644  setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
645  setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
646
647  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
648
649  // Only ARMv6 has BSWAP.
650  if (!Subtarget->hasV6Ops())
651    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
652
653  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
654      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
655    // These are expanded into libcalls if the cpu doesn't have HW divider.
656    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
657    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
658  }
659
660  // FIXME: Also set divmod for SREM on EABI
661  setOperationAction(ISD::SREM,  MVT::i32, Expand);
662  setOperationAction(ISD::UREM,  MVT::i32, Expand);
663  // Register based DivRem for AEABI (RTABI 4.2)
664  if (Subtarget->isTargetAEABI()) {
665    setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
666    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
667    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
668    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
669    setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
670    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
671    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
672    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
673
674    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
675    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
676    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
677    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
678    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
679    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
680    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
681    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
682
683    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
684    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
685  } else {
686    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
687    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
688  }
689
690  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
691  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
692  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
693  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
694  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
695
696  setOperationAction(ISD::TRAP, MVT::Other, Legal);
697
698  // Use the default implementation.
699  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
700  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
701  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
702  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
703  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
704  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
705
706  if (!Subtarget->isTargetMachO()) {
707    // Non-MachO platforms may return values in these registers via the
708    // personality function.
709    setExceptionPointerRegister(ARM::R0);
710    setExceptionSelectorRegister(ARM::R1);
711  }
712
713  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
714    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
715  else
716    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
717
718  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
719  // the default expansion.
720  if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
721    // ATOMIC_FENCE needs custom lowering; the others should have been expanded
722    // to ldrex/strex loops already.
723    setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Custom);
724
725    // On v8, we have particularly efficient implementations of atomic fences
726    // if they can be combined with nearby atomic loads and stores.
727    if (!Subtarget->hasV8Ops()) {
728      // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
729      setInsertFencesForAtomic(true);
730    }
731  } else {
732    // If there's anything we can use as a barrier, go through custom lowering
733    // for ATOMIC_FENCE.
734    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other,
735                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);
736
737    // Set them all for expansion, which will force libcalls.
738    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
739    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
740    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
741    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
742    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
743    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
744    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
745    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
746    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
747    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
748    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
749    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
750    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
751    // Unordered/Monotonic case.
752    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
753    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
754  }
755
756  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
757
758  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
759  if (!Subtarget->hasV6Ops()) {
760    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
761    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
762  }
763  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
764
765  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
766      !Subtarget->isThumb1Only()) {
767    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
768    // iff target supports vfp2.
769    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
770    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
771  }
772
773  // We want to custom lower some of our intrinsics.
774  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
775  if (Subtarget->isTargetDarwin()) {
776    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
777    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
778    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
779  }
780
781  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
782  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
783  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
784  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
785  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
786  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
787  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
788  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
789  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
790
791  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
792  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
793  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
794  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
795  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
796
797  // We don't support sin/cos/fmod/copysign/pow
798  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
799  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
800  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
801  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
802  setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
803  setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
804  setOperationAction(ISD::FREM,      MVT::f64, Expand);
805  setOperationAction(ISD::FREM,      MVT::f32, Expand);
806  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
807      !Subtarget->isThumb1Only()) {
808    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
809    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
810  }
811  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
812  setOperationAction(ISD::FPOW,      MVT::f32, Expand);
813
814  if (!Subtarget->hasVFP4()) {
815    setOperationAction(ISD::FMA, MVT::f64, Expand);
816    setOperationAction(ISD::FMA, MVT::f32, Expand);
817  }
818
819  // Various VFP goodness
820  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
821    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
822    if (Subtarget->hasVFP2()) {
823      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
824      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
825      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
826      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
827    }
828    // Special handling for half-precision FP.
829    if (!Subtarget->hasFP16()) {
830      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
831      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
832    }
833  }
834
835  // Combine sin / cos into one node or libcall if possible.
836  if (Subtarget->hasSinCos()) {
837    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
838    setLibcallName(RTLIB::SINCOS_F64, "sincos");
839    if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
840      // For iOS, we don't want to the normal expansion of a libcall to
841      // sincos. We want to issue a libcall to __sincos_stret.
842      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
843      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
844    }
845  }
846
847  // We have target-specific dag combine patterns for the following nodes:
848  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
849  setTargetDAGCombine(ISD::ADD);
850  setTargetDAGCombine(ISD::SUB);
851  setTargetDAGCombine(ISD::MUL);
852  setTargetDAGCombine(ISD::AND);
853  setTargetDAGCombine(ISD::OR);
854  setTargetDAGCombine(ISD::XOR);
855
856  if (Subtarget->hasV6Ops())
857    setTargetDAGCombine(ISD::SRL);
858
859  setStackPointerRegisterToSaveRestore(ARM::SP);
860
861  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
862      !Subtarget->hasVFP2())
863    setSchedulingPreference(Sched::RegPressure);
864  else
865    setSchedulingPreference(Sched::Hybrid);
866
867  //// temporary - rewrite interface to use type
868  MaxStoresPerMemset = 8;
869  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
870  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
871  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
872  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
873  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
874
875  // On ARM arguments smaller than 4 bytes are extended, so all arguments
876  // are at least 4 bytes aligned.
877  setMinStackArgumentAlignment(4);
878
879  // Prefer likely predicted branches to selects on out-of-order cores.
880  PredictableSelectIsExpensive = Subtarget->isLikeA9();
881
882  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
883}
884
885// FIXME: It might make sense to define the representative register class as the
886// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
887// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
888// SPR's representative would be DPR_VFP2. This should work well if register
889// pressure tracking were modified such that a register use would increment the
890// pressure of the register class's representative and all of it's super
891// classes' representatives transitively. We have not implemented this because
892// of the difficulty prior to coalescing of modeling operand register classes
893// due to the common occurrence of cross class copies and subregister insertions
894// and extractions.
895std::pair<const TargetRegisterClass*, uint8_t>
896ARMTargetLowering::findRepresentativeClass(MVT VT) const{
897  const TargetRegisterClass *RRC = nullptr;
898  uint8_t Cost = 1;
899  switch (VT.SimpleTy) {
900  default:
901    return TargetLowering::findRepresentativeClass(VT);
902  // Use DPR as representative register class for all floating point
903  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
904  // the cost is 1 for both f32 and f64.
905  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
906  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
907    RRC = &ARM::DPRRegClass;
908    // When NEON is used for SP, only half of the register file is available
909    // because operations that define both SP and DP results will be constrained
910    // to the VFP2 class (D0-D15). We currently model this constraint prior to
911    // coalescing by double-counting the SP regs. See the FIXME above.
912    if (Subtarget->useNEONForSinglePrecisionFP())
913      Cost = 2;
914    break;
915  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
916  case MVT::v4f32: case MVT::v2f64:
917    RRC = &ARM::DPRRegClass;
918    Cost = 2;
919    break;
920  case MVT::v4i64:
921    RRC = &ARM::DPRRegClass;
922    Cost = 4;
923    break;
924  case MVT::v8i64:
925    RRC = &ARM::DPRRegClass;
926    Cost = 8;
927    break;
928  }
929  return std::make_pair(RRC, Cost);
930}
931
932const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
933  switch (Opcode) {
934  default: return nullptr;
935  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
936  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
937  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
938  case ARMISD::CALL:          return "ARMISD::CALL";
939  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
940  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
941  case ARMISD::tCALL:         return "ARMISD::tCALL";
942  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
943  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
944  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
945  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
946  case ARMISD::INTRET_FLAG:   return "ARMISD::INTRET_FLAG";
947  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
948  case ARMISD::CMP:           return "ARMISD::CMP";
949  case ARMISD::CMN:           return "ARMISD::CMN";
950  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
951  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
952  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
953  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
954  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
955
956  case ARMISD::CMOV:          return "ARMISD::CMOV";
957
958  case ARMISD::RBIT:          return "ARMISD::RBIT";
959
960  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
961  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
962  case ARMISD::SITOF:         return "ARMISD::SITOF";
963  case ARMISD::UITOF:         return "ARMISD::UITOF";
964
965  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
966  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
967  case ARMISD::RRX:           return "ARMISD::RRX";
968
969  case ARMISD::ADDC:          return "ARMISD::ADDC";
970  case ARMISD::ADDE:          return "ARMISD::ADDE";
971  case ARMISD::SUBC:          return "ARMISD::SUBC";
972  case ARMISD::SUBE:          return "ARMISD::SUBE";
973
974  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
975  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
976
977  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
978  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
979
980  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
981
982  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
983
984  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
985
986  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
987
988  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
989
990  case ARMISD::WIN__CHKSTK:   return "ARMISD:::WIN__CHKSTK";
991
992  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
993  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
994  case ARMISD::VCGE:          return "ARMISD::VCGE";
995  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
996  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
997  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
998  case ARMISD::VCGT:          return "ARMISD::VCGT";
999  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
1000  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
1001  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
1002  case ARMISD::VTST:          return "ARMISD::VTST";
1003
1004  case ARMISD::VSHL:          return "ARMISD::VSHL";
1005  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
1006  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
1007  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
1008  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
1009  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
1010  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
1011  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
1012  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
1013  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
1014  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
1015  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
1016  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
1017  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
1018  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
1019  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
1020  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
1021  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
1022  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
1023  case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
1024  case ARMISD::VDUP:          return "ARMISD::VDUP";
1025  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
1026  case ARMISD::VEXT:          return "ARMISD::VEXT";
1027  case ARMISD::VREV64:        return "ARMISD::VREV64";
1028  case ARMISD::VREV32:        return "ARMISD::VREV32";
1029  case ARMISD::VREV16:        return "ARMISD::VREV16";
1030  case ARMISD::VZIP:          return "ARMISD::VZIP";
1031  case ARMISD::VUZP:          return "ARMISD::VUZP";
1032  case ARMISD::VTRN:          return "ARMISD::VTRN";
1033  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
1034  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
1035  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
1036  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
1037  case ARMISD::UMLAL:         return "ARMISD::UMLAL";
1038  case ARMISD::SMLAL:         return "ARMISD::SMLAL";
1039  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
1040  case ARMISD::FMAX:          return "ARMISD::FMAX";
1041  case ARMISD::FMIN:          return "ARMISD::FMIN";
1042  case ARMISD::VMAXNM:        return "ARMISD::VMAX";
1043  case ARMISD::VMINNM:        return "ARMISD::VMIN";
1044  case ARMISD::BFI:           return "ARMISD::BFI";
1045  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
1046  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
1047  case ARMISD::VBSL:          return "ARMISD::VBSL";
1048  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
1049  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
1050  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
1051  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
1052  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
1053  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
1054  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
1055  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
1056  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
1057  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
1058  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
1059  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
1060  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
1061  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
1062  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
1063  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
1064  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
1065  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
1066  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
1067  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
1068  }
1069}
1070
1071EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
1072  if (!VT.isVector()) return getPointerTy();
1073  return VT.changeVectorElementTypeToInteger();
1074}
1075
1076/// getRegClassFor - Return the register class that should be used for the
1077/// specified value type.
1078const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1079  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1080  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1081  // load / store 4 to 8 consecutive D registers.
1082  if (Subtarget->hasNEON()) {
1083    if (VT == MVT::v4i64)
1084      return &ARM::QQPRRegClass;
1085    if (VT == MVT::v8i64)
1086      return &ARM::QQQQPRRegClass;
1087  }
1088  return TargetLowering::getRegClassFor(VT);
1089}
1090
1091// Create a fast isel object.
1092FastISel *
1093ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1094                                  const TargetLibraryInfo *libInfo) const {
1095  return ARM::createFastISel(funcInfo, libInfo);
1096}
1097
1098/// getMaximalGlobalOffset - Returns the maximal possible offset which can
1099/// be used for loads / stores from the global.
1100unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
1101  return (Subtarget->isThumb1Only() ? 127 : 4095);
1102}
1103
1104Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1105  unsigned NumVals = N->getNumValues();
1106  if (!NumVals)
1107    return Sched::RegPressure;
1108
1109  for (unsigned i = 0; i != NumVals; ++i) {
1110    EVT VT = N->getValueType(i);
1111    if (VT == MVT::Glue || VT == MVT::Other)
1112      continue;
1113    if (VT.isFloatingPoint() || VT.isVector())
1114      return Sched::ILP;
1115  }
1116
1117  if (!N->isMachineOpcode())
1118    return Sched::RegPressure;
1119
1120  // Load are scheduled for latency even if there instruction itinerary
1121  // is not available.
1122  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
1123  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1124
1125  if (MCID.getNumDefs() == 0)
1126    return Sched::RegPressure;
1127  if (!Itins->isEmpty() &&
1128      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1129    return Sched::ILP;
1130
1131  return Sched::RegPressure;
1132}
1133
1134//===----------------------------------------------------------------------===//
1135// Lowering Code
1136//===----------------------------------------------------------------------===//
1137
1138/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1139static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1140  switch (CC) {
1141  default: llvm_unreachable("Unknown condition code!");
1142  case ISD::SETNE:  return ARMCC::NE;
1143  case ISD::SETEQ:  return ARMCC::EQ;
1144  case ISD::SETGT:  return ARMCC::GT;
1145  case ISD::SETGE:  return ARMCC::GE;
1146  case ISD::SETLT:  return ARMCC::LT;
1147  case ISD::SETLE:  return ARMCC::LE;
1148  case ISD::SETUGT: return ARMCC::HI;
1149  case ISD::SETUGE: return ARMCC::HS;
1150  case ISD::SETULT: return ARMCC::LO;
1151  case ISD::SETULE: return ARMCC::LS;
1152  }
1153}
1154
1155/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1156static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1157                        ARMCC::CondCodes &CondCode2) {
1158  CondCode2 = ARMCC::AL;
1159  switch (CC) {
1160  default: llvm_unreachable("Unknown FP condition!");
1161  case ISD::SETEQ:
1162  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1163  case ISD::SETGT:
1164  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1165  case ISD::SETGE:
1166  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1167  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1168  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1169  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1170  case ISD::SETO:   CondCode = ARMCC::VC; break;
1171  case ISD::SETUO:  CondCode = ARMCC::VS; break;
1172  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1173  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1174  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1175  case ISD::SETLT:
1176  case ISD::SETULT: CondCode = ARMCC::LT; break;
1177  case ISD::SETLE:
1178  case ISD::SETULE: CondCode = ARMCC::LE; break;
1179  case ISD::SETNE:
1180  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1181  }
1182}
1183
1184//===----------------------------------------------------------------------===//
1185//                      Calling Convention Implementation
1186//===----------------------------------------------------------------------===//
1187
1188#include "ARMGenCallingConv.inc"
1189
1190/// getEffectiveCallingConv - Get the effective calling convention, taking into
1191/// account presence of floating point hardware and calling convention
1192/// limitations, such as support for variadic functions.
1193CallingConv::ID
1194ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1195                                           bool isVarArg) const {
1196  switch (CC) {
1197  default:
1198    llvm_unreachable("Unsupported calling convention");
1199  case CallingConv::ARM_AAPCS:
1200  case CallingConv::ARM_APCS:
1201  case CallingConv::GHC:
1202    return CC;
1203  case CallingConv::ARM_AAPCS_VFP:
1204    return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1205  case CallingConv::C:
1206    if (!Subtarget->isAAPCS_ABI())
1207      return CallingConv::ARM_APCS;
1208    else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1209             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1210             !isVarArg)
1211      return CallingConv::ARM_AAPCS_VFP;
1212    else
1213      return CallingConv::ARM_AAPCS;
1214  case CallingConv::Fast:
1215    if (!Subtarget->isAAPCS_ABI()) {
1216      if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1217        return CallingConv::Fast;
1218      return CallingConv::ARM_APCS;
1219    } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1220      return CallingConv::ARM_AAPCS_VFP;
1221    else
1222      return CallingConv::ARM_AAPCS;
1223  }
1224}
1225
1226/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1227/// CallingConvention.
1228CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1229                                                 bool Return,
1230                                                 bool isVarArg) const {
1231  switch (getEffectiveCallingConv(CC, isVarArg)) {
1232  default:
1233    llvm_unreachable("Unsupported calling convention");
1234  case CallingConv::ARM_APCS:
1235    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1236  case CallingConv::ARM_AAPCS:
1237    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1238  case CallingConv::ARM_AAPCS_VFP:
1239    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1240  case CallingConv::Fast:
1241    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1242  case CallingConv::GHC:
1243    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1244  }
1245}
1246
1247/// LowerCallResult - Lower the result values of a call into the
1248/// appropriate copies out of appropriate physical registers.
1249SDValue
1250ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
1251                                   CallingConv::ID CallConv, bool isVarArg,
1252                                   const SmallVectorImpl<ISD::InputArg> &Ins,
1253                                   SDLoc dl, SelectionDAG &DAG,
1254                                   SmallVectorImpl<SDValue> &InVals,
1255                                   bool isThisReturn, SDValue ThisVal) const {
1256
1257  // Assign locations to each value returned by this call.
1258  SmallVector<CCValAssign, 16> RVLocs;
1259  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1260                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
1261  CCInfo.AnalyzeCallResult(Ins,
1262                           CCAssignFnForNode(CallConv, /* Return*/ true,
1263                                             isVarArg));
1264
1265  // Copy all of the result registers out of their specified physreg.
1266  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1267    CCValAssign VA = RVLocs[i];
1268
1269    // Pass 'this' value directly from the argument to return value, to avoid
1270    // reg unit interference
1271    if (i == 0 && isThisReturn) {
1272      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1273             "unexpected return calling convention register assignment");
1274      InVals.push_back(ThisVal);
1275      continue;
1276    }
1277
1278    SDValue Val;
1279    if (VA.needsCustom()) {
1280      // Handle f64 or half of a v2f64.
1281      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1282                                      InFlag);
1283      Chain = Lo.getValue(1);
1284      InFlag = Lo.getValue(2);
1285      VA = RVLocs[++i]; // skip ahead to next loc
1286      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1287                                      InFlag);
1288      Chain = Hi.getValue(1);
1289      InFlag = Hi.getValue(2);
1290      if (!Subtarget->isLittle())
1291        std::swap (Lo, Hi);
1292      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1293
1294      if (VA.getLocVT() == MVT::v2f64) {
1295        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1296        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1297                          DAG.getConstant(0, MVT::i32));
1298
1299        VA = RVLocs[++i]; // skip ahead to next loc
1300        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1301        Chain = Lo.getValue(1);
1302        InFlag = Lo.getValue(2);
1303        VA = RVLocs[++i]; // skip ahead to next loc
1304        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1305        Chain = Hi.getValue(1);
1306        InFlag = Hi.getValue(2);
1307        if (!Subtarget->isLittle())
1308          std::swap (Lo, Hi);
1309        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1310        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1311                          DAG.getConstant(1, MVT::i32));
1312      }
1313    } else {
1314      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1315                               InFlag);
1316      Chain = Val.getValue(1);
1317      InFlag = Val.getValue(2);
1318    }
1319
1320    switch (VA.getLocInfo()) {
1321    default: llvm_unreachable("Unknown loc info!");
1322    case CCValAssign::Full: break;
1323    case CCValAssign::BCvt:
1324      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1325      break;
1326    }
1327
1328    InVals.push_back(Val);
1329  }
1330
1331  return Chain;
1332}
1333
1334/// LowerMemOpCallTo - Store the argument to the stack.
1335SDValue
1336ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1337                                    SDValue StackPtr, SDValue Arg,
1338                                    SDLoc dl, SelectionDAG &DAG,
1339                                    const CCValAssign &VA,
1340                                    ISD::ArgFlagsTy Flags) const {
1341  unsigned LocMemOffset = VA.getLocMemOffset();
1342  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1343  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1344  return DAG.getStore(Chain, dl, Arg, PtrOff,
1345                      MachinePointerInfo::getStack(LocMemOffset),
1346                      false, false, 0);
1347}
1348
1349void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
1350                                         SDValue Chain, SDValue &Arg,
1351                                         RegsToPassVector &RegsToPass,
1352                                         CCValAssign &VA, CCValAssign &NextVA,
1353                                         SDValue &StackPtr,
1354                                         SmallVectorImpl<SDValue> &MemOpChains,
1355                                         ISD::ArgFlagsTy Flags) const {
1356
1357  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1358                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
1359  unsigned id = Subtarget->isLittle() ? 0 : 1;
1360  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1361
1362  if (NextVA.isRegLoc())
1363    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1364  else {
1365    assert(NextVA.isMemLoc());
1366    if (!StackPtr.getNode())
1367      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1368
1369    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1370                                           dl, DAG, NextVA,
1371                                           Flags));
1372  }
1373}
1374
1375/// LowerCall - Lowering a call into a callseq_start <-
1376/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
1377/// nodes.
1378SDValue
1379ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1380                             SmallVectorImpl<SDValue> &InVals) const {
1381  SelectionDAG &DAG                     = CLI.DAG;
1382  SDLoc &dl                          = CLI.DL;
1383  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1384  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
1385  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
1386  SDValue Chain                         = CLI.Chain;
1387  SDValue Callee                        = CLI.Callee;
1388  bool &isTailCall                      = CLI.IsTailCall;
1389  CallingConv::ID CallConv              = CLI.CallConv;
1390  bool doesNotRet                       = CLI.DoesNotReturn;
1391  bool isVarArg                         = CLI.IsVarArg;
1392
1393  MachineFunction &MF = DAG.getMachineFunction();
1394  bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1395  bool isThisReturn   = false;
1396  bool isSibCall      = false;
1397
1398  // Disable tail calls if they're not supported.
1399  if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
1400    isTailCall = false;
1401
1402  if (isTailCall) {
1403    // Check if it's really possible to do a tail call.
1404    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1405                    isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1406                                                   Outs, OutVals, Ins, DAG);
1407    if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1408      report_fatal_error("failed to perform tail call elimination on a call "
1409                         "site marked musttail");
1410    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1411    // detected sibcalls.
1412    if (isTailCall) {
1413      ++NumTailCalls;
1414      isSibCall = true;
1415    }
1416  }
1417
1418  // Analyze operands of the call, assigning locations to each operand.
1419  SmallVector<CCValAssign, 16> ArgLocs;
1420  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1421                 getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
1422  CCInfo.AnalyzeCallOperands(Outs,
1423                             CCAssignFnForNode(CallConv, /* Return*/ false,
1424                                               isVarArg));
1425
1426  // Get a count of how many bytes are to be pushed on the stack.
1427  unsigned NumBytes = CCInfo.getNextStackOffset();
1428
1429  // For tail calls, memory operands are available in our caller's stack.
1430  if (isSibCall)
1431    NumBytes = 0;
1432
1433  // Adjust the stack pointer for the new arguments...
1434  // These operations are automatically eliminated by the prolog/epilog pass
1435  if (!isSibCall)
1436    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
1437                                 dl);
1438
1439  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1440
1441  RegsToPassVector RegsToPass;
1442  SmallVector<SDValue, 8> MemOpChains;
1443
1444  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1445  // of tail call optimization, arguments are handled later.
1446  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1447       i != e;
1448       ++i, ++realArgIdx) {
1449    CCValAssign &VA = ArgLocs[i];
1450    SDValue Arg = OutVals[realArgIdx];
1451    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1452    bool isByVal = Flags.isByVal();
1453
1454    // Promote the value if needed.
1455    switch (VA.getLocInfo()) {
1456    default: llvm_unreachable("Unknown loc info!");
1457    case CCValAssign::Full: break;
1458    case CCValAssign::SExt:
1459      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1460      break;
1461    case CCValAssign::ZExt:
1462      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1463      break;
1464    case CCValAssign::AExt:
1465      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1466      break;
1467    case CCValAssign::BCvt:
1468      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1469      break;
1470    }
1471
1472    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1473    if (VA.needsCustom()) {
1474      if (VA.getLocVT() == MVT::v2f64) {
1475        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1476                                  DAG.getConstant(0, MVT::i32));
1477        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1478                                  DAG.getConstant(1, MVT::i32));
1479
1480        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1481                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1482
1483        VA = ArgLocs[++i]; // skip ahead to next loc
1484        if (VA.isRegLoc()) {
1485          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1486                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1487        } else {
1488          assert(VA.isMemLoc());
1489
1490          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1491                                                 dl, DAG, VA, Flags));
1492        }
1493      } else {
1494        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1495                         StackPtr, MemOpChains, Flags);
1496      }
1497    } else if (VA.isRegLoc()) {
1498      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
1499        assert(VA.getLocVT() == MVT::i32 &&
1500               "unexpected calling convention register assignment");
1501        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1502               "unexpected use of 'returned'");
1503        isThisReturn = true;
1504      }
1505      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1506    } else if (isByVal) {
1507      assert(VA.isMemLoc());
1508      unsigned offset = 0;
1509
1510      // True if this byval aggregate will be split between registers
1511      // and memory.
1512      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1513      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
1514
1515      if (CurByValIdx < ByValArgsCount) {
1516
1517        unsigned RegBegin, RegEnd;
1518        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1519
1520        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1521        unsigned int i, j;
1522        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1523          SDValue Const = DAG.getConstant(4*i, MVT::i32);
1524          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1525          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1526                                     MachinePointerInfo(),
1527                                     false, false, false,
1528                                     DAG.InferPtrAlignment(AddArg));
1529          MemOpChains.push_back(Load.getValue(1));
1530          RegsToPass.push_back(std::make_pair(j, Load));
1531        }
1532
1533        // If parameter size outsides register area, "offset" value
1534        // helps us to calculate stack slot for remained part properly.
1535        offset = RegEnd - RegBegin;
1536
1537        CCInfo.nextInRegsParam();
1538      }
1539
1540      if (Flags.getByValSize() > 4*offset) {
1541        unsigned LocMemOffset = VA.getLocMemOffset();
1542        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
1543        SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
1544                                  StkPtrOff);
1545        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
1546        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
1547        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
1548                                           MVT::i32);
1549        SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
1550
1551        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1552        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1553        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1554                                          Ops));
1555      }
1556    } else if (!isSibCall) {
1557      assert(VA.isMemLoc());
1558
1559      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1560                                             dl, DAG, VA, Flags));
1561    }
1562  }
1563
1564  if (!MemOpChains.empty())
1565    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1566
1567  // Build a sequence of copy-to-reg nodes chained together with token chain
1568  // and flag operands which copy the outgoing args into the appropriate regs.
1569  SDValue InFlag;
1570  // Tail call byval lowering might overwrite argument registers so in case of
1571  // tail call optimization the copies to registers are lowered later.
1572  if (!isTailCall)
1573    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1574      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1575                               RegsToPass[i].second, InFlag);
1576      InFlag = Chain.getValue(1);
1577    }
1578
1579  // For tail calls lower the arguments to the 'real' stack slot.
1580  if (isTailCall) {
1581    // Force all the incoming stack arguments to be loaded from the stack
1582    // before any new outgoing arguments are stored to the stack, because the
1583    // outgoing stack slots may alias the incoming argument stack slots, and
1584    // the alias isn't otherwise explicit. This is slightly more conservative
1585    // than necessary, because it means that each store effectively depends
1586    // on every argument instead of just those arguments it would clobber.
1587
1588    // Do not flag preceding copytoreg stuff together with the following stuff.
1589    InFlag = SDValue();
1590    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1591      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1592                               RegsToPass[i].second, InFlag);
1593      InFlag = Chain.getValue(1);
1594    }
1595    InFlag = SDValue();
1596  }
1597
1598  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1599  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1600  // node so that legalize doesn't hack it.
1601  bool isDirect = false;
1602  bool isARMFunc = false;
1603  bool isLocalARMFunc = false;
1604  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1605
1606  if (EnableARMLongCalls) {
1607    assert((Subtarget->isTargetWindows() ||
1608            getTargetMachine().getRelocationModel() == Reloc::Static) &&
1609           "long-calls with non-static relocation model!");
1610    // Handle a global address or an external symbol. If it's not one of
1611    // those, the target's already in a register, so we don't need to do
1612    // anything extra.
1613    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1614      const GlobalValue *GV = G->getGlobal();
1615      // Create a constant pool entry for the callee address
1616      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1617      ARMConstantPoolValue *CPV =
1618        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
1619
1620      // Get the address of the callee into a register
1621      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1622      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1623      Callee = DAG.getLoad(getPointerTy(), dl,
1624                           DAG.getEntryNode(), CPAddr,
1625                           MachinePointerInfo::getConstantPool(),
1626                           false, false, false, 0);
1627    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1628      const char *Sym = S->getSymbol();
1629
1630      // Create a constant pool entry for the callee address
1631      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1632      ARMConstantPoolValue *CPV =
1633        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1634                                      ARMPCLabelIndex, 0);
1635      // Get the address of the callee into a register
1636      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1637      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1638      Callee = DAG.getLoad(getPointerTy(), dl,
1639                           DAG.getEntryNode(), CPAddr,
1640                           MachinePointerInfo::getConstantPool(),
1641                           false, false, false, 0);
1642    }
1643  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1644    const GlobalValue *GV = G->getGlobal();
1645    isDirect = true;
1646    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1647    bool isStub = (isExt && Subtarget->isTargetMachO()) &&
1648                   getTargetMachine().getRelocationModel() != Reloc::Static;
1649    isARMFunc = !Subtarget->isThumb() || isStub;
1650    // ARM call to a local ARM function is predicable.
1651    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1652    // tBX takes a register source operand.
1653    if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1654      assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
1655      Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
1656                           DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
1657    } else if (Subtarget->isTargetCOFF()) {
1658      assert(Subtarget->isTargetWindows() &&
1659             "Windows is the only supported COFF target");
1660      unsigned TargetFlags = GV->hasDLLImportStorageClass()
1661                                 ? ARMII::MO_DLLIMPORT
1662                                 : ARMII::MO_NO_FLAG;
1663      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
1664                                          TargetFlags);
1665      if (GV->hasDLLImportStorageClass())
1666        Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
1667                             DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
1668                                         Callee), MachinePointerInfo::getGOT(),
1669                             false, false, false, 0);
1670    } else {
1671      // On ELF targets for PIC code, direct calls should go through the PLT
1672      unsigned OpFlags = 0;
1673      if (Subtarget->isTargetELF() &&
1674          getTargetMachine().getRelocationModel() == Reloc::PIC_)
1675        OpFlags = ARMII::MO_PLT;
1676      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
1677    }
1678  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1679    isDirect = true;
1680    bool isStub = Subtarget->isTargetMachO() &&
1681                  getTargetMachine().getRelocationModel() != Reloc::Static;
1682    isARMFunc = !Subtarget->isThumb() || isStub;
1683    // tBX takes a register source operand.
1684    const char *Sym = S->getSymbol();
1685    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1686      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1687      ARMConstantPoolValue *CPV =
1688        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1689                                      ARMPCLabelIndex, 4);
1690      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1691      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1692      Callee = DAG.getLoad(getPointerTy(), dl,
1693                           DAG.getEntryNode(), CPAddr,
1694                           MachinePointerInfo::getConstantPool(),
1695                           false, false, false, 0);
1696      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1697      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1698                           getPointerTy(), Callee, PICLabel);
1699    } else {
1700      unsigned OpFlags = 0;
1701      // On ELF targets for PIC code, direct calls should go through the PLT
1702      if (Subtarget->isTargetELF() &&
1703                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
1704        OpFlags = ARMII::MO_PLT;
1705      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
1706    }
1707  }
1708
1709  // FIXME: handle tail calls differently.
1710  unsigned CallOpc;
1711  bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
1712      AttributeSet::FunctionIndex, Attribute::MinSize);
1713  if (Subtarget->isThumb()) {
1714    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1715      CallOpc = ARMISD::CALL_NOLINK;
1716    else
1717      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1718  } else {
1719    if (!isDirect && !Subtarget->hasV5TOps())
1720      CallOpc = ARMISD::CALL_NOLINK;
1721    else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
1722               // Emit regular call when code size is the priority
1723               !HasMinSizeAttr)
1724      // "mov lr, pc; b _foo" to avoid confusing the RSP
1725      CallOpc = ARMISD::CALL_NOLINK;
1726    else
1727      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
1728  }
1729
1730  std::vector<SDValue> Ops;
1731  Ops.push_back(Chain);
1732  Ops.push_back(Callee);
1733
1734  // Add argument registers to the end of the list so that they are known live
1735  // into the call.
1736  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1737    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1738                                  RegsToPass[i].second.getValueType()));
1739
1740  // Add a register mask operand representing the call-preserved registers.
1741  if (!isTailCall) {
1742    const uint32_t *Mask;
1743    const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
1744    const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
1745    if (isThisReturn) {
1746      // For 'this' returns, use the R0-preserving mask if applicable
1747      Mask = ARI->getThisReturnPreservedMask(CallConv);
1748      if (!Mask) {
1749        // Set isThisReturn to false if the calling convention is not one that
1750        // allows 'returned' to be modeled in this way, so LowerCallResult does
1751        // not try to pass 'this' straight through
1752        isThisReturn = false;
1753        Mask = ARI->getCallPreservedMask(CallConv);
1754      }
1755    } else
1756      Mask = ARI->getCallPreservedMask(CallConv);
1757
1758    assert(Mask && "Missing call preserved mask for calling convention");
1759    Ops.push_back(DAG.getRegisterMask(Mask));
1760  }
1761
1762  if (InFlag.getNode())
1763    Ops.push_back(InFlag);
1764
1765  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1766  if (isTailCall)
1767    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
1768
1769  // Returns a chain and a flag for retval copy to use.
1770  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
1771  InFlag = Chain.getValue(1);
1772
1773  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1774                             DAG.getIntPtrConstant(0, true), InFlag, dl);
1775  if (!Ins.empty())
1776    InFlag = Chain.getValue(1);
1777
1778  // Handle result values, copying them out of physregs into vregs that we
1779  // return.
1780  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
1781                         InVals, isThisReturn,
1782                         isThisReturn ? OutVals[0] : SDValue());
1783}
1784
1785/// HandleByVal - Every parameter *after* a byval parameter is passed
1786/// on the stack.  Remember the next parameter register to allocate,
1787/// and then confiscate the rest of the parameter registers to insure
1788/// this.
1789void
1790ARMTargetLowering::HandleByVal(
1791    CCState *State, unsigned &size, unsigned Align) const {
1792  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
1793  assert((State->getCallOrPrologue() == Prologue ||
1794          State->getCallOrPrologue() == Call) &&
1795         "unhandled ParmContext");
1796
1797  if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
1798    if (Subtarget->isAAPCS_ABI() && Align > 4) {
1799      unsigned AlignInRegs = Align / 4;
1800      unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
1801      for (unsigned i = 0; i < Waste; ++i)
1802        reg = State->AllocateReg(GPRArgRegs, 4);
1803    }
1804    if (reg != 0) {
1805      unsigned excess = 4 * (ARM::R4 - reg);
1806
1807      // Special case when NSAA != SP and parameter size greater than size of
1808      // all remained GPR regs. In that case we can't split parameter, we must
1809      // send it to stack. We also must set NCRN to R4, so waste all
1810      // remained registers.
1811      const unsigned NSAAOffset = State->getNextStackOffset();
1812      if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
1813        while (State->AllocateReg(GPRArgRegs, 4))
1814          ;
1815        return;
1816      }
1817
1818      // First register for byval parameter is the first register that wasn't
1819      // allocated before this method call, so it would be "reg".
1820      // If parameter is small enough to be saved in range [reg, r4), then
1821      // the end (first after last) register would be reg + param-size-in-regs,
1822      // else parameter would be splitted between registers and stack,
1823      // end register would be r4 in this case.
1824      unsigned ByValRegBegin = reg;
1825      unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
1826      State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
1827      // Note, first register is allocated in the beginning of function already,
1828      // allocate remained amount of registers we need.
1829      for (unsigned i = reg+1; i != ByValRegEnd; ++i)
1830        State->AllocateReg(GPRArgRegs, 4);
1831      // A byval parameter that is split between registers and memory needs its
1832      // size truncated here.
1833      // In the case where the entire structure fits in registers, we set the
1834      // size in memory to zero.
1835      if (size < excess)
1836        size = 0;
1837      else
1838        size -= excess;
1839    }
1840  }
1841}
1842
1843/// MatchingStackOffset - Return true if the given stack call argument is
1844/// already available in the same position (relatively) of the caller's
1845/// incoming argument stack.
1846static
1847bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1848                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1849                         const TargetInstrInfo *TII) {
1850  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1851  int FI = INT_MAX;
1852  if (Arg.getOpcode() == ISD::CopyFromReg) {
1853    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1854    if (!TargetRegisterInfo::isVirtualRegister(VR))
1855      return false;
1856    MachineInstr *Def = MRI->getVRegDef(VR);
1857    if (!Def)
1858      return false;
1859    if (!Flags.isByVal()) {
1860      if (!TII->isLoadFromStackSlot(Def, FI))
1861        return false;
1862    } else {
1863      return false;
1864    }
1865  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1866    if (Flags.isByVal())
1867      // ByVal argument is passed in as a pointer but it's now being
1868      // dereferenced. e.g.
1869      // define @foo(%struct.X* %A) {
1870      //   tail call @bar(%struct.X* byval %A)
1871      // }
1872      return false;
1873    SDValue Ptr = Ld->getBasePtr();
1874    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1875    if (!FINode)
1876      return false;
1877    FI = FINode->getIndex();
1878  } else
1879    return false;
1880
1881  assert(FI != INT_MAX);
1882  if (!MFI->isFixedObjectIndex(FI))
1883    return false;
1884  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1885}
1886
1887/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1888/// for tail call optimization. Targets which want to do tail call
1889/// optimization should implement this function.
1890bool
1891ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1892                                                     CallingConv::ID CalleeCC,
1893                                                     bool isVarArg,
1894                                                     bool isCalleeStructRet,
1895                                                     bool isCallerStructRet,
1896                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1897                                    const SmallVectorImpl<SDValue> &OutVals,
1898                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1899                                                     SelectionDAG& DAG) const {
1900  const Function *CallerF = DAG.getMachineFunction().getFunction();
1901  CallingConv::ID CallerCC = CallerF->getCallingConv();
1902  bool CCMatch = CallerCC == CalleeCC;
1903
1904  // Look for obvious safe cases to perform tail call optimization that do not
1905  // require ABI changes. This is what gcc calls sibcall.
1906
1907  // Do not sibcall optimize vararg calls unless the call site is not passing
1908  // any arguments.
1909  if (isVarArg && !Outs.empty())
1910    return false;
1911
1912  // Exception-handling functions need a special set of instructions to indicate
1913  // a return to the hardware. Tail-calling another function would probably
1914  // break this.
1915  if (CallerF->hasFnAttribute("interrupt"))
1916    return false;
1917
1918  // Also avoid sibcall optimization if either caller or callee uses struct
1919  // return semantics.
1920  if (isCalleeStructRet || isCallerStructRet)
1921    return false;
1922
1923  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1924  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
1925  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
1926  // support in the assembler and linker to be used. This would need to be
1927  // fixed to fully support tail calls in Thumb1.
1928  //
1929  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1930  // LR.  This means if we need to reload LR, it takes an extra instructions,
1931  // which outweighs the value of the tail call; but here we don't know yet
1932  // whether LR is going to be used.  Probably the right approach is to
1933  // generate the tail call here and turn it back into CALL/RET in
1934  // emitEpilogue if LR is used.
1935
1936  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
1937  // but we need to make sure there are enough registers; the only valid
1938  // registers are the 4 used for parameters.  We don't currently do this
1939  // case.
1940  if (Subtarget->isThumb1Only())
1941    return false;
1942
1943  // If the calling conventions do not match, then we'd better make sure the
1944  // results are returned in the same way as what the caller expects.
1945  if (!CCMatch) {
1946    SmallVector<CCValAssign, 16> RVLocs1;
1947    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
1948                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
1949    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1950
1951    SmallVector<CCValAssign, 16> RVLocs2;
1952    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
1953                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
1954    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1955
1956    if (RVLocs1.size() != RVLocs2.size())
1957      return false;
1958    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1959      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1960        return false;
1961      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1962        return false;
1963      if (RVLocs1[i].isRegLoc()) {
1964        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1965          return false;
1966      } else {
1967        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1968          return false;
1969      }
1970    }
1971  }
1972
1973  // If Caller's vararg or byval argument has been split between registers and
1974  // stack, do not perform tail call, since part of the argument is in caller's
1975  // local frame.
1976  const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
1977                                      getInfo<ARMFunctionInfo>();
1978  if (AFI_Caller->getArgRegsSaveSize())
1979    return false;
1980
1981  // If the callee takes no arguments then go on to check the results of the
1982  // call.
1983  if (!Outs.empty()) {
1984    // Check if stack adjustment is needed. For now, do not do this if any
1985    // argument is passed on the stack.
1986    SmallVector<CCValAssign, 16> ArgLocs;
1987    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
1988                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
1989    CCInfo.AnalyzeCallOperands(Outs,
1990                               CCAssignFnForNode(CalleeCC, false, isVarArg));
1991    if (CCInfo.getNextStackOffset()) {
1992      MachineFunction &MF = DAG.getMachineFunction();
1993
1994      // Check if the arguments are already laid out in the right way as
1995      // the caller's fixed stack objects.
1996      MachineFrameInfo *MFI = MF.getFrameInfo();
1997      const MachineRegisterInfo *MRI = &MF.getRegInfo();
1998      const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
1999      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2000           i != e;
2001           ++i, ++realArgIdx) {
2002        CCValAssign &VA = ArgLocs[i];
2003        EVT RegVT = VA.getLocVT();
2004        SDValue Arg = OutVals[realArgIdx];
2005        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2006        if (VA.getLocInfo() == CCValAssign::Indirect)
2007          return false;
2008        if (VA.needsCustom()) {
2009          // f64 and vector types are split into multiple registers or
2010          // register/stack-slot combinations.  The types will not match
2011          // the registers; give up on memory f64 refs until we figure
2012          // out what to do about this.
2013          if (!VA.isRegLoc())
2014            return false;
2015          if (!ArgLocs[++i].isRegLoc())
2016            return false;
2017          if (RegVT == MVT::v2f64) {
2018            if (!ArgLocs[++i].isRegLoc())
2019              return false;
2020            if (!ArgLocs[++i].isRegLoc())
2021              return false;
2022          }
2023        } else if (!VA.isRegLoc()) {
2024          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2025                                   MFI, MRI, TII))
2026            return false;
2027        }
2028      }
2029    }
2030  }
2031
2032  return true;
2033}
2034
2035bool
2036ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2037                                  MachineFunction &MF, bool isVarArg,
2038                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2039                                  LLVMContext &Context) const {
2040  SmallVector<CCValAssign, 16> RVLocs;
2041  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
2042  return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
2043                                                    isVarArg));
2044}
2045
2046static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2047                                    SDLoc DL, SelectionDAG &DAG) {
2048  const MachineFunction &MF = DAG.getMachineFunction();
2049  const Function *F = MF.getFunction();
2050
2051  StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2052
2053  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2054  // version of the "preferred return address". These offsets affect the return
2055  // instruction if this is a return from PL1 without hypervisor extensions.
2056  //    IRQ/FIQ: +4     "subs pc, lr, #4"
2057  //    SWI:     0      "subs pc, lr, #0"
2058  //    ABORT:   +4     "subs pc, lr, #4"
2059  //    UNDEF:   +4/+2  "subs pc, lr, #0"
2060  // UNDEF varies depending on where the exception came from ARM or Thumb
2061  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2062
2063  int64_t LROffset;
2064  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2065      IntKind == "ABORT")
2066    LROffset = 4;
2067  else if (IntKind == "SWI" || IntKind == "UNDEF")
2068    LROffset = 0;
2069  else
2070    report_fatal_error("Unsupported interrupt attribute. If present, value "
2071                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2072
2073  RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
2074
2075  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2076}
2077
2078SDValue
2079ARMTargetLowering::LowerReturn(SDValue Chain,
2080                               CallingConv::ID CallConv, bool isVarArg,
2081                               const SmallVectorImpl<ISD::OutputArg> &Outs,
2082                               const SmallVectorImpl<SDValue> &OutVals,
2083                               SDLoc dl, SelectionDAG &DAG) const {
2084
2085  // CCValAssign - represent the assignment of the return value to a location.
2086  SmallVector<CCValAssign, 16> RVLocs;
2087
2088  // CCState - Info about the registers and stack slots.
2089  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2090                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
2091
2092  // Analyze outgoing return values.
2093  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
2094                                               isVarArg));
2095
2096  SDValue Flag;
2097  SmallVector<SDValue, 4> RetOps;
2098  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2099  bool isLittleEndian = Subtarget->isLittle();
2100
2101  // Copy the result values into the output registers.
2102  for (unsigned i = 0, realRVLocIdx = 0;
2103       i != RVLocs.size();
2104       ++i, ++realRVLocIdx) {
2105    CCValAssign &VA = RVLocs[i];
2106    assert(VA.isRegLoc() && "Can only return in registers!");
2107
2108    SDValue Arg = OutVals[realRVLocIdx];
2109
2110    switch (VA.getLocInfo()) {
2111    default: llvm_unreachable("Unknown loc info!");
2112    case CCValAssign::Full: break;
2113    case CCValAssign::BCvt:
2114      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2115      break;
2116    }
2117
2118    if (VA.needsCustom()) {
2119      if (VA.getLocVT() == MVT::v2f64) {
2120        // Extract the first half and return it in two registers.
2121        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2122                                   DAG.getConstant(0, MVT::i32));
2123        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2124                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
2125
2126        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2127                                 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2128                                 Flag);
2129        Flag = Chain.getValue(1);
2130        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2131        VA = RVLocs[++i]; // skip ahead to next loc
2132        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2133                                 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2134                                 Flag);
2135        Flag = Chain.getValue(1);
2136        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2137        VA = RVLocs[++i]; // skip ahead to next loc
2138
2139        // Extract the 2nd half and fall through to handle it as an f64 value.
2140        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2141                          DAG.getConstant(1, MVT::i32));
2142      }
2143      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
2144      // available.
2145      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2146                                  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2147      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2148                               fmrrd.getValue(isLittleEndian ? 0 : 1),
2149                               Flag);
2150      Flag = Chain.getValue(1);
2151      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2152      VA = RVLocs[++i]; // skip ahead to next loc
2153      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2154                               fmrrd.getValue(isLittleEndian ? 1 : 0),
2155                               Flag);
2156    } else
2157      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2158
2159    // Guarantee that all emitted copies are
2160    // stuck together, avoiding something bad.
2161    Flag = Chain.getValue(1);
2162    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2163  }
2164
2165  // Update chain and glue.
2166  RetOps[0] = Chain;
2167  if (Flag.getNode())
2168    RetOps.push_back(Flag);
2169
2170  // CPUs which aren't M-class use a special sequence to return from
2171  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2172  // though we use "subs pc, lr, #N").
2173  //
2174  // M-class CPUs actually use a normal return sequence with a special
2175  // (hardware-provided) value in LR, so the normal code path works.
2176  if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2177      !Subtarget->isMClass()) {
2178    if (Subtarget->isThumb1Only())
2179      report_fatal_error("interrupt attribute is not supported in Thumb1");
2180    return LowerInterruptReturn(RetOps, dl, DAG);
2181  }
2182
2183  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2184}
2185
2186bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2187  if (N->getNumValues() != 1)
2188    return false;
2189  if (!N->hasNUsesOfValue(1, 0))
2190    return false;
2191
2192  SDValue TCChain = Chain;
2193  SDNode *Copy = *N->use_begin();
2194  if (Copy->getOpcode() == ISD::CopyToReg) {
2195    // If the copy has a glue operand, we conservatively assume it isn't safe to
2196    // perform a tail call.
2197    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2198      return false;
2199    TCChain = Copy->getOperand(0);
2200  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2201    SDNode *VMov = Copy;
2202    // f64 returned in a pair of GPRs.
2203    SmallPtrSet<SDNode*, 2> Copies;
2204    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2205         UI != UE; ++UI) {
2206      if (UI->getOpcode() != ISD::CopyToReg)
2207        return false;
2208      Copies.insert(*UI);
2209    }
2210    if (Copies.size() > 2)
2211      return false;
2212
2213    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2214         UI != UE; ++UI) {
2215      SDValue UseChain = UI->getOperand(0);
2216      if (Copies.count(UseChain.getNode()))
2217        // Second CopyToReg
2218        Copy = *UI;
2219      else
2220        // First CopyToReg
2221        TCChain = UseChain;
2222    }
2223  } else if (Copy->getOpcode() == ISD::BITCAST) {
2224    // f32 returned in a single GPR.
2225    if (!Copy->hasOneUse())
2226      return false;
2227    Copy = *Copy->use_begin();
2228    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2229      return false;
2230    TCChain = Copy->getOperand(0);
2231  } else {
2232    return false;
2233  }
2234
2235  bool HasRet = false;
2236  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2237       UI != UE; ++UI) {
2238    if (UI->getOpcode() != ARMISD::RET_FLAG &&
2239        UI->getOpcode() != ARMISD::INTRET_FLAG)
2240      return false;
2241    HasRet = true;
2242  }
2243
2244  if (!HasRet)
2245    return false;
2246
2247  Chain = TCChain;
2248  return true;
2249}
2250
2251bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2252  if (!Subtarget->supportsTailCall())
2253    return false;
2254
2255  if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
2256    return false;
2257
2258  return !Subtarget->isThumb1Only();
2259}
2260
2261// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2262// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2263// one of the above mentioned nodes. It has to be wrapped because otherwise
2264// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2265// be used to form addressing mode. These wrapped nodes will be selected
2266// into MOVi.
2267static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2268  EVT PtrVT = Op.getValueType();
2269  // FIXME there is no actual debug info here
2270  SDLoc dl(Op);
2271  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2272  SDValue Res;
2273  if (CP->isMachineConstantPoolEntry())
2274    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2275                                    CP->getAlignment());
2276  else
2277    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2278                                    CP->getAlignment());
2279  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2280}
2281
2282unsigned ARMTargetLowering::getJumpTableEncoding() const {
2283  return MachineJumpTableInfo::EK_Inline;
2284}
2285
2286SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2287                                             SelectionDAG &DAG) const {
2288  MachineFunction &MF = DAG.getMachineFunction();
2289  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2290  unsigned ARMPCLabelIndex = 0;
2291  SDLoc DL(Op);
2292  EVT PtrVT = getPointerTy();
2293  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2294  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2295  SDValue CPAddr;
2296  if (RelocM == Reloc::Static) {
2297    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2298  } else {
2299    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2300    ARMPCLabelIndex = AFI->createPICLabelUId();
2301    ARMConstantPoolValue *CPV =
2302      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2303                                      ARMCP::CPBlockAddress, PCAdj);
2304    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2305  }
2306  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2307  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
2308                               MachinePointerInfo::getConstantPool(),
2309                               false, false, false, 0);
2310  if (RelocM == Reloc::Static)
2311    return Result;
2312  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2313  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2314}
2315
2316// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2317SDValue
2318ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2319                                                 SelectionDAG &DAG) const {
2320  SDLoc dl(GA);
2321  EVT PtrVT = getPointerTy();
2322  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2323  MachineFunction &MF = DAG.getMachineFunction();
2324  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2325  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2326  ARMConstantPoolValue *CPV =
2327    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2328                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2329  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2330  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2331  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
2332                         MachinePointerInfo::getConstantPool(),
2333                         false, false, false, 0);
2334  SDValue Chain = Argument.getValue(1);
2335
2336  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2337  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2338
2339  // call __tls_get_addr.
2340  ArgListTy Args;
2341  ArgListEntry Entry;
2342  Entry.Node = Argument;
2343  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2344  Args.push_back(Entry);
2345
2346  // FIXME: is there useful debug info available here?
2347  TargetLowering::CallLoweringInfo CLI(DAG);
2348  CLI.setDebugLoc(dl).setChain(Chain)
2349    .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2350               DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
2351               0);
2352
2353  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2354  return CallResult.first;
2355}
2356
2357// Lower ISD::GlobalTLSAddress using the "initial exec" or
2358// "local exec" model.
2359SDValue
2360ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2361                                        SelectionDAG &DAG,
2362                                        TLSModel::Model model) const {
2363  const GlobalValue *GV = GA->getGlobal();
2364  SDLoc dl(GA);
2365  SDValue Offset;
2366  SDValue Chain = DAG.getEntryNode();
2367  EVT PtrVT = getPointerTy();
2368  // Get the Thread Pointer
2369  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2370
2371  if (model == TLSModel::InitialExec) {
2372    MachineFunction &MF = DAG.getMachineFunction();
2373    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2374    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2375    // Initial exec model.
2376    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2377    ARMConstantPoolValue *CPV =
2378      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2379                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2380                                      true);
2381    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2382    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2383    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2384                         MachinePointerInfo::getConstantPool(),
2385                         false, false, false, 0);
2386    Chain = Offset.getValue(1);
2387
2388    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2389    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2390
2391    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2392                         MachinePointerInfo::getConstantPool(),
2393                         false, false, false, 0);
2394  } else {
2395    // local exec model
2396    assert(model == TLSModel::LocalExec);
2397    ARMConstantPoolValue *CPV =
2398      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2399    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2400    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2401    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2402                         MachinePointerInfo::getConstantPool(),
2403                         false, false, false, 0);
2404  }
2405
2406  // The address of the thread local variable is the add of the thread
2407  // pointer with the offset of the variable.
2408  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2409}
2410
2411SDValue
2412ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2413  // TODO: implement the "local dynamic" model
2414  assert(Subtarget->isTargetELF() &&
2415         "TLS not implemented for non-ELF targets");
2416  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2417
2418  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2419
2420  switch (model) {
2421    case TLSModel::GeneralDynamic:
2422    case TLSModel::LocalDynamic:
2423      return LowerToTLSGeneralDynamicModel(GA, DAG);
2424    case TLSModel::InitialExec:
2425    case TLSModel::LocalExec:
2426      return LowerToTLSExecModels(GA, DAG, model);
2427  }
2428  llvm_unreachable("bogus TLS model");
2429}
2430
2431SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
2432                                                 SelectionDAG &DAG) const {
2433  EVT PtrVT = getPointerTy();
2434  SDLoc dl(Op);
2435  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2436  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2437    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
2438    ARMConstantPoolValue *CPV =
2439      ARMConstantPoolConstant::Create(GV,
2440                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
2441    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2442    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2443    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
2444                                 CPAddr,
2445                                 MachinePointerInfo::getConstantPool(),
2446                                 false, false, false, 0);
2447    SDValue Chain = Result.getValue(1);
2448    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2449    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
2450    if (!UseGOTOFF)
2451      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
2452                           MachinePointerInfo::getGOT(),
2453                           false, false, false, 0);
2454    return Result;
2455  }
2456
2457  // If we have T2 ops, we can materialize the address directly via movt/movw
2458  // pair. This is always cheaper.
2459  if (Subtarget->useMovt(DAG.getMachineFunction())) {
2460    ++NumMovwMovt;
2461    // FIXME: Once remat is capable of dealing with instructions with register
2462    // operands, expand this into two nodes.
2463    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2464                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2465  } else {
2466    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
2467    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2468    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2469                       MachinePointerInfo::getConstantPool(),
2470                       false, false, false, 0);
2471  }
2472}
2473
2474SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
2475                                                    SelectionDAG &DAG) const {
2476  EVT PtrVT = getPointerTy();
2477  SDLoc dl(Op);
2478  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2479  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2480
2481  if (Subtarget->useMovt(DAG.getMachineFunction()))
2482    ++NumMovwMovt;
2483
2484  // FIXME: Once remat is capable of dealing with instructions with register
2485  // operands, expand this into multiple nodes
2486  unsigned Wrapper =
2487      RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
2488
2489  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
2490  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
2491
2492  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
2493    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
2494                         MachinePointerInfo::getGOT(), false, false, false, 0);
2495  return Result;
2496}
2497
2498SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
2499                                                     SelectionDAG &DAG) const {
2500  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
2501  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
2502         "Windows on ARM expects to use movw/movt");
2503
2504  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2505  const ARMII::TOF TargetFlags =
2506    (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
2507  EVT PtrVT = getPointerTy();
2508  SDValue Result;
2509  SDLoc DL(Op);
2510
2511  ++NumMovwMovt;
2512
2513  // FIXME: Once remat is capable of dealing with instructions with register
2514  // operands, expand this into two nodes.
2515  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
2516                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
2517                                                  TargetFlags));
2518  if (GV->hasDLLImportStorageClass())
2519    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2520                         MachinePointerInfo::getGOT(), false, false, false, 0);
2521  return Result;
2522}
2523
2524SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
2525                                                    SelectionDAG &DAG) const {
2526  assert(Subtarget->isTargetELF() &&
2527         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
2528  MachineFunction &MF = DAG.getMachineFunction();
2529  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2530  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2531  EVT PtrVT = getPointerTy();
2532  SDLoc dl(Op);
2533  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2534  ARMConstantPoolValue *CPV =
2535    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
2536                                  ARMPCLabelIndex, PCAdj);
2537  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2538  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2539  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2540                               MachinePointerInfo::getConstantPool(),
2541                               false, false, false, 0);
2542  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2543  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2544}
2545
2546SDValue
2547ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
2548  SDLoc dl(Op);
2549  SDValue Val = DAG.getConstant(0, MVT::i32);
2550  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
2551                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
2552                     Op.getOperand(1), Val);
2553}
2554
2555SDValue
2556ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
2557  SDLoc dl(Op);
2558  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
2559                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
2560}
2561
2562SDValue
2563ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
2564                                          const ARMSubtarget *Subtarget) const {
2565  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2566  SDLoc dl(Op);
2567  switch (IntNo) {
2568  default: return SDValue();    // Don't custom lower most intrinsics.
2569  case Intrinsic::arm_rbit: {
2570    assert(Op.getOperand(0).getValueType() == MVT::i32 &&
2571           "RBIT intrinsic must have i32 type!");
2572    return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
2573  }
2574  case Intrinsic::arm_thread_pointer: {
2575    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2576    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2577  }
2578  case Intrinsic::eh_sjlj_lsda: {
2579    MachineFunction &MF = DAG.getMachineFunction();
2580    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2581    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2582    EVT PtrVT = getPointerTy();
2583    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2584    SDValue CPAddr;
2585    unsigned PCAdj = (RelocM != Reloc::PIC_)
2586      ? 0 : (Subtarget->isThumb() ? 4 : 8);
2587    ARMConstantPoolValue *CPV =
2588      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
2589                                      ARMCP::CPLSDA, PCAdj);
2590    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2591    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2592    SDValue Result =
2593      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2594                  MachinePointerInfo::getConstantPool(),
2595                  false, false, false, 0);
2596
2597    if (RelocM == Reloc::PIC_) {
2598      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2599      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2600    }
2601    return Result;
2602  }
2603  case Intrinsic::arm_neon_vmulls:
2604  case Intrinsic::arm_neon_vmullu: {
2605    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
2606      ? ARMISD::VMULLs : ARMISD::VMULLu;
2607    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
2608                       Op.getOperand(1), Op.getOperand(2));
2609  }
2610  }
2611}
2612
2613static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
2614                                 const ARMSubtarget *Subtarget) {
2615  // FIXME: handle "fence singlethread" more efficiently.
2616  SDLoc dl(Op);
2617  if (!Subtarget->hasDataBarrier()) {
2618    // Some ARMv6 cpus can support data barriers with an mcr instruction.
2619    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2620    // here.
2621    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2622           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
2623    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2624                       DAG.getConstant(0, MVT::i32));
2625  }
2626
2627  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
2628  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
2629  unsigned Domain = ARM_MB::ISH;
2630  if (Subtarget->isMClass()) {
2631    // Only a full system barrier exists in the M-class architectures.
2632    Domain = ARM_MB::SY;
2633  } else if (Subtarget->isSwift() && Ord == Release) {
2634    // Swift happens to implement ISHST barriers in a way that's compatible with
2635    // Release semantics but weaker than ISH so we'd be fools not to use
2636    // it. Beware: other processors probably don't!
2637    Domain = ARM_MB::ISHST;
2638  }
2639
2640  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
2641                     DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
2642                     DAG.getConstant(Domain, MVT::i32));
2643}
2644
2645static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2646                             const ARMSubtarget *Subtarget) {
2647  // ARM pre v5TE and Thumb1 does not have preload instructions.
2648  if (!(Subtarget->isThumb2() ||
2649        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2650    // Just preserve the chain.
2651    return Op.getOperand(0);
2652
2653  SDLoc dl(Op);
2654  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2655  if (!isRead &&
2656      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2657    // ARMv7 with MP extension has PLDW.
2658    return Op.getOperand(0);
2659
2660  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2661  if (Subtarget->isThumb()) {
2662    // Invert the bits.
2663    isRead = ~isRead & 1;
2664    isData = ~isData & 1;
2665  }
2666
2667  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
2668                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
2669                     DAG.getConstant(isData, MVT::i32));
2670}
2671
2672static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2673  MachineFunction &MF = DAG.getMachineFunction();
2674  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2675
2676  // vastart just stores the address of the VarArgsFrameIndex slot into the
2677  // memory location argument.
2678  SDLoc dl(Op);
2679  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2680  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2681  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2682  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2683                      MachinePointerInfo(SV), false, false, 0);
2684}
2685
2686SDValue
2687ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2688                                        SDValue &Root, SelectionDAG &DAG,
2689                                        SDLoc dl) const {
2690  MachineFunction &MF = DAG.getMachineFunction();
2691  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2692
2693  const TargetRegisterClass *RC;
2694  if (AFI->isThumb1OnlyFunction())
2695    RC = &ARM::tGPRRegClass;
2696  else
2697    RC = &ARM::GPRRegClass;
2698
2699  // Transform the arguments stored in physical registers into virtual ones.
2700  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2701  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2702
2703  SDValue ArgValue2;
2704  if (NextVA.isMemLoc()) {
2705    MachineFrameInfo *MFI = MF.getFrameInfo();
2706    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2707
2708    // Create load node to retrieve arguments from the stack.
2709    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2710    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2711                            MachinePointerInfo::getFixedStack(FI),
2712                            false, false, false, 0);
2713  } else {
2714    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2715    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2716  }
2717  if (!Subtarget->isLittle())
2718    std::swap (ArgValue, ArgValue2);
2719  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2720}
2721
2722void
2723ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
2724                                  unsigned InRegsParamRecordIdx,
2725                                  unsigned ArgSize,
2726                                  unsigned &ArgRegsSize,
2727                                  unsigned &ArgRegsSaveSize)
2728  const {
2729  unsigned NumGPRs;
2730  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
2731    unsigned RBegin, REnd;
2732    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
2733    NumGPRs = REnd - RBegin;
2734  } else {
2735    unsigned int firstUnalloced;
2736    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
2737                                                sizeof(GPRArgRegs) /
2738                                                sizeof(GPRArgRegs[0]));
2739    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
2740  }
2741
2742  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
2743  ArgRegsSize = NumGPRs * 4;
2744
2745  // If parameter is split between stack and GPRs...
2746  if (NumGPRs && Align > 4 &&
2747      (ArgRegsSize < ArgSize ||
2748        InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
2749    // Add padding for part of param recovered from GPRs.  For example,
2750    // if Align == 8, its last byte must be at address K*8 - 1.
2751    // We need to do it, since remained (stack) part of parameter has
2752    // stack alignment, and we need to "attach" "GPRs head" without gaps
2753    // to it:
2754    // Stack:
2755    // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
2756    // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
2757    //
2758    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2759    unsigned Padding =
2760        OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
2761    ArgRegsSaveSize = ArgRegsSize + Padding;
2762  } else
2763    // We don't need to extend regs save size for byval parameters if they
2764    // are passed via GPRs only.
2765    ArgRegsSaveSize = ArgRegsSize;
2766}
2767
2768// The remaining GPRs hold either the beginning of variable-argument
2769// data, or the beginning of an aggregate passed by value (usually
2770// byval).  Either way, we allocate stack slots adjacent to the data
2771// provided by our caller, and store the unallocated registers there.
2772// If this is a variadic function, the va_list pointer will begin with
2773// these values; otherwise, this reassembles a (byval) structure that
2774// was split between registers and memory.
2775// Return: The frame index registers were stored into.
2776int
2777ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
2778                                  SDLoc dl, SDValue &Chain,
2779                                  const Value *OrigArg,
2780                                  unsigned InRegsParamRecordIdx,
2781                                  unsigned OffsetFromOrigArg,
2782                                  unsigned ArgOffset,
2783                                  unsigned ArgSize,
2784                                  bool ForceMutable,
2785                                  unsigned ByValStoreOffset,
2786                                  unsigned TotalArgRegsSaveSize) const {
2787
2788  // Currently, two use-cases possible:
2789  // Case #1. Non-var-args function, and we meet first byval parameter.
2790  //          Setup first unallocated register as first byval register;
2791  //          eat all remained registers
2792  //          (these two actions are performed by HandleByVal method).
2793  //          Then, here, we initialize stack frame with
2794  //          "store-reg" instructions.
2795  // Case #2. Var-args function, that doesn't contain byval parameters.
2796  //          The same: eat all remained unallocated registers,
2797  //          initialize stack frame.
2798
2799  MachineFunction &MF = DAG.getMachineFunction();
2800  MachineFrameInfo *MFI = MF.getFrameInfo();
2801  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2802  unsigned firstRegToSaveIndex, lastRegToSaveIndex;
2803  unsigned RBegin, REnd;
2804  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
2805    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
2806    firstRegToSaveIndex = RBegin - ARM::R0;
2807    lastRegToSaveIndex = REnd - ARM::R0;
2808  } else {
2809    firstRegToSaveIndex = CCInfo.getFirstUnallocated
2810      (GPRArgRegs, array_lengthof(GPRArgRegs));
2811    lastRegToSaveIndex = 4;
2812  }
2813
2814  unsigned ArgRegsSize, ArgRegsSaveSize;
2815  computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
2816                 ArgRegsSize, ArgRegsSaveSize);
2817
2818  // Store any by-val regs to their spots on the stack so that they may be
2819  // loaded by deferencing the result of formal parameter pointer or va_next.
2820  // Note: once stack area for byval/varargs registers
2821  // was initialized, it can't be initialized again.
2822  if (ArgRegsSaveSize) {
2823    unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
2824
2825    if (Padding) {
2826      assert(AFI->getStoredByValParamsPadding() == 0 &&
2827             "The only parameter may be padded.");
2828      AFI->setStoredByValParamsPadding(Padding);
2829    }
2830
2831    int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
2832                                            Padding +
2833                                              ByValStoreOffset -
2834                                              (int64_t)TotalArgRegsSaveSize,
2835                                            false);
2836    SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
2837    if (Padding) {
2838       MFI->CreateFixedObject(Padding,
2839                              ArgOffset + ByValStoreOffset -
2840                                (int64_t)ArgRegsSaveSize,
2841                              false);
2842    }
2843
2844    SmallVector<SDValue, 4> MemOps;
2845    for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
2846         ++firstRegToSaveIndex, ++i) {
2847      const TargetRegisterClass *RC;
2848      if (AFI->isThumb1OnlyFunction())
2849        RC = &ARM::tGPRRegClass;
2850      else
2851        RC = &ARM::GPRRegClass;
2852
2853      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
2854      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2855      SDValue Store =
2856        DAG.getStore(Val.getValue(1), dl, Val, FIN,
2857                     MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
2858                     false, false, 0);
2859      MemOps.push_back(Store);
2860      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2861                        DAG.getConstant(4, getPointerTy()));
2862    }
2863
2864    AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
2865
2866    if (!MemOps.empty())
2867      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2868    return FrameIndex;
2869  } else {
2870    if (ArgSize == 0) {
2871      // We cannot allocate a zero-byte object for the first variadic argument,
2872      // so just make up a size.
2873      ArgSize = 4;
2874    }
2875    // This will point to the next argument passed via stack.
2876    return MFI->CreateFixedObject(
2877      ArgSize, ArgOffset, !ForceMutable);
2878  }
2879}
2880
2881// Setup stack frame, the va_list pointer will start from.
2882void
2883ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
2884                                        SDLoc dl, SDValue &Chain,
2885                                        unsigned ArgOffset,
2886                                        unsigned TotalArgRegsSaveSize,
2887                                        bool ForceMutable) const {
2888  MachineFunction &MF = DAG.getMachineFunction();
2889  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2890
2891  // Try to store any remaining integer argument regs
2892  // to their spots on the stack so that they may be loaded by deferencing
2893  // the result of va_next.
2894  // If there is no regs to be stored, just point address after last
2895  // argument passed via stack.
2896  int FrameIndex =
2897    StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
2898                   CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
2899                   0, TotalArgRegsSaveSize);
2900
2901  AFI->setVarArgsFrameIndex(FrameIndex);
2902}
2903
2904SDValue
2905ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2906                                        CallingConv::ID CallConv, bool isVarArg,
2907                                        const SmallVectorImpl<ISD::InputArg>
2908                                          &Ins,
2909                                        SDLoc dl, SelectionDAG &DAG,
2910                                        SmallVectorImpl<SDValue> &InVals)
2911                                          const {
2912  MachineFunction &MF = DAG.getMachineFunction();
2913  MachineFrameInfo *MFI = MF.getFrameInfo();
2914
2915  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2916
2917  // Assign locations to all of the incoming arguments.
2918  SmallVector<CCValAssign, 16> ArgLocs;
2919  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2920                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
2921  CCInfo.AnalyzeFormalArguments(Ins,
2922                                CCAssignFnForNode(CallConv, /* Return*/ false,
2923                                                  isVarArg));
2924
2925  SmallVector<SDValue, 16> ArgValues;
2926  int lastInsIndex = -1;
2927  SDValue ArgValue;
2928  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2929  unsigned CurArgIdx = 0;
2930
2931  // Initially ArgRegsSaveSize is zero.
2932  // Then we increase this value each time we meet byval parameter.
2933  // We also increase this value in case of varargs function.
2934  AFI->setArgRegsSaveSize(0);
2935
2936  unsigned ByValStoreOffset = 0;
2937  unsigned TotalArgRegsSaveSize = 0;
2938  unsigned ArgRegsSaveSizeMaxAlign = 4;
2939
2940  // Calculate the amount of stack space that we need to allocate to store
2941  // byval and variadic arguments that are passed in registers.
2942  // We need to know this before we allocate the first byval or variadic
2943  // argument, as they will be allocated a stack slot below the CFA (Canonical
2944  // Frame Address, the stack pointer at entry to the function).
2945  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2946    CCValAssign &VA = ArgLocs[i];
2947    if (VA.isMemLoc()) {
2948      int index = VA.getValNo();
2949      if (index != lastInsIndex) {
2950        ISD::ArgFlagsTy Flags = Ins[index].Flags;
2951        if (Flags.isByVal()) {
2952          unsigned ExtraArgRegsSize;
2953          unsigned ExtraArgRegsSaveSize;
2954          computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
2955                         Flags.getByValSize(),
2956                         ExtraArgRegsSize, ExtraArgRegsSaveSize);
2957
2958          TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
2959          if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
2960              ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
2961          CCInfo.nextInRegsParam();
2962        }
2963        lastInsIndex = index;
2964      }
2965    }
2966  }
2967  CCInfo.rewindByValRegsInfo();
2968  lastInsIndex = -1;
2969  if (isVarArg) {
2970    unsigned ExtraArgRegsSize;
2971    unsigned ExtraArgRegsSaveSize;
2972    computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
2973                   ExtraArgRegsSize, ExtraArgRegsSaveSize);
2974    TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
2975  }
2976  // If the arg regs save area contains N-byte aligned values, the
2977  // bottom of it must be at least N-byte aligned.
2978  TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
2979  TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
2980
2981  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2982    CCValAssign &VA = ArgLocs[i];
2983    std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
2984    CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
2985    // Arguments stored in registers.
2986    if (VA.isRegLoc()) {
2987      EVT RegVT = VA.getLocVT();
2988
2989      if (VA.needsCustom()) {
2990        // f64 and vector types are split up into multiple registers or
2991        // combinations of registers and stack slots.
2992        if (VA.getLocVT() == MVT::v2f64) {
2993          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2994                                                   Chain, DAG, dl);
2995          VA = ArgLocs[++i]; // skip ahead to next loc
2996          SDValue ArgValue2;
2997          if (VA.isMemLoc()) {
2998            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
2999            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
3000            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3001                                    MachinePointerInfo::getFixedStack(FI),
3002                                    false, false, false, 0);
3003          } else {
3004            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3005                                             Chain, DAG, dl);
3006          }
3007          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3008          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3009                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
3010          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3011                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
3012        } else
3013          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3014
3015      } else {
3016        const TargetRegisterClass *RC;
3017
3018        if (RegVT == MVT::f32)
3019          RC = &ARM::SPRRegClass;
3020        else if (RegVT == MVT::f64)
3021          RC = &ARM::DPRRegClass;
3022        else if (RegVT == MVT::v2f64)
3023          RC = &ARM::QPRRegClass;
3024        else if (RegVT == MVT::i32)
3025          RC = AFI->isThumb1OnlyFunction() ?
3026            (const TargetRegisterClass*)&ARM::tGPRRegClass :
3027            (const TargetRegisterClass*)&ARM::GPRRegClass;
3028        else
3029          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3030
3031        // Transform the arguments in physical registers into virtual ones.
3032        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3033        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3034      }
3035
3036      // If this is an 8 or 16-bit value, it is really passed promoted
3037      // to 32 bits.  Insert an assert[sz]ext to capture this, then
3038      // truncate to the right size.
3039      switch (VA.getLocInfo()) {
3040      default: llvm_unreachable("Unknown loc info!");
3041      case CCValAssign::Full: break;
3042      case CCValAssign::BCvt:
3043        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3044        break;
3045      case CCValAssign::SExt:
3046        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3047                               DAG.getValueType(VA.getValVT()));
3048        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3049        break;
3050      case CCValAssign::ZExt:
3051        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3052                               DAG.getValueType(VA.getValVT()));
3053        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3054        break;
3055      }
3056
3057      InVals.push_back(ArgValue);
3058
3059    } else { // VA.isRegLoc()
3060
3061      // sanity check
3062      assert(VA.isMemLoc());
3063      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3064
3065      int index = ArgLocs[i].getValNo();
3066
3067      // Some Ins[] entries become multiple ArgLoc[] entries.
3068      // Process them only once.
3069      if (index != lastInsIndex)
3070        {
3071          ISD::ArgFlagsTy Flags = Ins[index].Flags;
3072          // FIXME: For now, all byval parameter objects are marked mutable.
3073          // This can be changed with more analysis.
3074          // In case of tail call optimization mark all arguments mutable.
3075          // Since they could be overwritten by lowering of arguments in case of
3076          // a tail call.
3077          if (Flags.isByVal()) {
3078            unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
3079
3080            ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
3081            int FrameIndex = StoreByValRegs(
3082                CCInfo, DAG, dl, Chain, CurOrigArg,
3083                CurByValIndex,
3084                Ins[VA.getValNo()].PartOffset,
3085                VA.getLocMemOffset(),
3086                Flags.getByValSize(),
3087                true /*force mutable frames*/,
3088                ByValStoreOffset,
3089                TotalArgRegsSaveSize);
3090            ByValStoreOffset += Flags.getByValSize();
3091            ByValStoreOffset = std::min(ByValStoreOffset, 16U);
3092            InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
3093            CCInfo.nextInRegsParam();
3094          } else {
3095            unsigned FIOffset = VA.getLocMemOffset();
3096            int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3097                                            FIOffset, true);
3098
3099            // Create load nodes to retrieve arguments from the stack.
3100            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
3101            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3102                                         MachinePointerInfo::getFixedStack(FI),
3103                                         false, false, false, 0));
3104          }
3105          lastInsIndex = index;
3106        }
3107    }
3108  }
3109
3110  // varargs
3111  if (isVarArg)
3112    VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3113                         CCInfo.getNextStackOffset(),
3114                         TotalArgRegsSaveSize);
3115
3116  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3117
3118  return Chain;
3119}
3120
3121/// isFloatingPointZero - Return true if this is +0.0.
3122static bool isFloatingPointZero(SDValue Op) {
3123  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3124    return CFP->getValueAPF().isPosZero();
3125  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3126    // Maybe this has already been legalized into the constant pool?
3127    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3128      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3129      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3130        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3131          return CFP->getValueAPF().isPosZero();
3132    }
3133  }
3134  return false;
3135}
3136
3137/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3138/// the given operands.
3139SDValue
3140ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3141                             SDValue &ARMcc, SelectionDAG &DAG,
3142                             SDLoc dl) const {
3143  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3144    unsigned C = RHSC->getZExtValue();
3145    if (!isLegalICmpImmediate(C)) {
3146      // Constant does not fit, try adjusting it by one?
3147      switch (CC) {
3148      default: break;
3149      case ISD::SETLT:
3150      case ISD::SETGE:
3151        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3152          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3153          RHS = DAG.getConstant(C-1, MVT::i32);
3154        }
3155        break;
3156      case ISD::SETULT:
3157      case ISD::SETUGE:
3158        if (C != 0 && isLegalICmpImmediate(C-1)) {
3159          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3160          RHS = DAG.getConstant(C-1, MVT::i32);
3161        }
3162        break;
3163      case ISD::SETLE:
3164      case ISD::SETGT:
3165        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3166          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3167          RHS = DAG.getConstant(C+1, MVT::i32);
3168        }
3169        break;
3170      case ISD::SETULE:
3171      case ISD::SETUGT:
3172        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3173          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3174          RHS = DAG.getConstant(C+1, MVT::i32);
3175        }
3176        break;
3177      }
3178    }
3179  }
3180
3181  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3182  ARMISD::NodeType CompareType;
3183  switch (CondCode) {
3184  default:
3185    CompareType = ARMISD::CMP;
3186    break;
3187  case ARMCC::EQ:
3188  case ARMCC::NE:
3189    // Uses only Z Flag
3190    CompareType = ARMISD::CMPZ;
3191    break;
3192  }
3193  ARMcc = DAG.getConstant(CondCode, MVT::i32);
3194  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3195}
3196
3197/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3198SDValue
3199ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
3200                             SDLoc dl) const {
3201  SDValue Cmp;
3202  if (!isFloatingPointZero(RHS))
3203    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
3204  else
3205    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
3206  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3207}
3208
3209/// duplicateCmp - Glue values can have only one use, so this function
3210/// duplicates a comparison node.
3211SDValue
3212ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3213  unsigned Opc = Cmp.getOpcode();
3214  SDLoc DL(Cmp);
3215  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3216    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3217
3218  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3219  Cmp = Cmp.getOperand(0);
3220  Opc = Cmp.getOpcode();
3221  if (Opc == ARMISD::CMPFP)
3222    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3223  else {
3224    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3225    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
3226  }
3227  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3228}
3229
3230std::pair<SDValue, SDValue>
3231ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3232                                 SDValue &ARMcc) const {
3233  assert(Op.getValueType() == MVT::i32 &&  "Unsupported value type");
3234
3235  SDValue Value, OverflowCmp;
3236  SDValue LHS = Op.getOperand(0);
3237  SDValue RHS = Op.getOperand(1);
3238
3239
3240  // FIXME: We are currently always generating CMPs because we don't support
3241  // generating CMN through the backend. This is not as good as the natural
3242  // CMP case because it causes a register dependency and cannot be folded
3243  // later.
3244
3245  switch (Op.getOpcode()) {
3246  default:
3247    llvm_unreachable("Unknown overflow instruction!");
3248  case ISD::SADDO:
3249    ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
3250    Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
3251    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
3252    break;
3253  case ISD::UADDO:
3254    ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
3255    Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
3256    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
3257    break;
3258  case ISD::SSUBO:
3259    ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
3260    Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
3261    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
3262    break;
3263  case ISD::USUBO:
3264    ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
3265    Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
3266    OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
3267    break;
3268  } // switch (...)
3269
3270  return std::make_pair(Value, OverflowCmp);
3271}
3272
3273
3274SDValue
3275ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3276  // Let legalize expand this if it isn't a legal type yet.
3277  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3278    return SDValue();
3279
3280  SDValue Value, OverflowCmp;
3281  SDValue ARMcc;
3282  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3283  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3284  // We use 0 and 1 as false and true values.
3285  SDValue TVal = DAG.getConstant(1, MVT::i32);
3286  SDValue FVal = DAG.getConstant(0, MVT::i32);
3287  EVT VT = Op.getValueType();
3288
3289  SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
3290                                 ARMcc, CCR, OverflowCmp);
3291
3292  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3293  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
3294}
3295
3296
3297SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3298  SDValue Cond = Op.getOperand(0);
3299  SDValue SelectTrue = Op.getOperand(1);
3300  SDValue SelectFalse = Op.getOperand(2);
3301  SDLoc dl(Op);
3302  unsigned Opc = Cond.getOpcode();
3303
3304  if (Cond.getResNo() == 1 &&
3305      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3306       Opc == ISD::USUBO)) {
3307    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3308      return SDValue();
3309
3310    SDValue Value, OverflowCmp;
3311    SDValue ARMcc;
3312    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3313    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3314    EVT VT = Op.getValueType();
3315
3316    return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
3317                       ARMcc, CCR, OverflowCmp);
3318
3319  }
3320
3321  // Convert:
3322  //
3323  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3324  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3325  //
3326  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3327    const ConstantSDNode *CMOVTrue =
3328      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3329    const ConstantSDNode *CMOVFalse =
3330      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3331
3332    if (CMOVTrue && CMOVFalse) {
3333      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3334      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3335
3336      SDValue True;
3337      SDValue False;
3338      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3339        True = SelectTrue;
3340        False = SelectFalse;
3341      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3342        True = SelectFalse;
3343        False = SelectTrue;
3344      }
3345
3346      if (True.getNode() && False.getNode()) {
3347        EVT VT = Op.getValueType();
3348        SDValue ARMcc = Cond.getOperand(2);
3349        SDValue CCR = Cond.getOperand(3);
3350        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3351        assert(True.getValueType() == VT);
3352        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
3353      }
3354    }
3355  }
3356
3357  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
3358  // undefined bits before doing a full-word comparison with zero.
3359  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
3360                     DAG.getConstant(1, Cond.getValueType()));
3361
3362  return DAG.getSelectCC(dl, Cond,
3363                         DAG.getConstant(0, Cond.getValueType()),
3364                         SelectTrue, SelectFalse, ISD::SETNE);
3365}
3366
3367static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
3368  if (CC == ISD::SETNE)
3369    return ISD::SETEQ;
3370  return ISD::getSetCCInverse(CC, true);
3371}
3372
3373static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
3374                                 bool &swpCmpOps, bool &swpVselOps) {
3375  // Start by selecting the GE condition code for opcodes that return true for
3376  // 'equality'
3377  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
3378      CC == ISD::SETULE)
3379    CondCode = ARMCC::GE;
3380
3381  // and GT for opcodes that return false for 'equality'.
3382  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
3383           CC == ISD::SETULT)
3384    CondCode = ARMCC::GT;
3385
3386  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
3387  // to swap the compare operands.
3388  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
3389      CC == ISD::SETULT)
3390    swpCmpOps = true;
3391
3392  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
3393  // If we have an unordered opcode, we need to swap the operands to the VSEL
3394  // instruction (effectively negating the condition).
3395  //
3396  // This also has the effect of swapping which one of 'less' or 'greater'
3397  // returns true, so we also swap the compare operands. It also switches
3398  // whether we return true for 'equality', so we compensate by picking the
3399  // opposite condition code to our original choice.
3400  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
3401      CC == ISD::SETUGT) {
3402    swpCmpOps = !swpCmpOps;
3403    swpVselOps = !swpVselOps;
3404    CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
3405  }
3406
3407  // 'ordered' is 'anything but unordered', so use the VS condition code and
3408  // swap the VSEL operands.
3409  if (CC == ISD::SETO) {
3410    CondCode = ARMCC::VS;
3411    swpVselOps = true;
3412  }
3413
3414  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
3415  // code and swap the VSEL operands.
3416  if (CC == ISD::SETUNE) {
3417    CondCode = ARMCC::EQ;
3418    swpVselOps = true;
3419  }
3420}
3421
3422SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
3423  EVT VT = Op.getValueType();
3424  SDValue LHS = Op.getOperand(0);
3425  SDValue RHS = Op.getOperand(1);
3426  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3427  SDValue TrueVal = Op.getOperand(2);
3428  SDValue FalseVal = Op.getOperand(3);
3429  SDLoc dl(Op);
3430
3431  if (LHS.getValueType() == MVT::i32) {
3432    // Try to generate VSEL on ARMv8.
3433    // The VSEL instruction can't use all the usual ARM condition
3434    // codes: it only has two bits to select the condition code, so it's
3435    // constrained to use only GE, GT, VS and EQ.
3436    //
3437    // To implement all the various ISD::SETXXX opcodes, we sometimes need to
3438    // swap the operands of the previous compare instruction (effectively
3439    // inverting the compare condition, swapping 'less' and 'greater') and
3440    // sometimes need to swap the operands to the VSEL (which inverts the
3441    // condition in the sense of firing whenever the previous condition didn't)
3442    if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
3443                                      TrueVal.getValueType() == MVT::f64)) {
3444      ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3445      if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
3446          CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
3447        CC = getInverseCCForVSEL(CC);
3448        std::swap(TrueVal, FalseVal);
3449      }
3450    }
3451
3452    SDValue ARMcc;
3453    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3454    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3455    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
3456                       Cmp);
3457  }
3458
3459  ARMCC::CondCodes CondCode, CondCode2;
3460  FPCCToARMCC(CC, CondCode, CondCode2);
3461
3462  // Try to generate VSEL on ARMv8.
3463  if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
3464                                    TrueVal.getValueType() == MVT::f64)) {
3465    // We can select VMAXNM/VMINNM from a compare followed by a select with the
3466    // same operands, as follows:
3467    //   c = fcmp [ogt, olt, ugt, ult] a, b
3468    //   select c, a, b
3469    // We only do this in unsafe-fp-math, because signed zeros and NaNs are
3470    // handled differently than the original code sequence.
3471    if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
3472        RHS == FalseVal) {
3473      if (CC == ISD::SETOGT || CC == ISD::SETUGT)
3474        return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
3475      if (CC == ISD::SETOLT || CC == ISD::SETULT)
3476        return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
3477    }
3478
3479    bool swpCmpOps = false;
3480    bool swpVselOps = false;
3481    checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
3482
3483    if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
3484        CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
3485      if (swpCmpOps)
3486        std::swap(LHS, RHS);
3487      if (swpVselOps)
3488        std::swap(TrueVal, FalseVal);
3489    }
3490  }
3491
3492  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
3493  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
3494  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3495  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
3496                               ARMcc, CCR, Cmp);
3497  if (CondCode2 != ARMCC::AL) {
3498    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
3499    // FIXME: Needs another CMP because flag can have but one use.
3500    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
3501    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
3502                         Result, TrueVal, ARMcc2, CCR, Cmp2);
3503  }
3504  return Result;
3505}
3506
3507/// canChangeToInt - Given the fp compare operand, return true if it is suitable
3508/// to morph to an integer compare sequence.
3509static bool canChangeToInt(SDValue Op, bool &SeenZero,
3510                           const ARMSubtarget *Subtarget) {
3511  SDNode *N = Op.getNode();
3512  if (!N->hasOneUse())
3513    // Otherwise it requires moving the value from fp to integer registers.
3514    return false;
3515  if (!N->getNumValues())
3516    return false;
3517  EVT VT = Op.getValueType();
3518  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
3519    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
3520    // vmrs are very slow, e.g. cortex-a8.
3521    return false;
3522
3523  if (isFloatingPointZero(Op)) {
3524    SeenZero = true;
3525    return true;
3526  }
3527  return ISD::isNormalLoad(N);
3528}
3529
3530static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
3531  if (isFloatingPointZero(Op))
3532    return DAG.getConstant(0, MVT::i32);
3533
3534  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
3535    return DAG.getLoad(MVT::i32, SDLoc(Op),
3536                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
3537                       Ld->isVolatile(), Ld->isNonTemporal(),
3538                       Ld->isInvariant(), Ld->getAlignment());
3539
3540  llvm_unreachable("Unknown VFP cmp argument!");
3541}
3542
3543static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
3544                           SDValue &RetVal1, SDValue &RetVal2) {
3545  if (isFloatingPointZero(Op)) {
3546    RetVal1 = DAG.getConstant(0, MVT::i32);
3547    RetVal2 = DAG.getConstant(0, MVT::i32);
3548    return;
3549  }
3550
3551  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
3552    SDValue Ptr = Ld->getBasePtr();
3553    RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
3554                          Ld->getChain(), Ptr,
3555                          Ld->getPointerInfo(),
3556                          Ld->isVolatile(), Ld->isNonTemporal(),
3557                          Ld->isInvariant(), Ld->getAlignment());
3558
3559    EVT PtrType = Ptr.getValueType();
3560    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
3561    SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
3562                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
3563    RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
3564                          Ld->getChain(), NewPtr,
3565                          Ld->getPointerInfo().getWithOffset(4),
3566                          Ld->isVolatile(), Ld->isNonTemporal(),
3567                          Ld->isInvariant(), NewAlign);
3568    return;
3569  }
3570
3571  llvm_unreachable("Unknown VFP cmp argument!");
3572}
3573
3574/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
3575/// f32 and even f64 comparisons to integer ones.
3576SDValue
3577ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
3578  SDValue Chain = Op.getOperand(0);
3579  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3580  SDValue LHS = Op.getOperand(2);
3581  SDValue RHS = Op.getOperand(3);
3582  SDValue Dest = Op.getOperand(4);
3583  SDLoc dl(Op);
3584
3585  bool LHSSeenZero = false;
3586  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
3587  bool RHSSeenZero = false;
3588  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
3589  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
3590    // If unsafe fp math optimization is enabled and there are no other uses of
3591    // the CMP operands, and the condition code is EQ or NE, we can optimize it
3592    // to an integer comparison.
3593    if (CC == ISD::SETOEQ)
3594      CC = ISD::SETEQ;
3595    else if (CC == ISD::SETUNE)
3596      CC = ISD::SETNE;
3597
3598    SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
3599    SDValue ARMcc;
3600    if (LHS.getValueType() == MVT::f32) {
3601      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3602                        bitcastf32Toi32(LHS, DAG), Mask);
3603      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3604                        bitcastf32Toi32(RHS, DAG), Mask);
3605      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3606      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3607      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3608                         Chain, Dest, ARMcc, CCR, Cmp);
3609    }
3610
3611    SDValue LHS1, LHS2;
3612    SDValue RHS1, RHS2;
3613    expandf64Toi32(LHS, DAG, LHS1, LHS2);
3614    expandf64Toi32(RHS, DAG, RHS1, RHS2);
3615    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
3616    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
3617    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3618    ARMcc = DAG.getConstant(CondCode, MVT::i32);
3619    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3620    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
3621    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
3622  }
3623
3624  return SDValue();
3625}
3626
3627SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3628  SDValue Chain = Op.getOperand(0);
3629  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3630  SDValue LHS = Op.getOperand(2);
3631  SDValue RHS = Op.getOperand(3);
3632  SDValue Dest = Op.getOperand(4);
3633  SDLoc dl(Op);
3634
3635  if (LHS.getValueType() == MVT::i32) {
3636    SDValue ARMcc;
3637    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3638    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3639    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3640                       Chain, Dest, ARMcc, CCR, Cmp);
3641  }
3642
3643  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3644
3645  if (getTargetMachine().Options.UnsafeFPMath &&
3646      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
3647       CC == ISD::SETNE || CC == ISD::SETUNE)) {
3648    SDValue Result = OptimizeVFPBrcond(Op, DAG);
3649    if (Result.getNode())
3650      return Result;
3651  }
3652
3653  ARMCC::CondCodes CondCode, CondCode2;
3654  FPCCToARMCC(CC, CondCode, CondCode2);
3655
3656  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
3657  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
3658  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3659  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3660  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
3661  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
3662  if (CondCode2 != ARMCC::AL) {
3663    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
3664    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
3665    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
3666  }
3667  return Res;
3668}
3669
3670SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
3671  SDValue Chain = Op.getOperand(0);
3672  SDValue Table = Op.getOperand(1);
3673  SDValue Index = Op.getOperand(2);
3674  SDLoc dl(Op);
3675
3676  EVT PTy = getPointerTy();
3677  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
3678  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3679  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
3680  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
3681  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
3682  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
3683  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
3684  if (Subtarget->isThumb2()) {
3685    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
3686    // which does another jump to the destination. This also makes it easier
3687    // to translate it to TBB / TBH later.
3688    // FIXME: This might not work if the function is extremely large.
3689    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
3690                       Addr, Op.getOperand(2), JTI, UId);
3691  }
3692  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
3693    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
3694                       MachinePointerInfo::getJumpTable(),
3695                       false, false, false, 0);
3696    Chain = Addr.getValue(1);
3697    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
3698    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3699  } else {
3700    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
3701                       MachinePointerInfo::getJumpTable(),
3702                       false, false, false, 0);
3703    Chain = Addr.getValue(1);
3704    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3705  }
3706}
3707
3708static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
3709  EVT VT = Op.getValueType();
3710  SDLoc dl(Op);
3711
3712  if (Op.getValueType().getVectorElementType() == MVT::i32) {
3713    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
3714      return Op;
3715    return DAG.UnrollVectorOp(Op.getNode());
3716  }
3717
3718  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
3719         "Invalid type for custom lowering!");
3720  if (VT != MVT::v4i16)
3721    return DAG.UnrollVectorOp(Op.getNode());
3722
3723  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
3724  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
3725}
3726
3727static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
3728  EVT VT = Op.getValueType();
3729  if (VT.isVector())
3730    return LowerVectorFP_TO_INT(Op, DAG);
3731
3732  SDLoc dl(Op);
3733  unsigned Opc;
3734
3735  switch (Op.getOpcode()) {
3736  default: llvm_unreachable("Invalid opcode!");
3737  case ISD::FP_TO_SINT:
3738    Opc = ARMISD::FTOSI;
3739    break;
3740  case ISD::FP_TO_UINT:
3741    Opc = ARMISD::FTOUI;
3742    break;
3743  }
3744  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
3745  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
3746}
3747
3748static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3749  EVT VT = Op.getValueType();
3750  SDLoc dl(Op);
3751
3752  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
3753    if (VT.getVectorElementType() == MVT::f32)
3754      return Op;
3755    return DAG.UnrollVectorOp(Op.getNode());
3756  }
3757
3758  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
3759         "Invalid type for custom lowering!");
3760  if (VT != MVT::v4f32)
3761    return DAG.UnrollVectorOp(Op.getNode());
3762
3763  unsigned CastOpc;
3764  unsigned Opc;
3765  switch (Op.getOpcode()) {
3766  default: llvm_unreachable("Invalid opcode!");
3767  case ISD::SINT_TO_FP:
3768    CastOpc = ISD::SIGN_EXTEND;
3769    Opc = ISD::SINT_TO_FP;
3770    break;
3771  case ISD::UINT_TO_FP:
3772    CastOpc = ISD::ZERO_EXTEND;
3773    Opc = ISD::UINT_TO_FP;
3774    break;
3775  }
3776
3777  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
3778  return DAG.getNode(Opc, dl, VT, Op);
3779}
3780
3781static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3782  EVT VT = Op.getValueType();
3783  if (VT.isVector())
3784    return LowerVectorINT_TO_FP(Op, DAG);
3785
3786  SDLoc dl(Op);
3787  unsigned Opc;
3788
3789  switch (Op.getOpcode()) {
3790  default: llvm_unreachable("Invalid opcode!");
3791  case ISD::SINT_TO_FP:
3792    Opc = ARMISD::SITOF;
3793    break;
3794  case ISD::UINT_TO_FP:
3795    Opc = ARMISD::UITOF;
3796    break;
3797  }
3798
3799  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
3800  return DAG.getNode(Opc, dl, VT, Op);
3801}
3802
3803SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
3804  // Implement fcopysign with a fabs and a conditional fneg.
3805  SDValue Tmp0 = Op.getOperand(0);
3806  SDValue Tmp1 = Op.getOperand(1);
3807  SDLoc dl(Op);
3808  EVT VT = Op.getValueType();
3809  EVT SrcVT = Tmp1.getValueType();
3810  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
3811    Tmp0.getOpcode() == ARMISD::VMOVDRR;
3812  bool UseNEON = !InGPR && Subtarget->hasNEON();
3813
3814  if (UseNEON) {
3815    // Use VBSL to copy the sign bit.
3816    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
3817    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
3818                               DAG.getTargetConstant(EncodedVal, MVT::i32));
3819    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
3820    if (VT == MVT::f64)
3821      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
3822                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
3823                         DAG.getConstant(32, MVT::i32));
3824    else /*if (VT == MVT::f32)*/
3825      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
3826    if (SrcVT == MVT::f32) {
3827      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
3828      if (VT == MVT::f64)
3829        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
3830                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
3831                           DAG.getConstant(32, MVT::i32));
3832    } else if (VT == MVT::f32)
3833      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
3834                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
3835                         DAG.getConstant(32, MVT::i32));
3836    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
3837    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
3838
3839    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
3840                                            MVT::i32);
3841    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
3842    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
3843                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
3844
3845    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
3846                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
3847                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
3848    if (VT == MVT::f32) {
3849      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
3850      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
3851                        DAG.getConstant(0, MVT::i32));
3852    } else {
3853      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
3854    }
3855
3856    return Res;
3857  }
3858
3859  // Bitcast operand 1 to i32.
3860  if (SrcVT == MVT::f64)
3861    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
3862                       Tmp1).getValue(1);
3863  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
3864
3865  // Or in the signbit with integer operations.
3866  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
3867  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
3868  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
3869  if (VT == MVT::f32) {
3870    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
3871                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
3872    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
3873                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
3874  }
3875
3876  // f64: Or the high part with signbit and then combine two parts.
3877  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
3878                     Tmp0);
3879  SDValue Lo = Tmp0.getValue(0);
3880  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
3881  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
3882  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
3883}
3884
3885SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
3886  MachineFunction &MF = DAG.getMachineFunction();
3887  MachineFrameInfo *MFI = MF.getFrameInfo();
3888  MFI->setReturnAddressIsTaken(true);
3889
3890  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3891    return SDValue();
3892
3893  EVT VT = Op.getValueType();
3894  SDLoc dl(Op);
3895  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3896  if (Depth) {
3897    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
3898    SDValue Offset = DAG.getConstant(4, MVT::i32);
3899    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
3900                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
3901                       MachinePointerInfo(), false, false, false, 0);
3902  }
3903
3904  // Return LR, which contains the return address. Mark it an implicit live-in.
3905  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3906  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
3907}
3908
3909SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
3910  const ARMBaseRegisterInfo &ARI =
3911    *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
3912  MachineFunction &MF = DAG.getMachineFunction();
3913  MachineFrameInfo *MFI = MF.getFrameInfo();
3914  MFI->setFrameAddressIsTaken(true);
3915
3916  EVT VT = Op.getValueType();
3917  SDLoc dl(Op);  // FIXME probably not meaningful
3918  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3919  unsigned FrameReg = ARI.getFrameRegister(MF);
3920  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
3921  while (Depth--)
3922    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
3923                            MachinePointerInfo(),
3924                            false, false, false, 0);
3925  return FrameAddr;
3926}
3927
3928// FIXME? Maybe this could be a TableGen attribute on some registers and
3929// this table could be generated automatically from RegInfo.
3930unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
3931                                              EVT VT) const {
3932  unsigned Reg = StringSwitch<unsigned>(RegName)
3933                       .Case("sp", ARM::SP)
3934                       .Default(0);
3935  if (Reg)
3936    return Reg;
3937  report_fatal_error("Invalid register name global variable");
3938}
3939
3940/// ExpandBITCAST - If the target supports VFP, this function is called to
3941/// expand a bit convert where either the source or destination type is i64 to
3942/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
3943/// operand type is illegal (e.g., v2f32 for a target that doesn't support
3944/// vectors), since the legalizer won't know what to do with that.
3945static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
3946  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3947  SDLoc dl(N);
3948  SDValue Op = N->getOperand(0);
3949
3950  // This function is only supposed to be called for i64 types, either as the
3951  // source or destination of the bit convert.
3952  EVT SrcVT = Op.getValueType();
3953  EVT DstVT = N->getValueType(0);
3954  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
3955         "ExpandBITCAST called for non-i64 type");
3956
3957  // Turn i64->f64 into VMOVDRR.
3958  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
3959    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3960                             DAG.getConstant(0, MVT::i32));
3961    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3962                             DAG.getConstant(1, MVT::i32));
3963    return DAG.getNode(ISD::BITCAST, dl, DstVT,
3964                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
3965  }
3966
3967  // Turn f64->i64 into VMOVRRD.
3968  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
3969    SDValue Cvt;
3970    if (TLI.isBigEndian() && SrcVT.isVector() &&
3971        SrcVT.getVectorNumElements() > 1)
3972      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3973                        DAG.getVTList(MVT::i32, MVT::i32),
3974                        DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
3975    else
3976      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3977                        DAG.getVTList(MVT::i32, MVT::i32), Op);
3978    // Merge the pieces into a single i64 value.
3979    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
3980  }
3981
3982  return SDValue();
3983}
3984
3985/// getZeroVector - Returns a vector of specified type with all zero elements.
3986/// Zero vectors are used to represent vector negation and in those cases
3987/// will be implemented with the NEON VNEG instruction.  However, VNEG does
3988/// not support i64 elements, so sometimes the zero vectors will need to be
3989/// explicitly constructed.  Regardless, use a canonical VMOV to create the
3990/// zero vector.
3991static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
3992  assert(VT.isVector() && "Expected a vector type");
3993  // The canonical modified immediate encoding of a zero vector is....0!
3994  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
3995  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
3996  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
3997  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3998}
3999
4000/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4001/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4002SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4003                                                SelectionDAG &DAG) const {
4004  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4005  EVT VT = Op.getValueType();
4006  unsigned VTBits = VT.getSizeInBits();
4007  SDLoc dl(Op);
4008  SDValue ShOpLo = Op.getOperand(0);
4009  SDValue ShOpHi = Op.getOperand(1);
4010  SDValue ShAmt  = Op.getOperand(2);
4011  SDValue ARMcc;
4012  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4013
4014  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4015
4016  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4017                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
4018  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4019  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4020                                   DAG.getConstant(VTBits, MVT::i32));
4021  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4022  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4023  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4024
4025  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4026  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
4027                          ARMcc, DAG, dl);
4028  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4029  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
4030                           CCR, Cmp);
4031
4032  SDValue Ops[2] = { Lo, Hi };
4033  return DAG.getMergeValues(Ops, dl);
4034}
4035
4036/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4037/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4038SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4039                                               SelectionDAG &DAG) const {
4040  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4041  EVT VT = Op.getValueType();
4042  unsigned VTBits = VT.getSizeInBits();
4043  SDLoc dl(Op);
4044  SDValue ShOpLo = Op.getOperand(0);
4045  SDValue ShOpHi =