1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-isel"
16#include "ARMISelLowering.h"
17#include "ARM.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMSubtarget.h"
23#include "ARMTargetMachine.h"
24#include "ARMTargetObjectFile.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "llvm/CallingConv.h"
27#include "llvm/Constants.h"
28#include "llvm/Function.h"
29#include "llvm/GlobalValue.h"
30#include "llvm/Instruction.h"
31#include "llvm/Instructions.h"
32#include "llvm/Intrinsics.h"
33#include "llvm/Type.h"
34#include "llvm/CodeGen/CallingConvLower.h"
35#include "llvm/CodeGen/IntrinsicLowering.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/MC/MCSectionMachO.h"
44#include "llvm/Target/TargetOptions.h"
45#include "llvm/ADT/StringExtras.h"
46#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/ErrorHandling.h"
49#include "llvm/Support/MathExtras.h"
50#include "llvm/Support/raw_ostream.h"
51using namespace llvm;
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
55
56// This option should go away when tail calls fully work.
57static cl::opt<bool>
58EnableARMTailCalls("arm-tail-calls", cl::Hidden,
59  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
60  cl::init(false));
61
62cl::opt<bool>
63EnableARMLongCalls("arm-long-calls", cl::Hidden,
64  cl::desc("Generate calls via indirect call instructions"),
65  cl::init(false));
66
67static cl::opt<bool>
68ARMInterworking("arm-interworking", cl::Hidden,
69  cl::desc("Enable / disable ARM interworking (for debugging only)"),
70  cl::init(true));
71
72namespace {
73  class ARMCCState : public CCState {
74  public:
75    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
76               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
77               LLVMContext &C, ParmContext PC)
78        : CCState(CC, isVarArg, MF, TM, locs, C) {
79      assert(((PC == Call) || (PC == Prologue)) &&
80             "ARMCCState users must specify whether their context is call"
81             "or prologue generation.");
82      CallOrPrologue = PC;
83    }
84  };
85}
86
87// The APCS parameter registers.
88static const uint16_t GPRArgRegs[] = {
89  ARM::R0, ARM::R1, ARM::R2, ARM::R3
90};
91
92void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
93                                       EVT PromotedBitwiseVT) {
94  if (VT != PromotedLdStVT) {
95    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
96    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
97                       PromotedLdStVT.getSimpleVT());
98
99    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
100    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
101                       PromotedLdStVT.getSimpleVT());
102  }
103
104  EVT ElemTy = VT.getVectorElementType();
105  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
106    setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
107  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
108  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
109  if (ElemTy == MVT::i32) {
110    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
111    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
112    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
113    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
114  } else {
115    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
116    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
117    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
118    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
119  }
120  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
121  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
122  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
123  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
124  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
125  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
126  setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
127  if (VT.isInteger()) {
128    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
129    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
130    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
131  }
132
133  // Promote all bit-wise operations.
134  if (VT.isInteger() && VT != PromotedBitwiseVT) {
135    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
136    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
137                       PromotedBitwiseVT.getSimpleVT());
138    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
139    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
140                       PromotedBitwiseVT.getSimpleVT());
141    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
142    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
143                       PromotedBitwiseVT.getSimpleVT());
144  }
145
146  // Neon does not support vector divide/remainder operations.
147  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
148  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
149  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
150  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
151  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
152  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
153}
154
155void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
156  addRegisterClass(VT, ARM::DPRRegisterClass);
157  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
158}
159
160void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
161  addRegisterClass(VT, ARM::QPRRegisterClass);
162  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
163}
164
165static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
166  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
167    return new TargetLoweringObjectFileMachO();
168
169  return new ARMElfTargetObjectFile();
170}
171
172ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
173    : TargetLowering(TM, createTLOF(TM)) {
174  Subtarget = &TM.getSubtarget<ARMSubtarget>();
175  RegInfo = TM.getRegisterInfo();
176  Itins = TM.getInstrItineraryData();
177
178  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
179
180  if (Subtarget->isTargetDarwin()) {
181    // Uses VFP for Thumb libfuncs if available.
182    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
183      // Single-precision floating-point arithmetic.
184      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
185      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
186      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
187      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
188
189      // Double-precision floating-point arithmetic.
190      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
191      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
192      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
193      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
194
195      // Single-precision comparisons.
196      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
197      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
198      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
199      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
200      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
201      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
202      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
203      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
204
205      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
206      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
207      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
208      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
209      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
210      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
211      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
212      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
213
214      // Double-precision comparisons.
215      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
216      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
217      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
218      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
219      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
220      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
221      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
222      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
223
224      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
225      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
226      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
227      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
228      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
229      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
230      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
231      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
232
233      // Floating-point to integer conversions.
234      // i64 conversions are done via library routines even when generating VFP
235      // instructions, so use the same ones.
236      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
237      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
238      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
239      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
240
241      // Conversions between floating types.
242      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
243      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
244
245      // Integer to floating-point conversions.
246      // i64 conversions are done via library routines even when generating VFP
247      // instructions, so use the same ones.
248      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
249      // e.g., __floatunsidf vs. __floatunssidfvfp.
250      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
251      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
252      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
253      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
254    }
255  }
256
257  // These libcalls are not available in 32-bit.
258  setLibcallName(RTLIB::SHL_I128, 0);
259  setLibcallName(RTLIB::SRL_I128, 0);
260  setLibcallName(RTLIB::SRA_I128, 0);
261
262  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
263    // Double-precision floating-point arithmetic helper functions
264    // RTABI chapter 4.1.2, Table 2
265    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
266    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
267    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
268    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
269    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
270    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
271    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
272    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
273
274    // Double-precision floating-point comparison helper functions
275    // RTABI chapter 4.1.2, Table 3
276    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
277    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
278    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
279    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
280    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
281    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
282    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
283    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
284    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
285    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
286    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
287    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
288    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
289    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
290    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
291    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
292    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
293    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
294    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
295    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
296    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
297    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
298    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
299    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
300
301    // Single-precision floating-point arithmetic helper functions
302    // RTABI chapter 4.1.2, Table 4
303    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
304    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
305    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
306    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
307    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
308    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
309    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
310    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
311
312    // Single-precision floating-point comparison helper functions
313    // RTABI chapter 4.1.2, Table 5
314    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
315    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
316    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
317    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
318    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
319    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
320    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
321    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
322    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
323    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
324    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
325    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
326    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
327    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
328    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
329    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
330    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
331    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
332    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
333    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
334    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
335    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
336    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
337    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
338
339    // Floating-point to integer conversions.
340    // RTABI chapter 4.1.2, Table 6
341    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
342    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
343    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
344    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
345    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
346    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
347    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
348    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
349    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
350    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
351    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
352    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
353    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
354    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
355    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
356    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
357
358    // Conversions between floating types.
359    // RTABI chapter 4.1.2, Table 7
360    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
361    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
362    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
363    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
364
365    // Integer to floating-point conversions.
366    // RTABI chapter 4.1.2, Table 8
367    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
368    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
369    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
370    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
371    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
372    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
373    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
374    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
375    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
376    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
377    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
378    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
379    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
380    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
381    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
382    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
383
384    // Long long helper functions
385    // RTABI chapter 4.2, Table 9
386    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
387    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
388    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
389    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
390    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
391    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
392    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
393    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
394    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
395    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
396
397    // Integer division functions
398    // RTABI chapter 4.3.1
399    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
400    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
401    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
402    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
403    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
404    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
405    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
406    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
407    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
408    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
409    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
410    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
411    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
412    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
413    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
414    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
415
416    // Memory operations
417    // RTABI chapter 4.3.4
418    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
419    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
420    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
421    setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
422    setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
423    setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
424  }
425
426  // Use divmod compiler-rt calls for iOS 5.0 and later.
427  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
428      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
429    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
430    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
431  }
432
433  if (Subtarget->isThumb1Only())
434    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
435  else
436    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
437  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
438      !Subtarget->isThumb1Only()) {
439    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
440    if (!Subtarget->isFPOnlySP())
441      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
442
443    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
444  }
445
446  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
447       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
448    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
449         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
450      setTruncStoreAction((MVT::SimpleValueType)VT,
451                          (MVT::SimpleValueType)InnerVT, Expand);
452    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
453    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
454    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
455  }
456
457  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
458
459  if (Subtarget->hasNEON()) {
460    addDRTypeForNEON(MVT::v2f32);
461    addDRTypeForNEON(MVT::v8i8);
462    addDRTypeForNEON(MVT::v4i16);
463    addDRTypeForNEON(MVT::v2i32);
464    addDRTypeForNEON(MVT::v1i64);
465
466    addQRTypeForNEON(MVT::v4f32);
467    addQRTypeForNEON(MVT::v2f64);
468    addQRTypeForNEON(MVT::v16i8);
469    addQRTypeForNEON(MVT::v8i16);
470    addQRTypeForNEON(MVT::v4i32);
471    addQRTypeForNEON(MVT::v2i64);
472
473    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
474    // neither Neon nor VFP support any arithmetic operations on it.
475    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
476    // supported for v4f32.
477    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
478    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
479    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
480    // FIXME: Code duplication: FDIV and FREM are expanded always, see
481    // ARMTargetLowering::addTypeForNEON method for details.
482    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
483    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
484    // FIXME: Create unittest.
485    // In another words, find a way when "copysign" appears in DAG with vector
486    // operands.
487    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
488    // FIXME: Code duplication: SETCC has custom operation action, see
489    // ARMTargetLowering::addTypeForNEON method for details.
490    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
491    // FIXME: Create unittest for FNEG and for FABS.
492    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
493    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
494    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
495    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
496    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
497    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
498    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
499    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
500    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
501    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
502    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
503    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
504    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
505    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
506    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
507    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
508    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
509    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
510
511    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
512    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
513    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
514    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
515    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
516    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
517    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
518    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
519    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
520    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
521
522    // Neon does not support some operations on v1i64 and v2i64 types.
523    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
524    // Custom handling for some quad-vector types to detect VMULL.
525    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
526    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
527    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
528    // Custom handling for some vector types to avoid expensive expansions
529    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
530    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
531    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
532    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
533    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
534    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
535    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
536    // a destination type that is wider than the source, and nor does
537    // it have a FP_TO_[SU]INT instruction with a narrower destination than
538    // source.
539    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
540    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
541    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
542    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
543
544    setTargetDAGCombine(ISD::INTRINSIC_VOID);
545    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
546    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
547    setTargetDAGCombine(ISD::SHL);
548    setTargetDAGCombine(ISD::SRL);
549    setTargetDAGCombine(ISD::SRA);
550    setTargetDAGCombine(ISD::SIGN_EXTEND);
551    setTargetDAGCombine(ISD::ZERO_EXTEND);
552    setTargetDAGCombine(ISD::ANY_EXTEND);
553    setTargetDAGCombine(ISD::SELECT_CC);
554    setTargetDAGCombine(ISD::BUILD_VECTOR);
555    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
556    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
557    setTargetDAGCombine(ISD::STORE);
558    setTargetDAGCombine(ISD::FP_TO_SINT);
559    setTargetDAGCombine(ISD::FP_TO_UINT);
560    setTargetDAGCombine(ISD::FDIV);
561
562    // It is legal to extload from v4i8 to v4i16 or v4i32.
563    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
564                  MVT::v4i16, MVT::v2i16,
565                  MVT::v2i32};
566    for (unsigned i = 0; i < 6; ++i) {
567      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
568      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
569      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
570    }
571  }
572
573  computeRegisterProperties();
574
575  // ARM does not have f32 extending load.
576  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
577
578  // ARM does not have i1 sign extending load.
579  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
580
581  // ARM supports all 4 flavors of integer indexed load / store.
582  if (!Subtarget->isThumb1Only()) {
583    for (unsigned im = (unsigned)ISD::PRE_INC;
584         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
585      setIndexedLoadAction(im,  MVT::i1,  Legal);
586      setIndexedLoadAction(im,  MVT::i8,  Legal);
587      setIndexedLoadAction(im,  MVT::i16, Legal);
588      setIndexedLoadAction(im,  MVT::i32, Legal);
589      setIndexedStoreAction(im, MVT::i1,  Legal);
590      setIndexedStoreAction(im, MVT::i8,  Legal);
591      setIndexedStoreAction(im, MVT::i16, Legal);
592      setIndexedStoreAction(im, MVT::i32, Legal);
593    }
594  }
595
596  // i64 operation support.
597  setOperationAction(ISD::MUL,     MVT::i64, Expand);
598  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
599  if (Subtarget->isThumb1Only()) {
600    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
601    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
602  }
603  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
604      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
605    setOperationAction(ISD::MULHS, MVT::i32, Expand);
606
607  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
608  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
609  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
610  setOperationAction(ISD::SRL,       MVT::i64, Custom);
611  setOperationAction(ISD::SRA,       MVT::i64, Custom);
612
613  if (!Subtarget->isThumb1Only()) {
614    // FIXME: We should do this for Thumb1 as well.
615    setOperationAction(ISD::ADDC,    MVT::i32, Custom);
616    setOperationAction(ISD::ADDE,    MVT::i32, Custom);
617    setOperationAction(ISD::SUBC,    MVT::i32, Custom);
618    setOperationAction(ISD::SUBE,    MVT::i32, Custom);
619  }
620
621  // ARM does not have ROTL.
622  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
623  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
624  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
625  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
626    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
627
628  // These just redirect to CTTZ and CTLZ on ARM.
629  setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
630  setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
631
632  // Only ARMv6 has BSWAP.
633  if (!Subtarget->hasV6Ops())
634    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
635
636  // These are expanded into libcalls.
637  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
638    // v7M has a hardware divider
639    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
640    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
641  }
642  setOperationAction(ISD::SREM,  MVT::i32, Expand);
643  setOperationAction(ISD::UREM,  MVT::i32, Expand);
644  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
645  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
646
647  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
648  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
649  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
650  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
651  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
652
653  setOperationAction(ISD::TRAP, MVT::Other, Legal);
654
655  // Use the default implementation.
656  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
657  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
658  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
659  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
660  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
661  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
662
663  if (!Subtarget->isTargetDarwin()) {
664    // Non-Darwin platforms may return values in these registers via the
665    // personality function.
666    setOperationAction(ISD::EHSELECTION,      MVT::i32,   Expand);
667    setOperationAction(ISD::EXCEPTIONADDR,    MVT::i32,   Expand);
668    setExceptionPointerRegister(ARM::R0);
669    setExceptionSelectorRegister(ARM::R1);
670  }
671
672  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
673  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
674  // the default expansion.
675  // FIXME: This should be checking for v6k, not just v6.
676  if (Subtarget->hasDataBarrier() ||
677      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
678    // membarrier needs custom lowering; the rest are legal and handled
679    // normally.
680    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
681    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
682    // Custom lowering for 64-bit ops
683    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
684    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
685    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
686    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
687    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
688    setOperationAction(ISD::ATOMIC_SWAP,  MVT::i64, Custom);
689    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
690    // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
691    setInsertFencesForAtomic(true);
692  } else {
693    // Set them all for expansion, which will force libcalls.
694    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
695    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
696    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
697    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
698    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
699    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
700    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
701    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
702    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
703    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
704    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
705    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
706    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
707    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
708    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
709    // Unordered/Monotonic case.
710    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
711    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
712    // Since the libcalls include locking, fold in the fences
713    setShouldFoldAtomicFences(true);
714  }
715
716  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
717
718  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
719  if (!Subtarget->hasV6Ops()) {
720    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
721    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
722  }
723  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
724
725  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
726      !Subtarget->isThumb1Only()) {
727    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
728    // iff target supports vfp2.
729    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
730    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
731  }
732
733  // We want to custom lower some of our intrinsics.
734  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
735  if (Subtarget->isTargetDarwin()) {
736    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
737    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
738    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
739  }
740
741  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
742  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
743  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
744  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
745  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
746  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
747  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
748  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
749  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
750
751  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
752  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
753  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
754  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
755  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
756
757  // We don't support sin/cos/fmod/copysign/pow
758  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
759  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
760  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
761  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
762  setOperationAction(ISD::FREM,      MVT::f64, Expand);
763  setOperationAction(ISD::FREM,      MVT::f32, Expand);
764  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
765      !Subtarget->isThumb1Only()) {
766    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
767    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
768  }
769  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
770  setOperationAction(ISD::FPOW,      MVT::f32, Expand);
771
772  if (!Subtarget->hasVFP4()) {
773    setOperationAction(ISD::FMA, MVT::f64, Expand);
774    setOperationAction(ISD::FMA, MVT::f32, Expand);
775  }
776
777  // Various VFP goodness
778  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
779    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
780    if (Subtarget->hasVFP2()) {
781      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
782      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
783      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
784      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
785    }
786    // Special handling for half-precision FP.
787    if (!Subtarget->hasFP16()) {
788      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
789      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
790    }
791  }
792
793  // We have target-specific dag combine patterns for the following nodes:
794  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
795  setTargetDAGCombine(ISD::ADD);
796  setTargetDAGCombine(ISD::SUB);
797  setTargetDAGCombine(ISD::MUL);
798
799  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
800    setTargetDAGCombine(ISD::AND);
801    setTargetDAGCombine(ISD::OR);
802    setTargetDAGCombine(ISD::XOR);
803  }
804
805  if (Subtarget->hasV6Ops())
806    setTargetDAGCombine(ISD::SRL);
807
808  setStackPointerRegisterToSaveRestore(ARM::SP);
809
810  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
811      !Subtarget->hasVFP2())
812    setSchedulingPreference(Sched::RegPressure);
813  else
814    setSchedulingPreference(Sched::Hybrid);
815
816  //// temporary - rewrite interface to use type
817  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
818  maxStoresPerMemset = 16;
819  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
820
821  // On ARM arguments smaller than 4 bytes are extended, so all arguments
822  // are at least 4 bytes aligned.
823  setMinStackArgumentAlignment(4);
824
825  benefitFromCodePlacementOpt = true;
826
827  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
828}
829
830// FIXME: It might make sense to define the representative register class as the
831// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
832// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
833// SPR's representative would be DPR_VFP2. This should work well if register
834// pressure tracking were modified such that a register use would increment the
835// pressure of the register class's representative and all of it's super
836// classes' representatives transitively. We have not implemented this because
837// of the difficulty prior to coalescing of modeling operand register classes
838// due to the common occurrence of cross class copies and subregister insertions
839// and extractions.
840std::pair<const TargetRegisterClass*, uint8_t>
841ARMTargetLowering::findRepresentativeClass(EVT VT) const{
842  const TargetRegisterClass *RRC = 0;
843  uint8_t Cost = 1;
844  switch (VT.getSimpleVT().SimpleTy) {
845  default:
846    return TargetLowering::findRepresentativeClass(VT);
847  // Use DPR as representative register class for all floating point
848  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
849  // the cost is 1 for both f32 and f64.
850  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
851  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
852    RRC = ARM::DPRRegisterClass;
853    // When NEON is used for SP, only half of the register file is available
854    // because operations that define both SP and DP results will be constrained
855    // to the VFP2 class (D0-D15). We currently model this constraint prior to
856    // coalescing by double-counting the SP regs. See the FIXME above.
857    if (Subtarget->useNEONForSinglePrecisionFP())
858      Cost = 2;
859    break;
860  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
861  case MVT::v4f32: case MVT::v2f64:
862    RRC = ARM::DPRRegisterClass;
863    Cost = 2;
864    break;
865  case MVT::v4i64:
866    RRC = ARM::DPRRegisterClass;
867    Cost = 4;
868    break;
869  case MVT::v8i64:
870    RRC = ARM::DPRRegisterClass;
871    Cost = 8;
872    break;
873  }
874  return std::make_pair(RRC, Cost);
875}
876
877const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
878  switch (Opcode) {
879  default: return 0;
880  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
881  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
882  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
883  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
884  case ARMISD::CALL:          return "ARMISD::CALL";
885  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
886  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
887  case ARMISD::tCALL:         return "ARMISD::tCALL";
888  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
889  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
890  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
891  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
892  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
893  case ARMISD::CMP:           return "ARMISD::CMP";
894  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
895  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
896  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
897  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
898  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
899
900  case ARMISD::CMOV:          return "ARMISD::CMOV";
901  case ARMISD::CAND:          return "ARMISD::CAND";
902  case ARMISD::COR:           return "ARMISD::COR";
903  case ARMISD::CXOR:          return "ARMISD::CXOR";
904
905  case ARMISD::RBIT:          return "ARMISD::RBIT";
906
907  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
908  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
909  case ARMISD::SITOF:         return "ARMISD::SITOF";
910  case ARMISD::UITOF:         return "ARMISD::UITOF";
911
912  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
913  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
914  case ARMISD::RRX:           return "ARMISD::RRX";
915
916  case ARMISD::ADDC:          return "ARMISD::ADDC";
917  case ARMISD::ADDE:          return "ARMISD::ADDE";
918  case ARMISD::SUBC:          return "ARMISD::SUBC";
919  case ARMISD::SUBE:          return "ARMISD::SUBE";
920
921  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
922  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
923
924  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
925  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
926
927  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
928
929  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
930
931  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
932
933  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
934  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
935
936  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
937
938  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
939  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
940  case ARMISD::VCGE:          return "ARMISD::VCGE";
941  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
942  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
943  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
944  case ARMISD::VCGT:          return "ARMISD::VCGT";
945  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
946  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
947  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
948  case ARMISD::VTST:          return "ARMISD::VTST";
949
950  case ARMISD::VSHL:          return "ARMISD::VSHL";
951  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
952  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
953  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
954  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
955  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
956  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
957  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
958  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
959  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
960  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
961  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
962  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
963  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
964  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
965  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
966  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
967  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
968  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
969  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
970  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
971  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
972  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
973  case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
974  case ARMISD::VDUP:          return "ARMISD::VDUP";
975  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
976  case ARMISD::VEXT:          return "ARMISD::VEXT";
977  case ARMISD::VREV64:        return "ARMISD::VREV64";
978  case ARMISD::VREV32:        return "ARMISD::VREV32";
979  case ARMISD::VREV16:        return "ARMISD::VREV16";
980  case ARMISD::VZIP:          return "ARMISD::VZIP";
981  case ARMISD::VUZP:          return "ARMISD::VUZP";
982  case ARMISD::VTRN:          return "ARMISD::VTRN";
983  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
984  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
985  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
986  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
987  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
988  case ARMISD::FMAX:          return "ARMISD::FMAX";
989  case ARMISD::FMIN:          return "ARMISD::FMIN";
990  case ARMISD::BFI:           return "ARMISD::BFI";
991  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
992  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
993  case ARMISD::VBSL:          return "ARMISD::VBSL";
994  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
995  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
996  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
997  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
998  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
999  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
1000  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
1001  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
1002  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
1003  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
1004  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
1005  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
1006  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
1007  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
1008  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
1009  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
1010  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
1011  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
1012  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
1013  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
1014  }
1015}
1016
1017EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
1018  if (!VT.isVector()) return getPointerTy();
1019  return VT.changeVectorElementTypeToInteger();
1020}
1021
1022/// getRegClassFor - Return the register class that should be used for the
1023/// specified value type.
1024const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
1025  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1026  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1027  // load / store 4 to 8 consecutive D registers.
1028  if (Subtarget->hasNEON()) {
1029    if (VT == MVT::v4i64)
1030      return ARM::QQPRRegisterClass;
1031    else if (VT == MVT::v8i64)
1032      return ARM::QQQQPRRegisterClass;
1033  }
1034  return TargetLowering::getRegClassFor(VT);
1035}
1036
1037// Create a fast isel object.
1038FastISel *
1039ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
1040  return ARM::createFastISel(funcInfo);
1041}
1042
1043/// getMaximalGlobalOffset - Returns the maximal possible offset which can
1044/// be used for loads / stores from the global.
1045unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
1046  return (Subtarget->isThumb1Only() ? 127 : 4095);
1047}
1048
1049Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1050  unsigned NumVals = N->getNumValues();
1051  if (!NumVals)
1052    return Sched::RegPressure;
1053
1054  for (unsigned i = 0; i != NumVals; ++i) {
1055    EVT VT = N->getValueType(i);
1056    if (VT == MVT::Glue || VT == MVT::Other)
1057      continue;
1058    if (VT.isFloatingPoint() || VT.isVector())
1059      return Sched::ILP;
1060  }
1061
1062  if (!N->isMachineOpcode())
1063    return Sched::RegPressure;
1064
1065  // Load are scheduled for latency even if there instruction itinerary
1066  // is not available.
1067  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
1068  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1069
1070  if (MCID.getNumDefs() == 0)
1071    return Sched::RegPressure;
1072  if (!Itins->isEmpty() &&
1073      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1074    return Sched::ILP;
1075
1076  return Sched::RegPressure;
1077}
1078
1079//===----------------------------------------------------------------------===//
1080// Lowering Code
1081//===----------------------------------------------------------------------===//
1082
1083/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1084static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1085  switch (CC) {
1086  default: llvm_unreachable("Unknown condition code!");
1087  case ISD::SETNE:  return ARMCC::NE;
1088  case ISD::SETEQ:  return ARMCC::EQ;
1089  case ISD::SETGT:  return ARMCC::GT;
1090  case ISD::SETGE:  return ARMCC::GE;
1091  case ISD::SETLT:  return ARMCC::LT;
1092  case ISD::SETLE:  return ARMCC::LE;
1093  case ISD::SETUGT: return ARMCC::HI;
1094  case ISD::SETUGE: return ARMCC::HS;
1095  case ISD::SETULT: return ARMCC::LO;
1096  case ISD::SETULE: return ARMCC::LS;
1097  }
1098}
1099
1100/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1101static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1102                        ARMCC::CondCodes &CondCode2) {
1103  CondCode2 = ARMCC::AL;
1104  switch (CC) {
1105  default: llvm_unreachable("Unknown FP condition!");
1106  case ISD::SETEQ:
1107  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1108  case ISD::SETGT:
1109  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1110  case ISD::SETGE:
1111  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1112  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1113  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1114  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1115  case ISD::SETO:   CondCode = ARMCC::VC; break;
1116  case ISD::SETUO:  CondCode = ARMCC::VS; break;
1117  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1118  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1119  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1120  case ISD::SETLT:
1121  case ISD::SETULT: CondCode = ARMCC::LT; break;
1122  case ISD::SETLE:
1123  case ISD::SETULE: CondCode = ARMCC::LE; break;
1124  case ISD::SETNE:
1125  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1126  }
1127}
1128
1129//===----------------------------------------------------------------------===//
1130//                      Calling Convention Implementation
1131//===----------------------------------------------------------------------===//
1132
1133#include "ARMGenCallingConv.inc"
1134
1135/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
1136/// given CallingConvention value.
1137CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1138                                                 bool Return,
1139                                                 bool isVarArg) const {
1140  switch (CC) {
1141  default:
1142    llvm_unreachable("Unsupported calling convention");
1143  case CallingConv::Fast:
1144    if (Subtarget->hasVFP2() && !isVarArg) {
1145      if (!Subtarget->isAAPCS_ABI())
1146        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1147      // For AAPCS ABI targets, just use VFP variant of the calling convention.
1148      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1149    }
1150    // Fallthrough
1151  case CallingConv::C: {
1152    // Use target triple & subtarget features to do actual dispatch.
1153    if (!Subtarget->isAAPCS_ABI())
1154      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1155    else if (Subtarget->hasVFP2() &&
1156             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1157             !isVarArg)
1158      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1159    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1160  }
1161  case CallingConv::ARM_AAPCS_VFP:
1162    if (!isVarArg)
1163      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1164    // Fallthrough
1165  case CallingConv::ARM_AAPCS:
1166    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1167  case CallingConv::ARM_APCS:
1168    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1169  }
1170}
1171
1172/// LowerCallResult - Lower the result values of a call into the
1173/// appropriate copies out of appropriate physical registers.
1174SDValue
1175ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
1176                                   CallingConv::ID CallConv, bool isVarArg,
1177                                   const SmallVectorImpl<ISD::InputArg> &Ins,
1178                                   DebugLoc dl, SelectionDAG &DAG,
1179                                   SmallVectorImpl<SDValue> &InVals) const {
1180
1181  // Assign locations to each value returned by this call.
1182  SmallVector<CCValAssign, 16> RVLocs;
1183  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1184                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
1185  CCInfo.AnalyzeCallResult(Ins,
1186                           CCAssignFnForNode(CallConv, /* Return*/ true,
1187                                             isVarArg));
1188
1189  // Copy all of the result registers out of their specified physreg.
1190  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1191    CCValAssign VA = RVLocs[i];
1192
1193    SDValue Val;
1194    if (VA.needsCustom()) {
1195      // Handle f64 or half of a v2f64.
1196      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1197                                      InFlag);
1198      Chain = Lo.getValue(1);
1199      InFlag = Lo.getValue(2);
1200      VA = RVLocs[++i]; // skip ahead to next loc
1201      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1202                                      InFlag);
1203      Chain = Hi.getValue(1);
1204      InFlag = Hi.getValue(2);
1205      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1206
1207      if (VA.getLocVT() == MVT::v2f64) {
1208        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1209        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1210                          DAG.getConstant(0, MVT::i32));
1211
1212        VA = RVLocs[++i]; // skip ahead to next loc
1213        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1214        Chain = Lo.getValue(1);
1215        InFlag = Lo.getValue(2);
1216        VA = RVLocs[++i]; // skip ahead to next loc
1217        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1218        Chain = Hi.getValue(1);
1219        InFlag = Hi.getValue(2);
1220        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1221        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1222                          DAG.getConstant(1, MVT::i32));
1223      }
1224    } else {
1225      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1226                               InFlag);
1227      Chain = Val.getValue(1);
1228      InFlag = Val.getValue(2);
1229    }
1230
1231    switch (VA.getLocInfo()) {
1232    default: llvm_unreachable("Unknown loc info!");
1233    case CCValAssign::Full: break;
1234    case CCValAssign::BCvt:
1235      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1236      break;
1237    }
1238
1239    InVals.push_back(Val);
1240  }
1241
1242  return Chain;
1243}
1244
1245/// LowerMemOpCallTo - Store the argument to the stack.
1246SDValue
1247ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1248                                    SDValue StackPtr, SDValue Arg,
1249                                    DebugLoc dl, SelectionDAG &DAG,
1250                                    const CCValAssign &VA,
1251                                    ISD::ArgFlagsTy Flags) const {
1252  unsigned LocMemOffset = VA.getLocMemOffset();
1253  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1254  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1255  return DAG.getStore(Chain, dl, Arg, PtrOff,
1256                      MachinePointerInfo::getStack(LocMemOffset),
1257                      false, false, 0);
1258}
1259
1260void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
1261                                         SDValue Chain, SDValue &Arg,
1262                                         RegsToPassVector &RegsToPass,
1263                                         CCValAssign &VA, CCValAssign &NextVA,
1264                                         SDValue &StackPtr,
1265                                         SmallVector<SDValue, 8> &MemOpChains,
1266                                         ISD::ArgFlagsTy Flags) const {
1267
1268  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1269                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
1270  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
1271
1272  if (NextVA.isRegLoc())
1273    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
1274  else {
1275    assert(NextVA.isMemLoc());
1276    if (StackPtr.getNode() == 0)
1277      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1278
1279    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
1280                                           dl, DAG, NextVA,
1281                                           Flags));
1282  }
1283}
1284
1285/// LowerCall - Lowering a call into a callseq_start <-
1286/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
1287/// nodes.
1288SDValue
1289ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1290                             CallingConv::ID CallConv, bool isVarArg,
1291                             bool doesNotRet, bool &isTailCall,
1292                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1293                             const SmallVectorImpl<SDValue> &OutVals,
1294                             const SmallVectorImpl<ISD::InputArg> &Ins,
1295                             DebugLoc dl, SelectionDAG &DAG,
1296                             SmallVectorImpl<SDValue> &InVals) const {
1297  MachineFunction &MF = DAG.getMachineFunction();
1298  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1299  bool IsSibCall = false;
1300  // Disable tail calls if they're not supported.
1301  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
1302    isTailCall = false;
1303  if (isTailCall) {
1304    // Check if it's really possible to do a tail call.
1305    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1306                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1307                                                   Outs, OutVals, Ins, DAG);
1308    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1309    // detected sibcalls.
1310    if (isTailCall) {
1311      ++NumTailCalls;
1312      IsSibCall = true;
1313    }
1314  }
1315
1316  // Analyze operands of the call, assigning locations to each operand.
1317  SmallVector<CCValAssign, 16> ArgLocs;
1318  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1319                 getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
1320  CCInfo.AnalyzeCallOperands(Outs,
1321                             CCAssignFnForNode(CallConv, /* Return*/ false,
1322                                               isVarArg));
1323
1324  // Get a count of how many bytes are to be pushed on the stack.
1325  unsigned NumBytes = CCInfo.getNextStackOffset();
1326
1327  // For tail calls, memory operands are available in our caller's stack.
1328  if (IsSibCall)
1329    NumBytes = 0;
1330
1331  // Adjust the stack pointer for the new arguments...
1332  // These operations are automatically eliminated by the prolog/epilog pass
1333  if (!IsSibCall)
1334    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1335
1336  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1337
1338  RegsToPassVector RegsToPass;
1339  SmallVector<SDValue, 8> MemOpChains;
1340
1341  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1342  // of tail call optimization, arguments are handled later.
1343  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1344       i != e;
1345       ++i, ++realArgIdx) {
1346    CCValAssign &VA = ArgLocs[i];
1347    SDValue Arg = OutVals[realArgIdx];
1348    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1349    bool isByVal = Flags.isByVal();
1350
1351    // Promote the value if needed.
1352    switch (VA.getLocInfo()) {
1353    default: llvm_unreachable("Unknown loc info!");
1354    case CCValAssign::Full: break;
1355    case CCValAssign::SExt:
1356      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1357      break;
1358    case CCValAssign::ZExt:
1359      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1360      break;
1361    case CCValAssign::AExt:
1362      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1363      break;
1364    case CCValAssign::BCvt:
1365      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1366      break;
1367    }
1368
1369    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1370    if (VA.needsCustom()) {
1371      if (VA.getLocVT() == MVT::v2f64) {
1372        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1373                                  DAG.getConstant(0, MVT::i32));
1374        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1375                                  DAG.getConstant(1, MVT::i32));
1376
1377        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1378                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1379
1380        VA = ArgLocs[++i]; // skip ahead to next loc
1381        if (VA.isRegLoc()) {
1382          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1383                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1384        } else {
1385          assert(VA.isMemLoc());
1386
1387          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1388                                                 dl, DAG, VA, Flags));
1389        }
1390      } else {
1391        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1392                         StackPtr, MemOpChains, Flags);
1393      }
1394    } else if (VA.isRegLoc()) {
1395      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1396    } else if (isByVal) {
1397      assert(VA.isMemLoc());
1398      unsigned offset = 0;
1399
1400      // True if this byval aggregate will be split between registers
1401      // and memory.
1402      if (CCInfo.isFirstByValRegValid()) {
1403        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1404        unsigned int i, j;
1405        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
1406          SDValue Const = DAG.getConstant(4*i, MVT::i32);
1407          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1408          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1409                                     MachinePointerInfo(),
1410                                     false, false, false, 0);
1411          MemOpChains.push_back(Load.getValue(1));
1412          RegsToPass.push_back(std::make_pair(j, Load));
1413        }
1414        offset = ARM::R4 - CCInfo.getFirstByValReg();
1415        CCInfo.clearFirstByValReg();
1416      }
1417
1418      unsigned LocMemOffset = VA.getLocMemOffset();
1419      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
1420      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
1421                                StkPtrOff);
1422      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
1423      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
1424      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
1425                                         MVT::i32);
1426      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
1427                                          Flags.getByValAlign(),
1428                                          /*isVolatile=*/false,
1429                                          /*AlwaysInline=*/false,
1430                                          MachinePointerInfo(0),
1431                                          MachinePointerInfo(0)));
1432
1433    } else if (!IsSibCall) {
1434      assert(VA.isMemLoc());
1435
1436      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1437                                             dl, DAG, VA, Flags));
1438    }
1439  }
1440
1441  if (!MemOpChains.empty())
1442    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1443                        &MemOpChains[0], MemOpChains.size());
1444
1445  // Build a sequence of copy-to-reg nodes chained together with token chain
1446  // and flag operands which copy the outgoing args into the appropriate regs.
1447  SDValue InFlag;
1448  // Tail call byval lowering might overwrite argument registers so in case of
1449  // tail call optimization the copies to registers are lowered later.
1450  if (!isTailCall)
1451    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1452      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1453                               RegsToPass[i].second, InFlag);
1454      InFlag = Chain.getValue(1);
1455    }
1456
1457  // For tail calls lower the arguments to the 'real' stack slot.
1458  if (isTailCall) {
1459    // Force all the incoming stack arguments to be loaded from the stack
1460    // before any new outgoing arguments are stored to the stack, because the
1461    // outgoing stack slots may alias the incoming argument stack slots, and
1462    // the alias isn't otherwise explicit. This is slightly more conservative
1463    // than necessary, because it means that each store effectively depends
1464    // on every argument instead of just those arguments it would clobber.
1465
1466    // Do not flag preceding copytoreg stuff together with the following stuff.
1467    InFlag = SDValue();
1468    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1469      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1470                               RegsToPass[i].second, InFlag);
1471      InFlag = Chain.getValue(1);
1472    }
1473    InFlag =SDValue();
1474  }
1475
1476  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1477  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1478  // node so that legalize doesn't hack it.
1479  bool isDirect = false;
1480  bool isARMFunc = false;
1481  bool isLocalARMFunc = false;
1482  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1483
1484  if (EnableARMLongCalls) {
1485    assert (getTargetMachine().getRelocationModel() == Reloc::Static
1486            && "long-calls with non-static relocation model!");
1487    // Handle a global address or an external symbol. If it's not one of
1488    // those, the target's already in a register, so we don't need to do
1489    // anything extra.
1490    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1491      const GlobalValue *GV = G->getGlobal();
1492      // Create a constant pool entry for the callee address
1493      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1494      ARMConstantPoolValue *CPV =
1495        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
1496
1497      // Get the address of the callee into a register
1498      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1499      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1500      Callee = DAG.getLoad(getPointerTy(), dl,
1501                           DAG.getEntryNode(), CPAddr,
1502                           MachinePointerInfo::getConstantPool(),
1503                           false, false, false, 0);
1504    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1505      const char *Sym = S->getSymbol();
1506
1507      // Create a constant pool entry for the callee address
1508      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1509      ARMConstantPoolValue *CPV =
1510        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1511                                      ARMPCLabelIndex, 0);
1512      // Get the address of the callee into a register
1513      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1514      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1515      Callee = DAG.getLoad(getPointerTy(), dl,
1516                           DAG.getEntryNode(), CPAddr,
1517                           MachinePointerInfo::getConstantPool(),
1518                           false, false, false, 0);
1519    }
1520  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1521    const GlobalValue *GV = G->getGlobal();
1522    isDirect = true;
1523    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1524    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1525                   getTargetMachine().getRelocationModel() != Reloc::Static;
1526    isARMFunc = !Subtarget->isThumb() || isStub;
1527    // ARM call to a local ARM function is predicable.
1528    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1529    // tBX takes a register source operand.
1530    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1531      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1532      ARMConstantPoolValue *CPV =
1533        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
1534      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1535      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1536      Callee = DAG.getLoad(getPointerTy(), dl,
1537                           DAG.getEntryNode(), CPAddr,
1538                           MachinePointerInfo::getConstantPool(),
1539                           false, false, false, 0);
1540      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1541      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1542                           getPointerTy(), Callee, PICLabel);
1543    } else {
1544      // On ELF targets for PIC code, direct calls should go through the PLT
1545      unsigned OpFlags = 0;
1546      if (Subtarget->isTargetELF() &&
1547                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
1548        OpFlags = ARMII::MO_PLT;
1549      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
1550    }
1551  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1552    isDirect = true;
1553    bool isStub = Subtarget->isTargetDarwin() &&
1554                  getTargetMachine().getRelocationModel() != Reloc::Static;
1555    isARMFunc = !Subtarget->isThumb() || isStub;
1556    // tBX takes a register source operand.
1557    const char *Sym = S->getSymbol();
1558    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1559      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1560      ARMConstantPoolValue *CPV =
1561        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1562                                      ARMPCLabelIndex, 4);
1563      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1564      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1565      Callee = DAG.getLoad(getPointerTy(), dl,
1566                           DAG.getEntryNode(), CPAddr,
1567                           MachinePointerInfo::getConstantPool(),
1568                           false, false, false, 0);
1569      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1570      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1571                           getPointerTy(), Callee, PICLabel);
1572    } else {
1573      unsigned OpFlags = 0;
1574      // On ELF targets for PIC code, direct calls should go through the PLT
1575      if (Subtarget->isTargetELF() &&
1576                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
1577        OpFlags = ARMII::MO_PLT;
1578      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
1579    }
1580  }
1581
1582  // FIXME: handle tail calls differently.
1583  unsigned CallOpc;
1584  if (Subtarget->isThumb()) {
1585    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1586      CallOpc = ARMISD::CALL_NOLINK;
1587    else if (doesNotRet && isDirect && !isARMFunc &&
1588             Subtarget->hasRAS() && !Subtarget->isThumb1Only())
1589      // "mov lr, pc; b _foo" to avoid confusing the RSP
1590      CallOpc = ARMISD::CALL_NOLINK;
1591    else
1592      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1593  } else {
1594    if (!isDirect && !Subtarget->hasV5TOps()) {
1595      CallOpc = ARMISD::CALL_NOLINK;
1596    } else if (doesNotRet && isDirect && Subtarget->hasRAS())
1597      // "mov lr, pc; b _foo" to avoid confusing the RSP
1598      CallOpc = ARMISD::CALL_NOLINK;
1599    else
1600      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
1601  }
1602
1603  std::vector<SDValue> Ops;
1604  Ops.push_back(Chain);
1605  Ops.push_back(Callee);
1606
1607  // Add argument registers to the end of the list so that they are known live
1608  // into the call.
1609  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1610    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1611                                  RegsToPass[i].second.getValueType()));
1612
1613  // Add a register mask operand representing the call-preserved registers.
1614  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
1615  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
1616  assert(Mask && "Missing call preserved mask for calling convention");
1617  Ops.push_back(DAG.getRegisterMask(Mask));
1618
1619  if (InFlag.getNode())
1620    Ops.push_back(InFlag);
1621
1622  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1623  if (isTailCall)
1624    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1625
1626  // Returns a chain and a flag for retval copy to use.
1627  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1628  InFlag = Chain.getValue(1);
1629
1630  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1631                             DAG.getIntPtrConstant(0, true), InFlag);
1632  if (!Ins.empty())
1633    InFlag = Chain.getValue(1);
1634
1635  // Handle result values, copying them out of physregs into vregs that we
1636  // return.
1637  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1638                         dl, DAG, InVals);
1639}
1640
1641/// HandleByVal - Every parameter *after* a byval parameter is passed
1642/// on the stack.  Remember the next parameter register to allocate,
1643/// and then confiscate the rest of the parameter registers to insure
1644/// this.
1645void
1646ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
1647  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
1648  assert((State->getCallOrPrologue() == Prologue ||
1649          State->getCallOrPrologue() == Call) &&
1650         "unhandled ParmContext");
1651  if ((!State->isFirstByValRegValid()) &&
1652      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
1653    State->setFirstByValReg(reg);
1654    // At a call site, a byval parameter that is split between
1655    // registers and memory needs its size truncated here.  In a
1656    // function prologue, such byval parameters are reassembled in
1657    // memory, and are not truncated.
1658    if (State->getCallOrPrologue() == Call) {
1659      unsigned excess = 4 * (ARM::R4 - reg);
1660      assert(size >= excess && "expected larger existing stack allocation");
1661      size -= excess;
1662    }
1663  }
1664  // Confiscate any remaining parameter registers to preclude their
1665  // assignment to subsequent parameters.
1666  while (State->AllocateReg(GPRArgRegs, 4))
1667    ;
1668}
1669
1670/// MatchingStackOffset - Return true if the given stack call argument is
1671/// already available in the same position (relatively) of the caller's
1672/// incoming argument stack.
1673static
1674bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1675                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1676                         const TargetInstrInfo *TII) {
1677  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1678  int FI = INT_MAX;
1679  if (Arg.getOpcode() == ISD::CopyFromReg) {
1680    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1681    if (!TargetRegisterInfo::isVirtualRegister(VR))
1682      return false;
1683    MachineInstr *Def = MRI->getVRegDef(VR);
1684    if (!Def)
1685      return false;
1686    if (!Flags.isByVal()) {
1687      if (!TII->isLoadFromStackSlot(Def, FI))
1688        return false;
1689    } else {
1690      return false;
1691    }
1692  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1693    if (Flags.isByVal())
1694      // ByVal argument is passed in as a pointer but it's now being
1695      // dereferenced. e.g.
1696      // define @foo(%struct.X* %A) {
1697      //   tail call @bar(%struct.X* byval %A)
1698      // }
1699      return false;
1700    SDValue Ptr = Ld->getBasePtr();
1701    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1702    if (!FINode)
1703      return false;
1704    FI = FINode->getIndex();
1705  } else
1706    return false;
1707
1708  assert(FI != INT_MAX);
1709  if (!MFI->isFixedObjectIndex(FI))
1710    return false;
1711  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1712}
1713
1714/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1715/// for tail call optimization. Targets which want to do tail call
1716/// optimization should implement this function.
1717bool
1718ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1719                                                     CallingConv::ID CalleeCC,
1720                                                     bool isVarArg,
1721                                                     bool isCalleeStructRet,
1722                                                     bool isCallerStructRet,
1723                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1724                                    const SmallVectorImpl<SDValue> &OutVals,
1725                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1726                                                     SelectionDAG& DAG) const {
1727  const Function *CallerF = DAG.getMachineFunction().getFunction();
1728  CallingConv::ID CallerCC = CallerF->getCallingConv();
1729  bool CCMatch = CallerCC == CalleeCC;
1730
1731  // Look for obvious safe cases to perform tail call optimization that do not
1732  // require ABI changes. This is what gcc calls sibcall.
1733
1734  // Do not sibcall optimize vararg calls unless the call site is not passing
1735  // any arguments.
1736  if (isVarArg && !Outs.empty())
1737    return false;
1738
1739  // Also avoid sibcall optimization if either caller or callee uses struct
1740  // return semantics.
1741  if (isCalleeStructRet || isCallerStructRet)
1742    return false;
1743
1744  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1745  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
1746  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
1747  // support in the assembler and linker to be used. This would need to be
1748  // fixed to fully support tail calls in Thumb1.
1749  //
1750  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1751  // LR.  This means if we need to reload LR, it takes an extra instructions,
1752  // which outweighs the value of the tail call; but here we don't know yet
1753  // whether LR is going to be used.  Probably the right approach is to
1754  // generate the tail call here and turn it back into CALL/RET in
1755  // emitEpilogue if LR is used.
1756
1757  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
1758  // but we need to make sure there are enough registers; the only valid
1759  // registers are the 4 used for parameters.  We don't currently do this
1760  // case.
1761  if (Subtarget->isThumb1Only())
1762    return false;
1763
1764  // If the calling conventions do not match, then we'd better make sure the
1765  // results are returned in the same way as what the caller expects.
1766  if (!CCMatch) {
1767    SmallVector<CCValAssign, 16> RVLocs1;
1768    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
1769                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
1770    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1771
1772    SmallVector<CCValAssign, 16> RVLocs2;
1773    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
1774                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
1775    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1776
1777    if (RVLocs1.size() != RVLocs2.size())
1778      return false;
1779    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1780      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1781        return false;
1782      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1783        return false;
1784      if (RVLocs1[i].isRegLoc()) {
1785        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1786          return false;
1787      } else {
1788        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1789          return false;
1790      }
1791    }
1792  }
1793
1794  // If the callee takes no arguments then go on to check the results of the
1795  // call.
1796  if (!Outs.empty()) {
1797    // Check if stack adjustment is needed. For now, do not do this if any
1798    // argument is passed on the stack.
1799    SmallVector<CCValAssign, 16> ArgLocs;
1800    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
1801                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
1802    CCInfo.AnalyzeCallOperands(Outs,
1803                               CCAssignFnForNode(CalleeCC, false, isVarArg));
1804    if (CCInfo.getNextStackOffset()) {
1805      MachineFunction &MF = DAG.getMachineFunction();
1806
1807      // Check if the arguments are already laid out in the right way as
1808      // the caller's fixed stack objects.
1809      MachineFrameInfo *MFI = MF.getFrameInfo();
1810      const MachineRegisterInfo *MRI = &MF.getRegInfo();
1811      const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
1812      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1813           i != e;
1814           ++i, ++realArgIdx) {
1815        CCValAssign &VA = ArgLocs[i];
1816        EVT RegVT = VA.getLocVT();
1817        SDValue Arg = OutVals[realArgIdx];
1818        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1819        if (VA.getLocInfo() == CCValAssign::Indirect)
1820          return false;
1821        if (VA.needsCustom()) {
1822          // f64 and vector types are split into multiple registers or
1823          // register/stack-slot combinations.  The types will not match
1824          // the registers; give up on memory f64 refs until we figure
1825          // out what to do about this.
1826          if (!VA.isRegLoc())
1827            return false;
1828          if (!ArgLocs[++i].isRegLoc())
1829            return false;
1830          if (RegVT == MVT::v2f64) {
1831            if (!ArgLocs[++i].isRegLoc())
1832              return false;
1833            if (!ArgLocs[++i].isRegLoc())
1834              return false;
1835          }
1836        } else if (!VA.isRegLoc()) {
1837          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1838                                   MFI, MRI, TII))
1839            return false;
1840        }
1841      }
1842    }
1843  }
1844
1845  return true;
1846}
1847
1848SDValue
1849ARMTargetLowering::LowerReturn(SDValue Chain,
1850                               CallingConv::ID CallConv, bool isVarArg,
1851                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1852                               const SmallVectorImpl<SDValue> &OutVals,
1853                               DebugLoc dl, SelectionDAG &DAG) const {
1854
1855  // CCValAssign - represent the assignment of the return value to a location.
1856  SmallVector<CCValAssign, 16> RVLocs;
1857
1858  // CCState - Info about the registers and stack slots.
1859  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1860                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
1861
1862  // Analyze outgoing return values.
1863  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1864                                               isVarArg));
1865
1866  // If this is the first return lowered for this function, add
1867  // the regs to the liveout set for the function.
1868  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1869    for (unsigned i = 0; i != RVLocs.size(); ++i)
1870      if (RVLocs[i].isRegLoc())
1871        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1872  }
1873
1874  SDValue Flag;
1875
1876  // Copy the result values into the output registers.
1877  for (unsigned i = 0, realRVLocIdx = 0;
1878       i != RVLocs.size();
1879       ++i, ++realRVLocIdx) {
1880    CCValAssign &VA = RVLocs[i];
1881    assert(VA.isRegLoc() && "Can only return in registers!");
1882
1883    SDValue Arg = OutVals[realRVLocIdx];
1884
1885    switch (VA.getLocInfo()) {
1886    default: llvm_unreachable("Unknown loc info!");
1887    case CCValAssign::Full: break;
1888    case CCValAssign::BCvt:
1889      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1890      break;
1891    }
1892
1893    if (VA.needsCustom()) {
1894      if (VA.getLocVT() == MVT::v2f64) {
1895        // Extract the first half and return it in two registers.
1896        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1897                                   DAG.getConstant(0, MVT::i32));
1898        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1899                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
1900
1901        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1902        Flag = Chain.getValue(1);
1903        VA = RVLocs[++i]; // skip ahead to next loc
1904        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1905                                 HalfGPRs.getValue(1), Flag);
1906        Flag = Chain.getValue(1);
1907        VA = RVLocs[++i]; // skip ahead to next loc
1908
1909        // Extract the 2nd half and fall through to handle it as an f64 value.
1910        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1911                          DAG.getConstant(1, MVT::i32));
1912      }
1913      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1914      // available.
1915      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1916                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1917      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1918      Flag = Chain.getValue(1);
1919      VA = RVLocs[++i]; // skip ahead to next loc
1920      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1921                               Flag);
1922    } else
1923      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1924
1925    // Guarantee that all emitted copies are
1926    // stuck together, avoiding something bad.
1927    Flag = Chain.getValue(1);
1928  }
1929
1930  SDValue result;
1931  if (Flag.getNode())
1932    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1933  else // Return Void
1934    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1935
1936  return result;
1937}
1938
1939bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
1940  if (N->getNumValues() != 1)
1941    return false;
1942  if (!N->hasNUsesOfValue(1, 0))
1943    return false;
1944
1945  SDValue TCChain = Chain;
1946  SDNode *Copy = *N->use_begin();
1947  if (Copy->getOpcode() == ISD::CopyToReg) {
1948    // If the copy has a glue operand, we conservatively assume it isn't safe to
1949    // perform a tail call.
1950    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
1951      return false;
1952    TCChain = Copy->getOperand(0);
1953  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
1954    SDNode *VMov = Copy;
1955    // f64 returned in a pair of GPRs.
1956    SmallPtrSet<SDNode*, 2> Copies;
1957    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
1958         UI != UE; ++UI) {
1959      if (UI->getOpcode() != ISD::CopyToReg)
1960        return false;
1961      Copies.insert(*UI);
1962    }
1963    if (Copies.size() > 2)
1964      return false;
1965
1966    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
1967         UI != UE; ++UI) {
1968      SDValue UseChain = UI->getOperand(0);
1969      if (Copies.count(UseChain.getNode()))
1970        // Second CopyToReg
1971        Copy = *UI;
1972      else
1973        // First CopyToReg
1974        TCChain = UseChain;
1975    }
1976  } else if (Copy->getOpcode() == ISD::BITCAST) {
1977    // f32 returned in a single GPR.
1978    if (!Copy->hasOneUse())
1979      return false;
1980    Copy = *Copy->use_begin();
1981    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
1982      return false;
1983    Chain = Copy->getOperand(0);
1984  } else {
1985    return false;
1986  }
1987
1988  bool HasRet = false;
1989  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
1990       UI != UE; ++UI) {
1991    if (UI->getOpcode() != ARMISD::RET_FLAG)
1992      return false;
1993    HasRet = true;
1994  }
1995
1996  if (!HasRet)
1997    return false;
1998
1999  Chain = TCChain;
2000  return true;
2001}
2002
2003bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2004  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
2005    return false;
2006
2007  if (!CI->isTailCall())
2008    return false;
2009
2010  return !Subtarget->isThumb1Only();
2011}
2012
2013// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2014// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2015// one of the above mentioned nodes. It has to be wrapped because otherwise
2016// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2017// be used to form addressing mode. These wrapped nodes will be selected
2018// into MOVi.
2019static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2020  EVT PtrVT = Op.getValueType();
2021  // FIXME there is no actual debug info here
2022  DebugLoc dl = Op.getDebugLoc();
2023  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2024  SDValue Res;
2025  if (CP->isMachineConstantPoolEntry())
2026    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2027                                    CP->getAlignment());
2028  else
2029    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2030                                    CP->getAlignment());
2031  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2032}
2033
2034unsigned ARMTargetLowering::getJumpTableEncoding() const {
2035  return MachineJumpTableInfo::EK_Inline;
2036}
2037
2038SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2039                                             SelectionDAG &DAG) const {
2040  MachineFunction &MF = DAG.getMachineFunction();
2041  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2042  unsigned ARMPCLabelIndex = 0;
2043  DebugLoc DL = Op.getDebugLoc();
2044  EVT PtrVT = getPointerTy();
2045  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2046  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2047  SDValue CPAddr;
2048  if (RelocM == Reloc::Static) {
2049    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2050  } else {
2051    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2052    ARMPCLabelIndex = AFI->createPICLabelUId();
2053    ARMConstantPoolValue *CPV =
2054      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2055                                      ARMCP::CPBlockAddress, PCAdj);
2056    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2057  }
2058  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2059  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
2060                               MachinePointerInfo::getConstantPool(),
2061                               false, false, false, 0);
2062  if (RelocM == Reloc::Static)
2063    return Result;
2064  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2065  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2066}
2067
2068// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2069SDValue
2070ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2071                                                 SelectionDAG &DAG) const {
2072  DebugLoc dl = GA->getDebugLoc();
2073  EVT PtrVT = getPointerTy();
2074  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2075  MachineFunction &MF = DAG.getMachineFunction();
2076  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2077  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2078  ARMConstantPoolValue *CPV =
2079    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2080                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2081  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2082  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2083  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
2084                         MachinePointerInfo::getConstantPool(),
2085                         false, false, false, 0);
2086  SDValue Chain = Argument.getValue(1);
2087
2088  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2089  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2090
2091  // call __tls_get_addr.
2092  ArgListTy Args;
2093  ArgListEntry Entry;
2094  Entry.Node = Argument;
2095  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2096  Args.push_back(Entry);
2097  // FIXME: is there useful debug info available here?
2098  std::pair<SDValue, SDValue> CallResult =
2099    LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
2100                false, false, false, false,
2101                0, CallingConv::C, /*isTailCall=*/false,
2102                /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
2103                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
2104  return CallResult.first;
2105}
2106
2107// Lower ISD::GlobalTLSAddress using the "initial exec" or
2108// "local exec" model.
2109SDValue
2110ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2111                                        SelectionDAG &DAG) const {
2112  const GlobalValue *GV = GA->getGlobal();
2113  DebugLoc dl = GA->getDebugLoc();
2114  SDValue Offset;
2115  SDValue Chain = DAG.getEntryNode();
2116  EVT PtrVT = getPointerTy();
2117  // Get the Thread Pointer
2118  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2119
2120  if (GV->isDeclaration()) {
2121    MachineFunction &MF = DAG.getMachineFunction();
2122    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2123    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2124    // Initial exec model.
2125    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2126    ARMConstantPoolValue *CPV =
2127      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2128                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2129                                      true);
2130    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2131    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2132    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2133                         MachinePointerInfo::getConstantPool(),
2134                         false, false, false, 0);
2135    Chain = Offset.getValue(1);
2136
2137    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2138    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2139
2140    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2141                         MachinePointerInfo::getConstantPool(),
2142                         false, false, false, 0);
2143  } else {
2144    // local exec model
2145    ARMConstantPoolValue *CPV =
2146      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2147    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2148    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2149    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2150                         MachinePointerInfo::getConstantPool(),
2151                         false, false, false, 0);
2152  }
2153
2154  // The address of the thread local variable is the add of the thread
2155  // pointer with the offset of the variable.
2156  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2157}
2158
2159SDValue
2160ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2161  // TODO: implement the "local dynamic" model
2162  assert(Subtarget->isTargetELF() &&
2163         "TLS not implemented for non-ELF targets");
2164  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2165  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
2166  // otherwise use the "Local Exec" TLS Model
2167  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
2168    return LowerToTLSGeneralDynamicModel(GA, DAG);
2169  else
2170    return LowerToTLSExecModels(GA, DAG);
2171}
2172
2173SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
2174                                                 SelectionDAG &DAG) const {
2175  EVT PtrVT = getPointerTy();
2176  DebugLoc dl = Op.getDebugLoc();
2177  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2178  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2179  if (RelocM == Reloc::PIC_) {
2180    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
2181    ARMConstantPoolValue *CPV =
2182      ARMConstantPoolConstant::Create(GV,
2183                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
2184    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2185    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2186    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
2187                                 CPAddr,
2188                                 MachinePointerInfo::getConstantPool(),
2189                                 false, false, false, 0);
2190    SDValue Chain = Result.getValue(1);
2191    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2192    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
2193    if (!UseGOTOFF)
2194      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
2195                           MachinePointerInfo::getGOT(),
2196                           false, false, false, 0);
2197    return Result;
2198  }
2199
2200  // If we have T2 ops, we can materialize the address directly via movt/movw
2201  // pair. This is always cheaper.
2202  if (Subtarget->useMovt()) {
2203    ++NumMovwMovt;
2204    // FIXME: Once remat is capable of dealing with instructions with register
2205    // operands, expand this into two nodes.
2206    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2207                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2208  } else {
2209    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
2210    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2211    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2212                       MachinePointerInfo::getConstantPool(),
2213                       false, false, false, 0);
2214  }
2215}
2216
2217SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
2218                                                    SelectionDAG &DAG) const {
2219  EVT PtrVT = getPointerTy();
2220  DebugLoc dl = Op.getDebugLoc();
2221  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2222  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2223  MachineFunction &MF = DAG.getMachineFunction();
2224  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2225
2226  // FIXME: Enable this for static codegen when tool issues are fixed.  Also
2227  // update ARMFastISel::ARMMaterializeGV.
2228  if (Subtarget->useMovt() && RelocM != Reloc::Static) {
2229    ++NumMovwMovt;
2230    // FIXME: Once remat is capable of dealing with instructions with register
2231    // operands, expand this into two nodes.
2232    if (RelocM == Reloc::Static)
2233      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2234                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2235
2236    unsigned Wrapper = (RelocM == Reloc::PIC_)
2237      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
2238    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
2239                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2240    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
2241      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
2242                           MachinePointerInfo::getGOT(),
2243                           false, false, false, 0);
2244    return Result;
2245  }
2246
2247  unsigned ARMPCLabelIndex = 0;
2248  SDValue CPAddr;
2249  if (RelocM == Reloc::Static) {
2250    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
2251  } else {
2252    ARMPCLabelIndex = AFI->createPICLabelUId();
2253    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
2254    ARMConstantPoolValue *CPV =
2255      ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
2256                                      PCAdj);
2257    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2258  }
2259  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2260
2261  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2262                               MachinePointerInfo::getConstantPool(),
2263                               false, false, false, 0);
2264  SDValue Chain = Result.getValue(1);
2265
2266  if (RelocM == Reloc::PIC_) {
2267    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2268    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2269  }
2270
2271  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
2272    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
2273                         false, false, false, 0);
2274
2275  return Result;
2276}
2277
2278SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
2279                                                    SelectionDAG &DAG) const {
2280  assert(Subtarget->isTargetELF() &&
2281         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
2282  MachineFunction &MF = DAG.getMachineFunction();
2283  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2284  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2285  EVT PtrVT = getPointerTy();
2286  DebugLoc dl = Op.getDebugLoc();
2287  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2288  ARMConstantPoolValue *CPV =
2289    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
2290                                  ARMPCLabelIndex, PCAdj);
2291  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2292  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2293  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2294                               MachinePointerInfo::getConstantPool(),
2295                               false, false, false, 0);
2296  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2297  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2298}
2299
2300SDValue
2301ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
2302  DebugLoc dl = Op.getDebugLoc();
2303  SDValue Val = DAG.getConstant(0, MVT::i32);
2304  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
2305                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
2306                     Op.getOperand(1), Val);
2307}
2308
2309SDValue
2310ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
2311  DebugLoc dl = Op.getDebugLoc();
2312  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
2313                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
2314}
2315
2316SDValue
2317ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
2318                                          const ARMSubtarget *Subtarget) const {
2319  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2320  DebugLoc dl = Op.getDebugLoc();
2321  switch (IntNo) {
2322  default: return SDValue();    // Don't custom lower most intrinsics.
2323  case Intrinsic::arm_thread_pointer: {
2324    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2325    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2326  }
2327  case Intrinsic::eh_sjlj_lsda: {
2328    MachineFunction &MF = DAG.getMachineFunction();
2329    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2330    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2331    EVT PtrVT = getPointerTy();
2332    DebugLoc dl = Op.getDebugLoc();
2333    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2334    SDValue CPAddr;
2335    unsigned PCAdj = (RelocM != Reloc::PIC_)
2336      ? 0 : (Subtarget->isThumb() ? 4 : 8);
2337    ARMConstantPoolValue *CPV =
2338      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
2339                                      ARMCP::CPLSDA, PCAdj);
2340    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2341    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2342    SDValue Result =
2343      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2344                  MachinePointerInfo::getConstantPool(),
2345                  false, false, false, 0);
2346
2347    if (RelocM == Reloc::PIC_) {
2348      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2349      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2350    }
2351    return Result;
2352  }
2353  case Intrinsic::arm_neon_vmulls:
2354  case Intrinsic::arm_neon_vmullu: {
2355    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
2356      ? ARMISD::VMULLs : ARMISD::VMULLu;
2357    return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
2358                       Op.getOperand(1), Op.getOperand(2));
2359  }
2360  }
2361}
2362
2363static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
2364                               const ARMSubtarget *Subtarget) {
2365  DebugLoc dl = Op.getDebugLoc();
2366  if (!Subtarget->hasDataBarrier()) {
2367    // Some ARMv6 cpus can support data barriers with an mcr instruction.
2368    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2369    // here.
2370    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2371           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2372    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2373                       DAG.getConstant(0, MVT::i32));
2374  }
2375
2376  SDValue Op5 = Op.getOperand(5);
2377  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
2378  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2379  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2380  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
2381
2382  ARM_MB::MemBOpt DMBOpt;
2383  if (isDeviceBarrier)
2384    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
2385  else
2386    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
2387  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
2388                     DAG.getConstant(DMBOpt, MVT::i32));
2389}
2390
2391
2392static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
2393                                 const ARMSubtarget *Subtarget) {
2394  // FIXME: handle "fence singlethread" more efficiently.
2395  DebugLoc dl = Op.getDebugLoc();
2396  if (!Subtarget->hasDataBarrier()) {
2397    // Some ARMv6 cpus can support data barriers with an mcr instruction.
2398    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2399    // here.
2400    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2401           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2402    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2403                       DAG.getConstant(0, MVT::i32));
2404  }
2405
2406  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
2407                     DAG.getConstant(ARM_MB::ISH, MVT::i32));
2408}
2409
2410static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2411                             const ARMSubtarget *Subtarget) {
2412  // ARM pre v5TE and Thumb1 does not have preload instructions.
2413  if (!(Subtarget->isThumb2() ||
2414        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2415    // Just preserve the chain.
2416    return Op.getOperand(0);
2417
2418  DebugLoc dl = Op.getDebugLoc();
2419  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2420  if (!isRead &&
2421      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2422    // ARMv7 with MP extension has PLDW.
2423    return Op.getOperand(0);
2424
2425  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2426  if (Subtarget->isThumb()) {
2427    // Invert the bits.
2428    isRead = ~isRead & 1;
2429    isData = ~isData & 1;
2430  }
2431
2432  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
2433                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
2434                     DAG.getConstant(isData, MVT::i32));
2435}
2436
2437static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2438  MachineFunction &MF = DAG.getMachineFunction();
2439  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2440
2441  // vastart just stores the address of the VarArgsFrameIndex slot into the
2442  // memory location argument.
2443  DebugLoc dl = Op.getDebugLoc();
2444  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2445  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2446  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2447  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2448                      MachinePointerInfo(SV), false, false, 0);
2449}
2450
2451SDValue
2452ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2453                                        SDValue &Root, SelectionDAG &DAG,
2454                                        DebugLoc dl) const {
2455  MachineFunction &MF = DAG.getMachineFunction();
2456  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2457
2458  const TargetRegisterClass *RC;
2459  if (AFI->isThumb1OnlyFunction())
2460    RC = ARM::tGPRRegisterClass;
2461  else
2462    RC = ARM::GPRRegisterClass;
2463
2464  // Transform the arguments stored in physical registers into virtual ones.
2465  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2466  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2467
2468  SDValue ArgValue2;
2469  if (NextVA.isMemLoc()) {
2470    MachineFrameInfo *MFI = MF.getFrameInfo();
2471    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2472
2473    // Create load node to retrieve arguments from the stack.
2474    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2475    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2476                            MachinePointerInfo::getFixedStack(FI),
2477                            false, false, false, 0);
2478  } else {
2479    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2480    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2481  }
2482
2483  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2484}
2485
2486void
2487ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
2488                                  unsigned &VARegSize, unsigned &VARegSaveSize)
2489  const {
2490  unsigned NumGPRs;
2491  if (CCInfo.isFirstByValRegValid())
2492    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
2493  else {
2494    unsigned int firstUnalloced;
2495    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
2496                                                sizeof(GPRArgRegs) /
2497                                                sizeof(GPRArgRegs[0]));
2498    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
2499  }
2500
2501  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
2502  VARegSize = NumGPRs * 4;
2503  VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2504}
2505
2506// The remaining GPRs hold either the beginning of variable-argument
2507// data, or the beginning of an aggregate passed by value (usuall
2508// byval).  Either way, we allocate stack slots adjacent to the data
2509// provided by our caller, and store the unallocated registers there.
2510// If this is a variadic function, the va_list pointer will begin with
2511// these values; otherwise, this reassembles a (byval) structure that
2512// was split between registers and memory.
2513void
2514ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
2515                                        DebugLoc dl, SDValue &Chain,
2516                                        unsigned ArgOffset) const {
2517  MachineFunction &MF = DAG.getMachineFunction();
2518  MachineFrameInfo *MFI = MF.getFrameInfo();
2519  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2520  unsigned firstRegToSaveIndex;
2521  if (CCInfo.isFirstByValRegValid())
2522    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
2523  else {
2524    firstRegToSaveIndex = CCInfo.getFirstUnallocated
2525      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2526  }
2527
2528  unsigned VARegSize, VARegSaveSize;
2529  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
2530  if (VARegSaveSize) {
2531    // If this function is vararg, store any remaining integer argument regs
2532    // to their spots on the stack so that they may be loaded by deferencing
2533    // the result of va_next.
2534    AFI->setVarArgsRegSaveSize(VARegSaveSize);
2535    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
2536                                                     ArgOffset + VARegSaveSize
2537                                                     - VARegSize,
2538                                                     false));
2539    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2540                                    getPointerTy());
2541
2542    SmallVector<SDValue, 4> MemOps;
2543    for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
2544      const TargetRegisterClass *RC;
2545      if (AFI->isThumb1OnlyFunction())
2546        RC = ARM::tGPRRegisterClass;
2547      else
2548        RC = ARM::GPRRegisterClass;
2549
2550      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
2551      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2552      SDValue Store =
2553        DAG.getStore(Val.getValue(1), dl, Val, FIN,
2554                 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
2555                     false, false, 0);
2556      MemOps.push_back(Store);
2557      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2558                        DAG.getConstant(4, getPointerTy()));
2559    }
2560    if (!MemOps.empty())
2561      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2562                          &MemOps[0], MemOps.size());
2563  } else
2564    // This will point to the next argument passed via stack.
2565    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
2566}
2567
2568SDValue
2569ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2570                                        CallingConv::ID CallConv, bool isVarArg,
2571                                        const SmallVectorImpl<ISD::InputArg>
2572                                          &Ins,
2573                                        DebugLoc dl, SelectionDAG &DAG,
2574                                        SmallVectorImpl<SDValue> &InVals)
2575                                          const {
2576  MachineFunction &MF = DAG.getMachineFunction();
2577  MachineFrameInfo *MFI = MF.getFrameInfo();
2578
2579  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2580
2581  // Assign locations to all of the incoming arguments.
2582  SmallVector<CCValAssign, 16> ArgLocs;
2583  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2584                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
2585  CCInfo.AnalyzeFormalArguments(Ins,
2586                                CCAssignFnForNode(CallConv, /* Return*/ false,
2587                                                  isVarArg));
2588
2589  SmallVector<SDValue, 16> ArgValues;
2590  int lastInsIndex = -1;
2591
2592  SDValue ArgValue;
2593  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2594    CCValAssign &VA = ArgLocs[i];
2595
2596    // Arguments stored in registers.
2597    if (VA.isRegLoc()) {
2598      EVT RegVT = VA.getLocVT();
2599
2600      if (VA.needsCustom()) {
2601        // f64 and vector types are split up into multiple registers or
2602        // combinations of registers and stack slots.
2603        if (VA.getLocVT() == MVT::v2f64) {
2604          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2605                                                   Chain, DAG, dl);
2606          VA = ArgLocs[++i]; // skip ahead to next loc
2607          SDValue ArgValue2;
2608          if (VA.isMemLoc()) {
2609            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
2610            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2611            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2612                                    MachinePointerInfo::getFixedStack(FI),
2613                                    false, false, false, 0);
2614          } else {
2615            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2616                                             Chain, DAG, dl);
2617          }
2618          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2619          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2620                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2621          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2622                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2623        } else
2624          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2625
2626      } else {
2627        const TargetRegisterClass *RC;
2628
2629        if (RegVT == MVT::f32)
2630          RC = ARM::SPRRegisterClass;
2631        else if (RegVT == MVT::f64)
2632          RC = ARM::DPRRegisterClass;
2633        else if (RegVT == MVT::v2f64)
2634          RC = ARM::QPRRegisterClass;
2635        else if (RegVT == MVT::i32)
2636          RC = (AFI->isThumb1OnlyFunction() ?
2637                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2638        else
2639          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2640
2641        // Transform the arguments in physical registers into virtual ones.
2642        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2643        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2644      }
2645
2646      // If this is an 8 or 16-bit value, it is really passed promoted
2647      // to 32 bits.  Insert an assert[sz]ext to capture this, then
2648      // truncate to the right size.
2649      switch (VA.getLocInfo()) {
2650      default: llvm_unreachable("Unknown loc info!");
2651      case CCValAssign::Full: break;
2652      case CCValAssign::BCvt:
2653        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
2654        break;
2655      case CCValAssign::SExt:
2656        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2657                               DAG.getValueType(VA.getValVT()));
2658        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2659        break;
2660      case CCValAssign::ZExt:
2661        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2662                               DAG.getValueType(VA.getValVT()));
2663        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2664        break;
2665      }
2666
2667      InVals.push_back(ArgValue);
2668
2669    } else { // VA.isRegLoc()
2670
2671      // sanity check
2672      assert(VA.isMemLoc());
2673      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2674
2675      int index = ArgLocs[i].getValNo();
2676
2677      // Some Ins[] entries become multiple ArgLoc[] entries.
2678      // Process them only once.
2679      if (index != lastInsIndex)
2680        {
2681          ISD::ArgFlagsTy Flags = Ins[index].Flags;
2682          // FIXME: For now, all byval parameter objects are marked mutable.
2683          // This can be changed with more analysis.
2684          // In case of tail call optimization mark all arguments mutable.
2685          // Since they could be overwritten by lowering of arguments in case of
2686          // a tail call.
2687          if (Flags.isByVal()) {
2688            unsigned VARegSize, VARegSaveSize;
2689            computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
2690            VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
2691            unsigned Bytes = Flags.getByValSize() - VARegSize;
2692            if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2693            int FI = MFI->CreateFixedObject(Bytes,
2694                                            VA.getLocMemOffset(), false);
2695            InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
2696          } else {
2697            int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
2698                                            VA.getLocMemOffset(), true);
2699
2700            // Create load nodes to retrieve arguments from the stack.
2701            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2702            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2703                                         MachinePointerInfo::getFixedStack(FI),
2704                                         false, false, false, 0));
2705          }
2706          lastInsIndex = index;
2707        }
2708    }
2709  }
2710
2711  // varargs
2712  if (isVarArg)
2713    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
2714
2715  return Chain;
2716}
2717
2718/// isFloatingPointZero - Return true if this is +0.0.
2719static bool isFloatingPointZero(SDValue Op) {
2720  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2721    return CFP->getValueAPF().isPosZero();
2722  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2723    // Maybe this has already been legalized into the constant pool?
2724    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2725      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2726      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2727        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2728          return CFP->getValueAPF().isPosZero();
2729    }
2730  }
2731  return false;
2732}
2733
2734/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
2735/// the given operands.
2736SDValue
2737ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2738                             SDValue &ARMcc, SelectionDAG &DAG,
2739                             DebugLoc dl) const {
2740  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2741    unsigned C = RHSC->getZExtValue();
2742    if (!isLegalICmpImmediate(C)) {
2743      // Constant does not fit, try adjusting it by one?
2744      switch (CC) {
2745      default: break;
2746      case ISD::SETLT:
2747      case ISD::SETGE:
2748        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
2749          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2750          RHS = DAG.getConstant(C-1, MVT::i32);
2751        }
2752        break;
2753      case ISD::SETULT:
2754      case ISD::SETUGE:
2755        if (C != 0 && isLegalICmpImmediate(C-1)) {
2756          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2757          RHS = DAG.getConstant(C-1, MVT::i32);
2758        }
2759        break;
2760      case ISD::SETLE:
2761      case ISD::SETGT:
2762        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
2763          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2764          RHS = DAG.getConstant(C+1, MVT::i32);
2765        }
2766        break;
2767      case ISD::SETULE:
2768      case ISD::SETUGT:
2769        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
2770          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2771          RHS = DAG.getConstant(C+1, MVT::i32);
2772        }
2773        break;
2774      }
2775    }
2776  }
2777
2778  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2779  ARMISD::NodeType CompareType;
2780  switch (CondCode) {
2781  default:
2782    CompareType = ARMISD::CMP;
2783    break;
2784  case ARMCC::EQ:
2785  case ARMCC::NE:
2786    // Uses only Z Flag
2787    CompareType = ARMISD::CMPZ;
2788    break;
2789  }
2790  ARMcc = DAG.getConstant(CondCode, MVT::i32);
2791  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
2792}
2793
2794/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2795SDValue
2796ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2797                             DebugLoc dl) const {
2798  SDValue Cmp;
2799  if (!isFloatingPointZero(RHS))
2800    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
2801  else
2802    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
2803  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
2804}
2805
2806/// duplicateCmp - Glue values can have only one use, so this function
2807/// duplicates a comparison node.
2808SDValue
2809ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
2810  unsigned Opc = Cmp.getOpcode();
2811  DebugLoc DL = Cmp.getDebugLoc();
2812  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
2813    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
2814
2815  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
2816  Cmp = Cmp.getOperand(0);
2817  Opc = Cmp.getOpcode();
2818  if (Opc == ARMISD::CMPFP)
2819    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
2820  else {
2821    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
2822    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
2823  }
2824  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
2825}
2826
2827SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2828  SDValue Cond = Op.getOperand(0);
2829  SDValue SelectTrue = Op.getOperand(1);
2830  SDValue SelectFalse = Op.getOperand(2);
2831  DebugLoc dl = Op.getDebugLoc();
2832
2833  // Convert:
2834  //
2835  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
2836  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
2837  //
2838  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
2839    const ConstantSDNode *CMOVTrue =
2840      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
2841    const ConstantSDNode *CMOVFalse =
2842      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
2843
2844    if (CMOVTrue && CMOVFalse) {
2845      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
2846      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
2847
2848      SDValue True;
2849      SDValue False;
2850      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
2851        True = SelectTrue;
2852        False = SelectFalse;
2853      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
2854        True = SelectFalse;
2855        False = SelectTrue;
2856      }
2857
2858      if (True.getNode() && False.getNode()) {
2859        EVT VT = Op.getValueType();
2860        SDValue ARMcc = Cond.getOperand(2);
2861        SDValue CCR = Cond.getOperand(3);
2862        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
2863        assert(True.getValueType() == VT);
2864        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
2865      }
2866    }
2867  }
2868
2869  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
2870  // undefined bits before doing a full-word comparison with zero.
2871  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
2872                     DAG.getConstant(1, Cond.getValueType()));
2873
2874  return DAG.getSelectCC(dl, Cond,
2875                         DAG.getConstant(0, Cond.getValueType()),
2876                         SelectTrue, SelectFalse, ISD::SETNE);
2877}
2878
2879SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2880  EVT VT = Op.getValueType();
2881  SDValue LHS = Op.getOperand(0);
2882  SDValue RHS = Op.getOperand(1);
2883  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2884  SDValue TrueVal = Op.getOperand(2);
2885  SDValue FalseVal = Op.getOperand(3);
2886  DebugLoc dl = Op.getDebugLoc();
2887
2888  if (LHS.getValueType() == MVT::i32) {
2889    SDValue ARMcc;
2890    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2891    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2892    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
2893  }
2894
2895  ARMCC::CondCodes CondCode, CondCode2;
2896  FPCCToARMCC(CC, CondCode, CondCode2);
2897
2898  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2899  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2900  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2901  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2902                               ARMcc, CCR, Cmp);
2903  if (CondCode2 != ARMCC::AL) {
2904    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
2905    // FIXME: Needs another CMP because flag can have but one use.
2906    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2907    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2908                         Result, TrueVal, ARMcc2, CCR, Cmp2);
2909  }
2910  return Result;
2911}
2912
2913/// canChangeToInt - Given the fp compare operand, return true if it is suitable
2914/// to morph to an integer compare sequence.
2915static bool canChangeToInt(SDValue Op, bool &SeenZero,
2916                           const ARMSubtarget *Subtarget) {
2917  SDNode *N = Op.getNode();
2918  if (!N->hasOneUse())
2919    // Otherwise it requires moving the value from fp to integer registers.
2920    return false;
2921  if (!N->getNumValues())
2922    return false;
2923  EVT VT = Op.getValueType();
2924  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
2925    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
2926    // vmrs are very slow, e.g. cortex-a8.
2927    return false;
2928
2929  if (isFloatingPointZero(Op)) {
2930    SeenZero = true;
2931    return true;
2932  }
2933  return ISD::isNormalLoad(N);
2934}
2935
2936static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2937  if (isFloatingPointZero(Op))
2938    return DAG.getConstant(0, MVT::i32);
2939
2940  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
2941    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2942                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
2943                       Ld->isVolatile(), Ld->isNonTemporal(),
2944                       Ld->isInvariant(), Ld->getAlignment());
2945
2946  llvm_unreachable("Unknown VFP cmp argument!");
2947}
2948
2949static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2950                           SDValue &RetVal1, SDValue &RetVal2) {
2951  if (isFloatingPointZero(Op)) {
2952    RetVal1 = DAG.getConstant(0, MVT::i32);
2953    RetVal2 = DAG.getConstant(0, MVT::i32);
2954    return;
2955  }
2956
2957  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
2958    SDValue Ptr = Ld->getBasePtr();
2959    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2960                          Ld->getChain(), Ptr,
2961                          Ld->getPointerInfo(),
2962                          Ld->isVolatile(), Ld->isNonTemporal(),
2963                          Ld->isInvariant(), Ld->getAlignment());
2964
2965    EVT PtrType = Ptr.getValueType();
2966    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2967    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2968                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
2969    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2970                          Ld->getChain(), NewPtr,
2971                          Ld->getPointerInfo().getWithOffset(4),
2972                          Ld->isVolatile(), Ld->isNonTemporal(),
2973                          Ld->isInvariant(), NewAlign);
2974    return;
2975  }
2976
2977  llvm_unreachable("Unknown VFP cmp argument!");
2978}
2979
2980/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2981/// f32 and even f64 comparisons to integer ones.
2982SDValue
2983ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2984  SDValue Chain = Op.getOperand(0);
2985  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2986  SDValue LHS = Op.getOperand(2);
2987  SDValue RHS = Op.getOperand(3);
2988  SDValue Dest = Op.getOperand(4);
2989  DebugLoc dl = Op.getDebugLoc();
2990
2991  bool LHSSeenZero = false;
2992  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
2993  bool RHSSeenZero = false;
2994  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
2995  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
2996    // If unsafe fp math optimization is enabled and there are no other uses of
2997    // the CMP operands, and the condition code is EQ or NE, we can optimize it
2998    // to an integer comparison.
2999    if (CC == ISD::SETOEQ)
3000      CC = ISD::SETEQ;
3001    else if (CC == ISD::SETUNE)
3002      CC = ISD::SETNE;
3003
3004    SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
3005    SDValue ARMcc;
3006    if (LHS.getValueType() == MVT::f32) {
3007      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3008                        bitcastf32Toi32(LHS, DAG), Mask);
3009      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
3010                        bitcastf32Toi32(RHS, DAG), Mask);
3011      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3012      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3013      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3014                         Chain, Dest, ARMcc, CCR, Cmp);
3015    }
3016
3017    SDValue LHS1, LHS2;
3018    SDValue RHS1, RHS2;
3019    expandf64Toi32(LHS, DAG, LHS1, LHS2);
3020    expandf64Toi32(RHS, DAG, RHS1, RHS2);
3021    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
3022    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
3023    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3024    ARMcc = DAG.getConstant(CondCode, MVT::i32);
3025    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3026    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
3027    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
3028  }
3029
3030  return SDValue();
3031}
3032
3033SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3034  SDValue Chain = Op.getOperand(0);
3035  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3036  SDValue LHS = Op.getOperand(2);
3037  SDValue RHS = Op.getOperand(3);
3038  SDValue Dest = Op.getOperand(4);
3039  DebugLoc dl = Op.getDebugLoc();
3040
3041  if (LHS.getValueType() == MVT::i32) {
3042    SDValue ARMcc;
3043    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
3044    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3045    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
3046                       Chain, Dest, ARMcc, CCR, Cmp);
3047  }
3048
3049  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3050
3051  if (getTargetMachine().Options.UnsafeFPMath &&
3052      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
3053       CC == ISD::SETNE || CC == ISD::SETUNE)) {
3054    SDValue Result = OptimizeVFPBrcond(Op, DAG);
3055    if (Result.getNode())
3056      return Result;
3057  }
3058
3059  ARMCC::CondCodes CondCode, CondCode2;
3060  FPCCToARMCC(CC, CondCode, CondCode2);
3061
3062  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
3063  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
3064  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3065  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
3066  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
3067  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
3068  if (CondCode2 != ARMCC::AL) {
3069    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
3070    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
3071    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
3072  }
3073  return Res;
3074}
3075
3076SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
3077  SDValue Chain = Op.getOperand(0);
3078  SDValue Table = Op.getOperand(1);
3079  SDValue Index = Op.getOperand(2);
3080  DebugLoc dl = Op.getDebugLoc();
3081
3082  EVT PTy = getPointerTy();
3083  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
3084  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3085  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
3086  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
3087  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
3088  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
3089  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
3090  if (Subtarget->isThumb2()) {
3091    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
3092    // which does another jump to the destination. This also makes it easier
3093    // to translate it to TBB / TBH later.
3094    // FIXME: This might not work if the function is extremely large.
3095    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
3096                       Addr, Op.getOperand(2), JTI, UId);
3097  }
3098  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
3099    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
3100                       MachinePointerInfo::getJumpTable(),
3101                       false, false, false, 0);
3102    Chain = Addr.getValue(1);
3103    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
3104    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3105  } else {
3106    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
3107                       MachinePointerInfo::getJumpTable(),
3108                       false, false, false, 0);
3109    Chain = Addr.getValue(1);
3110    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3111  }
3112}
3113
3114static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
3115  EVT VT = Op.getValueType();
3116  DebugLoc dl = Op.getDebugLoc();
3117
3118  if (Op.getValueType().getVectorElementType() == MVT::i32) {
3119    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
3120      return Op;
3121    return DAG.UnrollVectorOp(Op.getNode());
3122  }
3123
3124  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
3125         "Invalid type for custom lowering!");
3126  if (VT != MVT::v4i16)
3127    return DAG.UnrollVectorOp(Op.getNode());
3128
3129  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
3130  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
3131}
3132
3133static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
3134  EVT VT = Op.getValueType();
3135  if (VT.isVector())
3136    return LowerVectorFP_TO_INT(Op, DAG);
3137
3138  DebugLoc dl = Op.getDebugLoc();
3139  unsigned Opc;
3140
3141  switch (Op.getOpcode()) {
3142  default: llvm_unreachable("Invalid opcode!");
3143  case ISD::FP_TO_SINT:
3144    Opc = ARMISD::FTOSI;
3145    break;
3146  case ISD::FP_TO_UINT:
3147    Opc = ARMISD::FTOUI;
3148    break;
3149  }
3150  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
3151  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
3152}
3153
3154static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3155  EVT VT = Op.getValueType();
3156  DebugLoc dl = Op.getDebugLoc();
3157
3158  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
3159    if (VT.getVectorElementType() == MVT::f32)
3160      return Op;
3161    return DAG.UnrollVectorOp(Op.getNode());
3162  }
3163
3164  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
3165         "Invalid type for custom lowering!");
3166  if (VT != MVT::v4f32)
3167    return DAG.UnrollVectorOp(Op.getNode());
3168
3169  unsigned CastOpc;
3170  unsigned Opc;
3171  switch (Op.getOpcode()) {
3172  default: llvm_unreachable("Invalid opcode!");
3173  case ISD::SINT_TO_FP:
3174    CastOpc = ISD::SIGN_EXTEND;
3175    Opc = ISD::SINT_TO_FP;
3176    break;
3177  case ISD::UINT_TO_FP:
3178    CastOpc = ISD::ZERO_EXTEND;
3179    Opc = ISD::UINT_TO_FP;
3180    break;
3181  }
3182
3183  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
3184  return DAG.getNode(Opc, dl, VT, Op);
3185}
3186
3187static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3188  EVT VT = Op.getValueType();
3189  if (VT.isVector())
3190    return LowerVectorINT_TO_FP(Op, DAG);
3191
3192  DebugLoc dl = Op.getDebugLoc();
3193  unsigned Opc;
3194
3195  switch (Op.getOpcode()) {
3196  default: llvm_unreachable("Invalid opcode!");
3197  case ISD::SINT_TO_FP:
3198    Opc = ARMISD::SITOF;
3199    break;
3200  case ISD::UINT_TO_FP:
3201    Opc = ARMISD::UITOF;
3202    break;
3203  }
3204
3205  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
3206  return DAG.getNode(Opc, dl, VT, Op);
3207}
3208
3209SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
3210  // Implement fcopysign with a fabs and a conditional fneg.
3211  SDValue Tmp0 = Op.getOperand(0);
3212  SDValue Tmp1 = Op.getOperand(1);
3213  DebugLoc dl = Op.getDebugLoc();
3214  EVT VT = Op.getValueType();
3215  EVT SrcVT = Tmp1.getValueType();
3216  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
3217    Tmp0.getOpcode() == ARMISD::VMOVDRR;
3218  bool UseNEON = !InGPR && Subtarget->hasNEON();
3219
3220  if (UseNEON) {
3221    // Use VBSL to copy the sign bit.
3222    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
3223    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
3224                               DAG.getTargetConstant(EncodedVal, MVT::i32));
3225    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
3226    if (VT == MVT::f64)
3227      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
3228                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
3229                         DAG.getConstant(32, MVT::i32));
3230    else /*if (VT == MVT::f32)*/
3231      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
3232    if (SrcVT == MVT::f32) {
3233      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
3234      if (VT == MVT::f64)
3235        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
3236                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
3237                           DAG.getConstant(32, MVT::i32));
3238    } else if (VT == MVT::f32)
3239      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
3240                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
3241                         DAG.getConstant(32, MVT::i32));
3242    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
3243    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
3244
3245    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
3246                                            MVT::i32);
3247    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
3248    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
3249                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
3250
3251    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
3252                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
3253                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
3254    if (VT == MVT::f32) {
3255      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
3256      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
3257                        DAG.getConstant(0, MVT::i32));
3258    } else {
3259      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
3260    }
3261
3262    return Res;
3263  }
3264
3265  // Bitcast operand 1 to i32.
3266  if (SrcVT == MVT::f64)
3267    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
3268                       &Tmp1, 1).getValue(1);
3269  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
3270
3271  // Or in the signbit with integer operations.
3272  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
3273  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
3274  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
3275  if (VT == MVT::f32) {
3276    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
3277                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
3278    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
3279                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
3280  }
3281
3282  // f64: Or the high part with signbit and then combine two parts.
3283  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
3284                     &Tmp0, 1);
3285  SDValue Lo = Tmp0.getValue(0);
3286  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
3287  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
3288  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
3289}
3290
3291SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
3292  MachineFunction &MF = DAG.getMachineFunction();
3293  MachineFrameInfo *MFI = MF.getFrameInfo();
3294  MFI->setReturnAddressIsTaken(true);
3295
3296  EVT VT = Op.getValueType();
3297  DebugLoc dl = Op.getDebugLoc();
3298  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3299  if (Depth) {
3300    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
3301    SDValue Offset = DAG.getConstant(4, MVT::i32);
3302    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
3303                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
3304                       MachinePointerInfo(), false, false, false, 0);
3305  }
3306
3307  // Return LR, which contains the return address. Mark it an implicit live-in.
3308  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3309  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
3310}
3311
3312SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
3313  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
3314  MFI->setFrameAddressIsTaken(true);
3315
3316  EVT VT = Op.getValueType();
3317  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
3318  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3319  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
3320    ? ARM::R7 : ARM::R11;
3321  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
3322  while (Depth--)
3323    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
3324                            MachinePointerInfo(),
3325                            false, false, false, 0);
3326  return FrameAddr;
3327}
3328
3329/// ExpandBITCAST - If the target supports VFP, this function is called to
3330/// expand a bit convert where either the source or destination type is i64 to
3331/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
3332/// operand type is illegal (e.g., v2f32 for a target that doesn't support
3333/// vectors), since the legalizer won't know what to do with that.
3334static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
3335  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3336  DebugLoc dl = N->getDebugLoc();
3337  SDValue Op = N->getOperand(0);
3338
3339  // This function is only supposed to be called for i64 types, either as the
3340  // source or destination of the bit convert.
3341  EVT SrcVT = Op.getValueType();
3342  EVT DstVT = N->getValueType(0);
3343  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
3344         "ExpandBITCAST called for non-i64 type");
3345
3346  // Turn i64->f64 into VMOVDRR.
3347  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
3348    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3349                             DAG.getConstant(0, MVT::i32));
3350    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3351                             DAG.getConstant(1, MVT::i32));
3352    return DAG.getNode(ISD::BITCAST, dl, DstVT,
3353                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
3354  }
3355
3356  // Turn f64->i64 into VMOVRRD.
3357  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
3358    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3359                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
3360    // Merge the pieces into a single i64 value.
3361    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
3362  }
3363
3364  return SDValue();
3365}
3366
3367/// getZeroVector - Returns a vector of specified type with all zero elements.
3368/// Zero vectors are used to represent vector negation and in those cases
3369/// will be implemented with the NEON VNEG instruction.  However, VNEG does
3370/// not support i64 elements, so sometimes the zero vectors will need to be
3371/// explicitly constructed.  Regardless, use a canonical VMOV to create the
3372/// zero vector.
3373static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
3374  assert(VT.isVector() && "Expected a vector type");
3375  // The canonical modified immediate encoding of a zero vector is....0!
3376  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
3377  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
3378  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
3379  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3380}
3381
3382/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
3383/// i32 values and take a 2 x i32 value to shift plus a shift amount.
3384SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
3385                                                SelectionDAG &DAG) const {
3386  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3387  EVT VT = Op.getValueType();
3388  unsigned VTBits = VT.getSizeInBits();
3389  DebugLoc dl = Op.getDebugLoc();
3390  SDValue ShOpLo = Op.getOperand(0);
3391  SDValue ShOpHi = Op.getOperand(1);
3392  SDValue ShAmt  = Op.getOperand(2);
3393  SDValue ARMcc;
3394  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
3395
3396  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
3397
3398  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3399                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3400  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
3401  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3402                                   DAG.getConstant(VTBits, MVT::i32));
3403  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
3404  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3405  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
3406
3407  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3408  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3409                          ARMcc, DAG, dl);
3410  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
3411  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
3412                           CCR, Cmp);
3413
3414  SDValue Ops[2] = { Lo, Hi };
3415  return DAG.getMergeValues(Ops, 2, dl);
3416}
3417
3418/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
3419/// i32 values and take a 2 x i32 value to shift plus a shift amount.
3420SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
3421                                               SelectionDAG &DAG) const {
3422  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3423  EVT VT = Op.getValueType();
3424  unsigned VTBits = VT.getSizeInBits();
3425  DebugLoc dl = Op.getDebugLoc();
3426  SDValue ShOpLo = Op.getOperand(0);
3427  SDValue ShOpHi = Op.getOperand(1);
3428  SDValue ShAmt  = Op.getOperand(2);
3429  SDValue ARMcc;
3430
3431  assert(Op.getOpcode() == ISD::SHL_PARTS);
3432  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3433                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3434  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
3435  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3436                                   DAG.getConstant(VTBits, MVT::i32));
3437  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
3438  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
3439
3440  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3441  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3442  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3443                          ARMcc, DAG, dl);
3444  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
3445  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
3446                           CCR, Cmp);
3447
3448  SDValue Ops[2] = { Lo, Hi };
3449  return DAG.getMergeValues(Ops, 2, dl);
3450}
3451
3452SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3453                                            SelectionDAG &DAG) const {
3454  // The rounding mode is in bits 23:22 of the FPSCR.
3455  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
3456  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
3457  // so that the shift + and get folded into a bitfield extract.
3458  DebugLoc dl = Op.getDebugLoc();
3459  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
3460                              DAG.getConstant(Intrinsic::arm_get_fpscr,
3461                                              MVT::i32));
3462  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
3463                                  DAG.getConstant(1U << 22, MVT::i32));
3464  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3465                              DAG.getConstant(22, MVT::i32));
3466  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3467                     DAG.getConstant(3, MVT::i32));
3468}
3469
3470static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
3471                         const ARMSubtarget *ST) {
3472  EVT VT = N->getValueType(0);
3473  DebugLoc dl = N->getDebugLoc();
3474
3475  if (!ST->hasV6T2Ops())
3476    return SDValue();
3477
3478  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
3479  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
3480}
3481
3482static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
3483                          const ARMSubtarget *ST) {
3484  EVT VT = N->getValueType(0);
3485  DebugLoc dl = N->getDebugLoc();
3486
3487  if (!VT.isVector())
3488    return SDValue();
3489
3490  // Lower vector shifts on NEON to use VSHL.
3491  assert(ST->hasNEON() && "unexpected vector shift");
3492
3493  // Left shifts translate directly to the vshiftu intrinsic.
3494  if (N->getOpcode() == ISD::SHL)
3495    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3496                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
3497                       N->getOperand(0), N->getOperand(1));
3498
3499  assert((N->getOpcode() == ISD::SRA ||
3500          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
3501
3502  // NEON uses the same intrinsics for both left and right shifts.  For
3503  // right shifts, the shift amounts are negative, so negate the vector of
3504  // shift amounts.
3505  EVT ShiftVT = N->getOperand(1).getValueType();
3506  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
3507                                     getZeroVector(ShiftVT, DAG, dl),
3508                                     N->getOperand(1));
3509  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
3510                             Intrinsic::arm_neon_vshifts :
3511                             Intrinsic::arm_neon_vshiftu);
3512  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3513                     DAG.getConstant(vshiftInt, MVT::i32),
3514                     N->getOperand(0), NegatedCount);
3515}
3516
3517static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
3518                                const ARMSubtarget *ST) {
3519  EVT VT = N->getValueType(0);
3520  DebugLoc dl = N->getDebugLoc();
3521
3522  // We can get here for a node like i32 = ISD::SHL i32, i64
3523  if (VT != MVT::i64)
3524    return SDValue();
3525
3526  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
3527         "Unknown shift to lower!");
3528
3529  // We only lower SRA, SRL of 1 here, all others use generic lowering.
3530  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
3531      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
3532    return SDValue();
3533
3534  // If we are in thumb mode, we don't have RRX.
3535  if (ST->isThumb1Only()) return SDValue();
3536
3537  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
3538  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
3539                           DAG.getConstant(0, MVT::i32));
3540  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
3541                           DAG.getConstant(1, MVT::i32));
3542
3543  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
3544  // captures the result into a carry flag.
3545  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
3546  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
3547
3548  // The low part is an ARMISD::RRX operand, which shifts the carry in.
3549  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
3550
3551  // Merge the pieces into a single i64 value.
3552 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
3553}
3554
3555static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
3556  SDValue TmpOp0, TmpOp1;
3557  bool Invert = false;
3558  bool Swap = false;
3559  unsigned Opc = 0;
3560
3561  SDValue Op0 = Op.getOperand(0);
3562  SDValue Op1 = Op.getOperand(1);
3563  SDValue CC = Op.getOperand(2);
3564  EVT VT = Op.getValueType();
3565  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3566  DebugLoc dl = Op.getDebugLoc();
3567
3568  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
3569    switch (SetCCOpcode) {
3570    default: llvm_unreachable("Illegal FP comparison");
3571    case ISD::SETUNE:
3572    case ISD::SETNE:  Invert = true; // Fallthrough
3573    case ISD::SETOEQ:
3574    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3575    case ISD::SETOLT:
3576    case ISD::SETLT: Swap = true; // Fallthrough
3577    case ISD::SETOGT:
3578    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3579    case ISD::SETOLE:
3580    case ISD::SETLE:  Swap = true; // Fallthrough
3581    case ISD::SETOGE:
3582    case ISD::SETGE: Opc = ARMISD::VCGE; break;
3583    case ISD::SETUGE: Swap = true; // Fallthrough
3584    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
3585    case ISD::SETUGT: Swap = true; // Fallthrough
3586    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
3587    case ISD::SETUEQ: Invert = true; // Fallthrough
3588    case ISD::SETONE:
3589      // Expand this to (OLT | OGT).
3590      TmpOp0 = Op0;
3591      TmpOp1 = Op1;
3592      Opc = ISD::OR;
3593      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3594      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
3595      break;
3596    case ISD::SETUO: Invert = true; // Fallthrough
3597    case ISD::SETO:
3598      // Expand this to (OLT | OGE).
3599      TmpOp0 = Op0;
3600      TmpOp1 = Op1;
3601      Opc = ISD::OR;
3602      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3603      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
3604      break;
3605    }
3606  } else {
3607    // Integer comparisons.
3608    switch (SetCCOpcode) {
3609    default: llvm_unreachable("Illegal integer comparison");
3610    case ISD::SETNE:  Invert = true;
3611    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3612    case ISD::SETLT:  Swap = true;
3613    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3614    case ISD::SETLE:  Swap = true;
3615    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
3616    case ISD::SETULT: Swap = true;
3617    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
3618    case ISD::SETULE: Swap = true;
3619    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
3620    }
3621
3622    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
3623    if (Opc == ARMISD::VCEQ) {
3624
3625      SDValue AndOp;
3626      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3627        AndOp = Op0;
3628      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
3629        AndOp = Op1;
3630
3631      // Ignore bitconvert.
3632      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
3633        AndOp = AndOp.getOperand(0);
3634
3635      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
3636        Opc = ARMISD::VTST;
3637        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
3638        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
3639        Invert = !Invert;
3640      }
3641    }
3642  }
3643
3644  if (Swap)
3645    std::swap(Op0, Op1);
3646
3647  // If one of the operands is a constant vector zero, attempt to fold the
3648  // comparison to a specialized compare-against-zero form.
3649  SDValue SingleOp;
3650  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3651    SingleOp = Op0;
3652  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
3653    if (Opc == ARMISD::VCGE)
3654      Opc = ARMISD::VCLEZ;
3655    else if (Opc == ARMISD::VCGT)
3656      Opc = ARMISD::VCLTZ;
3657    SingleOp = Op1;
3658  }
3659
3660  SDValue Result;
3661  if (SingleOp.getNode()) {
3662    switch (Opc) {
3663    case ARMISD::VCEQ:
3664      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
3665    case ARMISD::VCGE:
3666      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
3667    case ARMISD::VCLEZ:
3668      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
3669    case ARMISD::VCGT:
3670      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
3671    case ARMISD::VCLTZ:
3672      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
3673    default:
3674      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3675    }
3676  } else {
3677     Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3678  }
3679
3680  if (Invert)
3681    Result = DAG.getNOT(dl, Result, VT);
3682
3683  return Result;
3684}
3685
3686/// isNEONModifiedImm - Check if the specified splat value corresponds to a
3687/// valid vector constant for a NEON instruction with a "modified immediate"
3688/// operand (e.g., VMOV).  If so, return the encoded value.
3689static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3690                                 unsigned SplatBitSize, SelectionDAG &DAG,
3691                                 EVT &VT, bool is128Bits, NEONModImmType type) {
3692  unsigned OpCmode, Imm;
3693
3694  // SplatBitSize is set to the smallest size that splats the vector, so a
3695  // zero vector will always have SplatBitSize == 8.  However, NEON modified
3696  // immediate instructions others than VMOV do not support the 8-bit encoding
3697  // of a zero vector, and the default encoding of zero is supposed to be the
3698  // 32-bit version.
3699  if (SplatBits == 0)
3700    SplatBitSize = 32;
3701
3702  switch (SplatBitSize) {
3703  case 8:
3704    if (type != VMOVModImm)
3705      return SDValue();
3706    // Any 1-byte value is OK.  Op=0, Cmode=1110.
3707    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3708    OpCmode = 0xe;
3709    Imm = SplatBits;
3710    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3711    break;
3712
3713  case 16:
3714    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3715    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3716    if ((SplatBits & ~0xff) == 0) {
3717      // Value = 0x00nn: Op=x, Cmode=100x.
3718      OpCmode = 0x8;
3719      Imm = SplatBits;
3720      break;
3721    }
3722    if ((SplatBits & ~0xff00) == 0) {
3723      // Value = 0xnn00: Op=x, Cmode=101x.
3724      OpCmode = 0xa;
3725      Imm = SplatBits >> 8;
3726      break;
3727    }
3728    return SDValue();
3729
3730  case 32:
3731    // NEON's 32-bit VMOV supports splat values where:
3732    // * only one byte is nonzero, or
3733    // * the least significant byte is 0xff and the second byte is nonzero, or
3734    // * the least significant 2 bytes are 0xff and the third is nonzero.
3735    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3736    if ((SplatBits & ~0xff) == 0) {
3737      // Value = 0x000000nn: Op=x, Cmode=000x.
3738      OpCmode = 0;
3739      Imm = SplatBits;
3740      break;
3741    }
3742    if ((SplatBits & ~0xff00) == 0) {
3743      // Value = 0x0000nn00: Op=x, Cmode=001x.
3744      OpCmode = 0x2;
3745      Imm = SplatBits >> 8;
3746      break;
3747    }
3748    if ((SplatBits & ~0xff0000) == 0) {
3749      // Value = 0x00nn0000: Op=x, Cmode=010x.
3750      OpCmode = 0x4;
3751      Imm = SplatBits >> 16;
3752      break;
3753    }
3754    if ((SplatBits & ~0xff000000) == 0) {
3755      // Value = 0xnn000000: Op=x, Cmode=011x.
3756      OpCmode = 0x6;
3757      Imm = SplatBits >> 24;
3758      break;
3759    }
3760
3761    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
3762    if (type == OtherModImm) return SDValue();
3763
3764    if ((SplatBits & ~0xffff) == 0 &&
3765        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3766      // Value = 0x0000nnff: Op=x, Cmode=1100.
3767      OpCmode = 0xc;
3768      Imm = SplatBits >> 8;
3769      SplatBits |= 0xff;
3770      break;
3771    }
3772
3773    if ((SplatBits & ~0xffffff) == 0 &&
3774        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3775      // Value = 0x00nnffff: Op=x, Cmode=1101.
3776      OpCmode = 0xd;
3777      Imm = SplatBits >> 16;
3778      SplatBits |= 0xffff;
3779      break;
3780    }
3781
3782    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3783    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3784    // VMOV.I32.  A (very) minor optimization would be to replicate the value
3785    // and fall through here to test for a valid 64-bit splat.  But, then the
3786    // caller would also need to check and handle the change in size.
3787    return SDValue();
3788
3789  case 64: {
3790    if (type != VMOVModImm)
3791      return SDValue();
3792    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
3793    uint64_t BitMask = 0xff;
3794    uint64_t Val = 0;
3795    unsigned ImmMask = 1;
3796    Imm = 0;
3797    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
3798      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
3799        Val |= BitMask;
3800        Imm |= ImmMask;
3801      } else if ((SplatBits & BitMask) != 0) {
3802        return SDValue();
3803      }
3804      BitMask <<= 8;
3805      ImmMask <<= 1;
3806    }
3807    // Op=1, Cmode=1110.
3808    OpCmode = 0x1e;
3809    SplatBits = Val;
3810    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3811    break;
3812  }
3813
3814  default:
3815    llvm_unreachable("unexpected size for isNEONModifiedImm");
3816  }
3817
3818  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
3819  return DAG.getTargetConstant(EncodedVal, MVT::i32);
3820}
3821
3822SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
3823                                           const ARMSubtarget *ST) const {
3824  if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
3825    return SDValue();
3826
3827  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
3828  assert(Op.getValueType() == MVT::f32 &&
3829         "ConstantFP custom lowering should only occur for f32.");
3830
3831  // Try splatting with a VMOV.f32...
3832  APFloat FPVal = CFP->getValueAPF();
3833  int ImmVal = ARM_AM::getFP32Imm(FPVal);
3834  if (ImmVal != -1) {
3835    DebugLoc DL = Op.getDebugLoc();
3836    SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
3837    SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
3838                                      NewVal);
3839    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
3840                       DAG.getConstant(0, MVT::i32));
3841  }
3842
3843  // If that fails, try a VMOV.i32
3844  EVT VMovVT;
3845  unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
3846  SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
3847                                     VMOVModImm);
3848  if (NewVal != SDValue()) {
3849    DebugLoc DL = Op.getDebugLoc();
3850    SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
3851                                      NewVal);
3852    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
3853                                       VecConstant);
3854    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
3855                       DAG.getConstant(0, MVT::i32));
3856  }
3857
3858  // Finally, try a VMVN.i32
3859  NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
3860                             VMVNModImm);
3861  if (NewVal != SDValue()) {
3862    DebugLoc DL = Op.getDebugLoc();
3863    SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
3864    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
3865                                       VecConstant);
3866    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
3867                       DAG.getConstant(0, MVT::i32));
3868  }
3869
3870  return SDValue();
3871}
3872
3873
3874static bool isVEXTMask(ArrayRef<int> M, EVT VT,
3875                       bool &ReverseVEXT, unsigned &Imm) {
3876  unsigned NumElts = VT.getVectorNumElements();
3877  ReverseVEXT = false;
3878
3879  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
3880  if (M[0] < 0)
3881    return false;
3882
3883  Imm = M[0];
3884
3885  // If this is a VEXT shuffle, the immediate value is the index of the first
3886  // element.  The other shuffle indices must be the successive elements after
3887  // the first one.
3888  unsigned ExpectedElt = Imm;
3889  for (unsigned i = 1; i < NumElts; ++i) {
3890    // Increment the expected index.  If it wraps around, it may still be
3891    // a VEXT but the source vectors must be swapped.
3892    ExpectedElt += 1;
3893    if (ExpectedElt == NumElts * 2) {
3894      ExpectedElt = 0;
3895      ReverseVEXT = true;
3896    }
3897
3898    if (M[i] < 0) continue; // ignore UNDEF indices
3899    if (ExpectedElt != static_cast<unsigned>(M[i]))
3900      return false;
3901  }
3902
3903  // Adjust the index value if the source operands will be swapped.
3904  if (ReverseVEXT)
3905    Imm -= NumElts;
3906
3907  return true;
3908}
3909
3910/// isVREVMask - Check if a vector shuffle corresponds to a VREV
3911/// instruction with the specified blocksize.  (The order of the elements
3912/// within each block of the vector is reversed.)
3913static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
3914  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3915         "Only possible block sizes for VREV are: 16, 32, 64");
3916
3917  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3918  if (EltSz == 64)
3919    return false;
3920
3921  unsigned NumElts = VT.getVectorNumElements();
3922  unsigned BlockElts = M[0] + 1;
3923  // If the first shuffle index is UNDEF, be optimistic.
3924  if (M[0] < 0)
3925    BlockElts = BlockSize / EltSz;
3926
3927  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3928    return false;
3929
3930  for (unsigned i = 0; i < NumElts; ++i) {
3931    if (M[i] < 0) continue; // ignore UNDEF indices
3932    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3933      return false;
3934  }
3935
3936  return true;
3937}
3938
3939static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
3940  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
3941  // range, then 0 is placed into the resulting vector. So pretty much any mask
3942  // of 8 elements can work here.
3943  return VT == MVT::v8i8 && M.size() == 8;
3944}
3945
3946static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
3947  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3948  if (EltSz == 64)
3949    return false;
3950
3951  unsigned NumElts = VT.getVectorNumElements();
3952  WhichResult = (M[0] == 0 ? 0 : 1);
3953  for (unsigned i = 0; i < NumElts; i += 2) {
3954    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3955        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
3956      return false;
3957  }
3958  return true;
3959}
3960
3961/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3962/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3963/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3964static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
3965  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3966  if (EltSz == 64)
3967    return false;
3968
3969  unsigned NumElts = VT.getVectorNumElements();
3970  WhichResult = (M[0] == 0 ? 0 : 1);
3971  for (unsigned i = 0; i < NumElts; i += 2) {
3972    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3973        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
3974      return false;
3975  }
3976  return true;
3977}
3978
3979static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
3980  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3981  if (EltSz == 64)
3982    return false;
3983
3984  unsigned NumElts = VT.getVectorNumElements();
3985  WhichResult = (M[0] == 0 ? 0 : 1);
3986  for (unsigned i = 0; i != NumElts; ++i) {
3987    if (M[i] < 0) continue; // ignore UNDEF indices
3988    if ((unsigned) M[i] != 2 * i + WhichResult)
3989      return false;
3990  }
3991
3992  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3993  if (VT.is64BitVector() && EltSz == 32)
3994    return false;
3995
3996  return true;
3997}
3998
3999/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
4000/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4001/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
4002static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
4003  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4004  if (EltSz == 64)
4005    return false;
4006
4007  unsigned Half = VT.getVectorNumElements() / 2;
4008  WhichResult = (M[0] == 0 ? 0 : 1);
4009  for (unsigned j = 0; j != 2; ++j) {
4010    unsigned Idx = WhichResult;
4011    for (unsigned i = 0; i != Half; ++i) {
4012      int MIdx = M[i + j * Half];
4013      if (MIdx >= 0 && (unsigned) MIdx != Idx)
4014        return false;
4015      Idx += 2;
4016    }
4017  }
4018
4019  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4020  if (VT.is64BitVector() && EltSz == 32)
4021    return false;
4022
4023  return true;
4024}
4025
4026static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4027  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4028  if (EltSz == 64)
4029    return false;
4030
4031  unsigned NumElts = VT.getVectorNumElements();
4032  WhichResult = (M[0] == 0 ? 0 : 1);
4033  unsigned Idx = WhichResult * NumElts / 2;
4034  for (unsigned i = 0; i != NumElts; i += 2) {
4035    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
4036        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
4037      return false;
4038    Idx += 1;
4039  }
4040
4041  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4042  if (VT.is64BitVector() && EltSz == 32)
4043    return false;
4044
4045  return true;
4046}
4047
4048/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
4049/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4050/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
4051static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
4052  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4053  if (EltSz == 64)
4054    return false;
4055
4056  unsigned NumElts = VT.getVectorNumElements();
4057  WhichResult = (M[0] == 0 ? 0 : 1);
4058  unsigned Idx = WhichResult * NumElts / 2;
4059  for (unsigned i = 0; i != NumElts; i += 2) {
4060    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
4061        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
4062      return false;
4063    Idx += 1;
4064  }
4065
4066  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4067  if (VT.is64BitVector() && EltSz == 32)
4068    return false;
4069
4070  return true;
4071}
4072
4073// If N is an integer constant that can be moved into a register in one
4074// instruction, return an SDValue of such a constant (will become a MOV
4075// instruction).  Otherwise return null.
4076static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
4077                                     const ARMSubtarget *ST, DebugLoc dl) {
4078  uint64_t Val;
4079  if (!isa<ConstantSDNode>(N))
4080    return SDValue();
4081  Val = cast<ConstantSDNode>(N)->getZExtValue();
4082
4083  if (ST->isThumb1Only()) {
4084    if (Val <= 255 || ~Val <= 255)
4085      return DAG.getConstant(Val, MVT::i32);
4086  } else {
4087    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
4088      return DAG.getConstant(Val, MVT::i32);
4089  }
4090  return SDValue();
4091}
4092
4093// If this is a case we can't handle, return null and let the default
4094// expansion code take care of it.
4095SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4096                                             const ARMSubtarget *ST) const {
4097  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
4098  DebugLoc dl = Op.getDebugLoc();
4099  EVT VT = Op.getValueType();
4100
4101  APInt SplatBits, SplatUndef;
4102  unsigned SplatBitSize;
4103  bool HasAnyUndefs;
4104  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
4105    if (SplatBitSize <= 64) {
4106      // Check if an immediate VMOV works.
4107      EVT VmovVT;
4108      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
4109                                      SplatUndef.getZExtValue(), SplatBitSize,
4110                                      DAG, VmovVT, VT.is128BitVector(),
4111                                      VMOVModImm);
4112      if (Val.getNode()) {
4113        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
4114        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4115      }
4116
4117      // Try an immediate VMVN.
4118      uint64_t NegatedImm = (~SplatBits).getZExtValue();
4119      Val = isNEONModifiedImm(NegatedImm,
4120                                      SplatUndef.getZExtValue(), SplatBitSize,
4121                                      DAG, VmovVT, VT.is128BitVector(),
4122                                      VMVNModImm);
4123      if (Val.getNode()) {
4124        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
4125        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4126      }
4127
4128      // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
4129      if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
4130        int ImmVal = ARM_AM::getFP32Imm(SplatBits);
4131        if (ImmVal != -1) {
4132          SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
4133          return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
4134        }
4135      }
4136    }
4137  }
4138
4139  // Scan through the operands to see if only one value is used.
4140  unsigned NumElts = VT.getVectorNumElements();
4141  bool isOnlyLowElement = true;
4142  bool usesOnlyOneValue = true;
4143  bool isConstant = true;
4144  SDValue Value;
4145  for (unsigned i = 0; i < NumElts; ++i) {
4146    SDValue V = Op.getOperand(i);
4147    if (V.getOpcode() == ISD::UNDEF)
4148      continue;
4149    if (i > 0)
4150      isOnlyLowElement = false;
4151    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
4152      isConstant = false;
4153
4154    if (!Value.getNode())
4155      Value = V;
4156    else if (V != Value)
4157      usesOnlyOneValue = false;
4158  }
4159
4160  if (!Value.getNode())
4161    return DAG.getUNDEF(VT);
4162
4163  if (isOnlyLowElement)
4164    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
4165
4166  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4167
4168  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
4169  // i32 and try again.
4170  if (usesOnlyOneValue && EltSize <= 32) {
4171    if (!isConstant)
4172      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
4173    if (VT.getVectorElementType().isFloatingPoint()) {
4174      SmallVector<SDValue, 8> Ops;
4175      for (unsigned i = 0; i < NumElts; ++i)
4176        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
4177                                  Op.getOperand(i)));
4178      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
4179      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
4180      Val = LowerBUILD_VECTOR(Val, DAG, ST);
4181      if (Val.getNode())
4182        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4183    }
4184    SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
4185    if (Val.getNode())
4186      return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
4187  }
4188
4189  // If all elements are constants and the case above didn't get hit, fall back
4190  // to the default expansion, which will generate a load from the constant
4191  // pool.
4192  if (isConstant)
4193    return SDValue();
4194
4195  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
4196  if (NumElts >= 4) {
4197    SDValue shuffle = ReconstructShuffle(Op, DAG);
4198    if (shuffle != SDValue())
4199      return shuffle;
4200  }
4201
4202  // Vectors with 32- or 64-bit elements can be built by directly assigning
4203  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
4204  // will be legalized.
4205  if (EltSize >= 32) {
4206    // Do the expansion with floating-point types, since that is what the VFP
4207    // registers are defined to use, and since i64 is not legal.
4208    EVT EltVT = EVT::getFloatingPointVT(EltSize);
4209    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
4210    SmallVector<SDValue, 8> Ops;
4211    for (unsigned i = 0; i < NumElts; ++i)
4212      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
4213    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
4214    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4215  }
4216
4217  return SDValue();
4218}
4219
4220// Gather data to see if the operation can be modelled as a
4221// shuffle in combination with VEXTs.
4222SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
4223                                              SelectionDAG &DAG) const {
4224  DebugLoc dl = Op.getDebugLoc();
4225  EVT VT = Op.getValueType();
4226  unsigned NumElts = VT.getVectorNumElements();
4227
4228  SmallVector<SDValue, 2> SourceVecs;
4229  SmallVector<unsigned, 2> MinElts;
4230  SmallVector<unsigned, 2> MaxElts;
4231
4232  for (unsigned i = 0; i < NumElts; ++i) {
4233    SDValue V = Op.getOperand(i);
4234    if (V.getOpcode() == ISD::UNDEF)
4235      continue;
4236    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
4237      // A shuffle can only come from building a vector from various
4238      // elements of other vectors.
4239      return SDValue();
4240    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
4241               VT.getVectorElementType()) {
4242      // This code doesn't know how to handle shuffles where the vector
4243      // element types do not match (this happens because type legalization
4244      // promotes the return type of EXTRACT_VECTOR_ELT).
4245      // FIXME: It might be appropriate to extend this code to handle
4246      // mismatched types.
4247      return SDValue();
4248    }
4249
4250    // Record this extraction against the appropriate vector if possible...
4251    SDValue SourceVec = V.getOperand(0);
4252    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
4253    bool FoundSource = false;
4254    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
4255      if (SourceVecs[j] == SourceVec) {
4256        if (MinElts[j] > EltNo)
4257          MinElts[j] = EltNo;
4258        if (MaxElts[j] < EltNo)
4259          MaxElts[j] = EltNo;
4260        FoundSource = true;
4261        break;
4262      }
4263    }
4264
4265    // Or record a new source if not...
4266    if (!FoundSource) {
4267      SourceVecs.push_back(SourceVec);
4268      MinElts.push_back(EltNo);
4269      MaxElts.push_back(EltNo);
4270    }
4271  }
4272
4273  // Currently only do something sane when at most two source vectors
4274  // involved.
4275  if (SourceVecs.size() > 2)
4276    return SDValue();
4277
4278  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
4279  int VEXTOffsets[2] = {0, 0};
4280
4281  // This loop extracts the usage patterns of the source vectors
4282  // and prepares appropriate SDValues for a shuffle if possible.
4283  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
4284    if (SourceVecs[i].getValueType() == VT) {
4285      // No VEXT necessary
4286      ShuffleSrcs[i] = SourceVecs[i];
4287      VEXTOffsets[i] = 0;
4288      continue;
4289    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
4290      // It probably isn't worth padding out a smaller vector just to
4291      // break it down again in a shuffle.
4292      return SDValue();
4293    }
4294
4295    // Since only 64-bit and 128-bit vectors are legal on ARM and
4296    // we've eliminated the other cases...
4297    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
4298           "unexpected vector sizes in ReconstructShuffle");
4299
4300    if (MaxElts[i] - MinElts[i] >= NumElts) {
4301      // Span too large for a VEXT to cope
4302      return SDValue();
4303    }
4304
4305    if (MinElts[i] >= NumElts) {
4306      // The extraction can just take the second half
4307      VEXTOffsets[i] = NumElts;
4308      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4309                                   SourceVecs[i],
4310                                   DAG.getIntPtrConstant(NumElts));
4311    } else if (MaxElts[i] < NumElts) {
4312      // The extraction can just take the first half
4313      VEXTOffsets[i] = 0;
4314      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4315                                   SourceVecs[i],
4316                                   DAG.getIntPtrConstant(0));
4317    } else {
4318      // An actual VEXT is needed
4319      VEXTOffsets[i] = MinElts[i];
4320      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4321                                     SourceVecs[i],
4322                                     DAG.getIntPtrConstant(0));
4323      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4324                                     SourceVecs[i],
4325                                     DAG.getIntPtrConstant(NumElts));
4326      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
4327                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
4328    }
4329  }
4330
4331  SmallVector<int, 8> Mask;
4332
4333  for (unsigned i = 0; i < NumElts; ++i) {
4334    SDValue Entry = Op.getOperand(i);
4335    if (Entry.getOpcode() == ISD::UNDEF) {
4336      Mask.push_back(-1);
4337      continue;
4338    }
4339
4340    SDValue ExtractVec = Entry.getOperand(0);
4341    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
4342                                          .getOperand(1))->getSExtValue();
4343    if (ExtractVec == SourceVecs[0]) {
4344      Mask.push_back(ExtractElt - VEXTOffsets[0]);
4345    } else {
4346      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
4347    }
4348  }
4349
4350  // Final check before we try to produce nonsense...
4351  if (isShuffleMaskLegal(Mask, VT))
4352    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
4353                                &Mask[0]);
4354
4355  return SDValue();
4356}
4357
4358/// isShuffleMaskLegal - Targets can use this to indicate that they only
4359/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4360/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4361/// are assumed to be legal.
4362bool
4363ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
4364                                      EVT VT) const {
4365  if (VT.getVectorNumElements() == 4 &&
4366      (VT.is128BitVector() || VT.is64BitVector())) {
4367    unsigned PFIndexes[4];
4368    for (unsigned i = 0; i != 4; ++i) {
4369      if (M[i] < 0)
4370        PFIndexes[i] = 8;
4371      else
4372        PFIndexes[i] = M[i];
4373    }
4374
4375    // Compute the index in the perfect shuffle table.
4376    unsigned PFTableIndex =
4377      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
4378    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
4379    unsigned Cost = (PFEntry >> 30);
4380
4381    if (Cost <= 4)
4382      return true;
4383  }
4384
4385  bool ReverseVEXT;
4386  unsigned Imm, WhichResult;
4387
4388  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4389  return (EltSize >= 32 ||
4390          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
4391          isVREVMask(M, VT, 64) ||
4392          isVREVMask(M, VT, 32) ||
4393          isVREVMask(M, VT, 16) ||
4394          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
4395          isVTBLMask(M, VT) ||
4396          isVTRNMask(M, VT, WhichResult) ||
4397          isVUZPMask(M, VT, WhichResult) ||
4398          isVZIPMask(M, VT, WhichResult) ||
4399          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
4400          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
4401          isVZIP_v_undef_Mask(M, VT, WhichResult));
4402}
4403
4404/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4405/// the specified operations to build the shuffle.
4406static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
4407                                      SDValue RHS, SelectionDAG &DAG,
4408                                      DebugLoc dl) {
4409  unsigned OpNum = (PFEntry >> 26) & 0x0F;
4410  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
4411  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
4412
4413  enum {
4414    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
4415    OP_VREV,
4416    OP_VDUP0,
4417    OP_VDUP1,
4418    OP_VDUP2,
4419    OP_VDUP3,
4420    OP_VEXT1,
4421    OP_VEXT2,
4422    OP_VEXT3,
4423    OP_VUZPL, // VUZP, left result
4424    OP_VUZPR, // VUZP, right result
4425    OP_VZIPL, // VZIP, left result
4426    OP_VZIPR, // VZIP, right result
4427    OP_VTRNL, // VTRN, left result
4428    OP_VTRNR  // VTRN, right result
4429  };
4430
4431  if (OpNum == OP_COPY) {
4432    if (LHSID == (1*9+2)*9+3) return LHS;
4433    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
4434    return RHS;
4435  }
4436
4437  SDValue OpLHS, OpRHS;
4438  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
4439  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
4440  EVT VT = OpLHS.getValueType();
4441
4442  switch (OpNum) {
4443  default: llvm_unreachable("Unknown shuffle opcode!");
4444  case OP_VREV:
4445    // VREV divides the vector in half and swaps within the half.
4446    if (VT.getVectorElementType() == MVT::i32 ||
4447        VT.getVectorElementType() == MVT::f32)
4448      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
4449    // vrev <4 x i16> -> VREV32
4450    if (VT.getVectorElementType() == MVT::i16)
4451      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
4452    // vrev <4 x i8> -> VREV16
4453    assert(VT.getVectorElementType() == MVT::i8);
4454    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
4455  case OP_VDUP0:
4456  case OP_VDUP1:
4457  case OP_VDUP2:
4458  case OP_VDUP3:
4459    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
4460                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
4461  case OP_VEXT1:
4462  case OP_VEXT2:
4463  case OP_VEXT3:
4464    return DAG.getNode(ARMISD::VEXT, dl, VT,
4465                       OpLHS, OpRHS,
4466                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
4467  case OP_VUZPL:
4468  case OP_VUZPR:
4469    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4470                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
4471  case OP_VZIPL:
4472  case OP_VZIPR:
4473    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4474                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
4475  case OP_VTRNL:
4476  case OP_VTRNR:
4477    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4478                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
4479  }
4480}
4481
4482static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
4483                                       ArrayRef<int> ShuffleMask,
4484                                       SelectionDAG &DAG) {
4485  // Check to see if we can use the VTBL instruction.
4486  SDValue V1 = Op.getOperand(0);
4487  SDValue V2 = Op.getOperand(1);
4488  DebugLoc DL = Op.getDebugLoc();
4489
4490  SmallVector<SDValue, 8> VTBLMask;
4491  for (ArrayRef<int>::iterator
4492         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
4493    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
4494
4495  if (V2.getNode()->getOpcode() == ISD::UNDEF)
4496    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
4497                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4498                                   &VTBLMask[0], 8));
4499
4500  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
4501                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4502                                 &VTBLMask[0], 8));
4503}
4504
4505static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
4506  SDValue V1 = Op.getOperand(0);
4507  SDValue V2 = Op.getOperand(1);
4508  DebugLoc dl = Op.getDebugLoc();
4509  EVT VT = Op.getValueType();
4510  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4511
4512  // Convert shuffles that are directly supported on NEON to target-specific
4513  // DAG nodes, instead of keeping them as shuffles and matching them again
4514  // during code selection.  This is more efficient and avoids the possibility
4515  // of inconsistencies between legalization and selection.
4516  // FIXME: floating-point vectors should be canonicalized to integer vectors
4517  // of the same time so that they get CSEd properly.
4518  ArrayRef<int> ShuffleMask = SVN->getMask();
4519
4520  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4521  if (EltSize <= 32) {
4522    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
4523      int Lane = SVN->getSplatIndex();
4524      // If this is undef splat, generate it via "just" vdup, if possible.
4525      if (Lane == -1) Lane = 0;
4526
4527      // Test if V1 is a SCALAR_TO_VECTOR.
4528      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4529        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
4530      }
4531      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
4532      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
4533      // reaches it).
4534      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
4535          !isa<ConstantSDNode>(V1.getOperand(0))) {
4536        bool IsScalarToVector = true;
4537        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
4538          if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
4539            IsScalarToVector = false;
4540            break;
4541          }
4542        if (IsScalarToVector)
4543          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
4544      }
4545      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
4546                         DAG.getConstant(Lane, MVT::i32));
4547    }
4548
4549    bool ReverseVEXT;
4550    unsigned Imm;
4551    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
4552      if (ReverseVEXT)
4553        std::swap(V1, V2);
4554      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
4555                         DAG.getConstant(Imm, MVT::i32));
4556    }
4557
4558    if (isVREVMask(ShuffleMask, VT, 64))
4559      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
4560    if (isVREVMask(ShuffleMask, VT, 32))
4561      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
4562    if (isVREVMask(ShuffleMask, VT, 16))
4563      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
4564
4565    // Check for Neon shuffles that modify both input vectors in place.
4566    // If both results are used, i.e., if there are two shuffles with the same
4567    // source operands and with masks corresponding to both results of one of
4568    // these operations, DAG memoization will ensure that a single node is
4569    // used for both shuffles.
4570    unsigned WhichResult;
4571    if (isVTRNMask(ShuffleMask, VT, WhichResult))
4572      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4573                         V1, V2).getValue(WhichResult);
4574    if (isVUZPMask(ShuffleMask, VT, WhichResult))
4575      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4576                         V1, V2).getValue(WhichResult);
4577    if (isVZIPMask(ShuffleMask, VT, WhichResult))
4578      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4579                         V1, V2).getValue(WhichResult);
4580
4581    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
4582      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4583                         V1, V1).getValue(WhichResult);
4584    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
4585      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4586                         V1, V1).getValue(WhichResult);
4587    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
4588      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4589                         V1, V1).getValue(WhichResult);
4590  }
4591
4592  // If the shuffle is not directly supported and it has 4 elements, use
4593  // the PerfectShuffle-generated table to synthesize it from other shuffles.
4594  unsigned NumElts = VT.getVectorNumElements();
4595  if (NumElts == 4) {
4596    unsigned PFIndexes[4];
4597    for (unsigned i = 0; i != 4; ++i) {
4598      if (ShuffleMask[i] < 0)
4599        PFIndexes[i] = 8;
4600      else
4601        PFIndexes[i] = ShuffleMask[i];
4602    }
4603
4604    // Compute the index in the perfect shuffle table.
4605    unsigned PFTableIndex =
4606      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
4607    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
4608    unsigned Cost = (PFEntry >> 30);
4609
4610    if (Cost <= 4)
4611      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
4612  }
4613
4614  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
4615  if (EltSize >= 32) {
4616    // Do the expansion with floating-point types, since that is what the VFP
4617    // registers are defined to use, and since i64 is not legal.
4618    EVT EltVT = EVT::getFloatingPointVT(EltSize);
4619    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
4620    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
4621    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
4622    SmallVector<SDValue, 8> Ops;
4623    for (unsigned i = 0; i < NumElts; ++i) {
4624      if (ShuffleMask[i] < 0)
4625        Ops.push_back(DAG.getUNDEF(EltVT));
4626      else
4627        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
4628                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
4629                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
4630                                                  MVT::i32)));
4631    }
4632    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
4633    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4634  }
4635
4636  if (VT == MVT::v8i8) {
4637    SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
4638    if (NewOp.getNode())
4639      return NewOp;
4640  }
4641
4642  return SDValue();
4643}
4644
4645static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
4646  // INSERT_VECTOR_ELT is legal only for immediate indexes.
4647  SDValue Lane = Op.getOperand(2);
4648  if (!isa<ConstantSDNode>(Lane))
4649    return SDValue();
4650
4651  return Op;
4652}
4653
4654static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
4655  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
4656  SDValue Lane = Op.getOperand(1);
4657  if (!isa<ConstantSDNode>(Lane))
4658    return SDValue();
4659
4660  SDValue Vec = Op.getOperand(0);
4661  if (Op.getValueType() == MVT::i32 &&
4662      Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
4663    DebugLoc dl = Op.getDebugLoc();
4664    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
4665  }
4666
4667  return Op;
4668}
4669
4670static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
4671  // The only time a CONCAT_VECTORS operation can have legal types is when
4672  // two 64-bit vectors are concatenated to a 128-bit vector.
4673  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
4674         "unexpected CONCAT_VECTORS");
4675  DebugLoc dl = Op.getDebugLoc();
4676  SDValue Val = DAG.getUNDEF(MVT::v2f64);
4677  SDValue Op0 = Op.getOperand(0);
4678  SDValue Op1 = Op.getOperand(1);
4679  if (Op0.getOpcode() != ISD::UNDEF)
4680    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
4681                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
4682                      DAG.getIntPtrConstant(0));
4683  if (Op1.getOpcode() != ISD::UNDEF)
4684    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
4685                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
4686                      DAG.getIntPtrConstant(1));
4687  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
4688}
4689
4690/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
4691/// element has been zero/sign-extended, depending on the isSigned parameter,
4692/// from an integer type half its size.
4693static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
4694                                   bool isSigned) {
4695  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
4696  EVT VT = N->getValueType(0);
4697  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
4698    SDNode *BVN = N->getOperand(0).getNode();
4699    if (BVN->getValueType(0) != MVT::v4i32 ||
4700        BVN->getOpcode() != ISD::BUILD_VECTOR)
4701      return false;
4702    unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
4703    unsigned HiElt = 1 - LoElt;
4704    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
4705    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
4706    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
4707    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
4708    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
4709      return false;
4710    if (isSigned) {
4711      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
4712          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
4713        return true;
4714    } else {
4715      if (Hi0->isNullValue() && Hi1->isNullValue())
4716        return true;
4717    }
4718    return false;
4719  }
4720
4721  if (N->getOpcode() != ISD::BUILD_VECTOR)
4722    return false;
4723
4724  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
4725    SDNode *Elt = N->getOperand(i).getNode();
4726    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
4727      unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4728      unsigned HalfSize = EltSize / 2;
4729      if (isSigned) {
4730        if (!isIntN(HalfSize, C->getSExtValue()))
4731          return false;
4732      } else {
4733        if (!isUIntN(HalfSize, C->getZExtValue()))
4734          return false;
4735      }
4736      continue;
4737    }
4738    return false;
4739  }
4740
4741  return true;
4742}
4743
4744/// isSignExtended - Check if a node is a vector value that is sign-extended
4745/// or a constant BUILD_VECTOR with sign-extended elements.
4746static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
4747  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
4748    return true;
4749  if (isExtendedBUILD_VECTOR(N, DAG, true))
4750    return true;
4751  return false;
4752}
4753
4754/// isZeroExtended - Check if a node is a vector value that is zero-extended
4755/// or a constant BUILD_VECTOR with zero-extended elements.
4756static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
4757  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
4758    return true;
4759  if (isExtendedBUILD_VECTOR(N, DAG, false))
4760    return true;
4761  return false;
4762}
4763
4764/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
4765/// load, or BUILD_VECTOR with extended elements, return the unextended value.
4766static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
4767  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
4768    return N->getOperand(0);
4769  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
4770    return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
4771                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
4772                       LD->isNonTemporal(), LD->isInvariant(),
4773                       LD->getAlignment());
4774  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
4775  // have been legalized as a BITCAST from v4i32.
4776  if (N->getOpcode() == ISD::BITCAST) {
4777    SDNode *BVN = N->getOperand(0).getNode();
4778    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
4779           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
4780    unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
4781    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
4782                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
4783  }
4784  // Construct a new BUILD_VECTOR with elements truncated to half the size.
4785  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
4786  EVT VT = N->getValueType(0);
4787  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
4788  unsigned NumElts = VT.getVectorNumElements();
4789  MVT TruncVT = MVT::getIntegerVT(EltSize);
4790  SmallVector<SDValue, 8> Ops;
4791  for (unsigned i = 0; i != NumElts; ++i) {
4792    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
4793    const APInt &CInt = C->getAPIntValue();
4794    Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
4795  }
4796  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
4797                     MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
4798}
4799
4800static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
4801  unsigned Opcode = N->getOpcode();
4802  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4803    SDNode *N0 = N->getOperand(0).getNode();
4804    SDNode *N1 = N->getOperand(1).getNode();
4805    return N0->hasOneUse() && N1->hasOneUse() &&
4806      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
4807  }
4808  return false;
4809}
4810
4811static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
4812  unsigned Opcode = N->getOpcode();
4813  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4814    SDNode *N0 = N->getOperand(0).getNode();
4815    SDNode *N1 = N->getOperand(1).getNode();
4816    return N0->hasOneUse() && N1->hasOneUse() &&
4817      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
4818  }
4819  return false;
4820}
4821
4822static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
4823  // Multiplications are only custom-lowered for 128-bit vectors so that
4824  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
4825  EVT VT = Op.getValueType();
4826  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
4827  SDNode *N0 = Op.getOperand(0).getNode();
4828  SDNode *N1 = Op.getOperand(1).getNode();
4829  unsigned NewOpc = 0;
4830  bool isMLA = false;
4831  bool isN0SExt = isSignExtended(N0, DAG);
4832  bool isN1SExt = isSignExtended(N1, DAG);
4833  if (isN0SExt && isN1SExt)
4834    NewOpc = ARMISD::VMULLs;
4835  else {
4836    bool isN0ZExt = isZeroExtended(N0, DAG);
4837    bool isN1ZExt = isZeroExtended(N1, DAG);
4838    if (isN0ZExt && isN1ZExt)
4839      NewOpc = ARMISD::VMULLu;
4840    else if (isN1SExt || isN1ZExt) {
4841      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
4842      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
4843      if (isN1SExt && isAddSubSExt(N0, DAG)) {
4844        NewOpc = ARMISD::VMULLs;
4845        isMLA = true;
4846      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
4847        NewOpc = ARMISD::VMULLu;
4848        isMLA = true;
4849      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
4850        std::swap(N0, N1);
4851        NewOpc = ARMISD::VMULLu;
4852        isMLA = true;
4853      }
4854    }
4855
4856    if (!NewOpc) {
4857      if (VT == MVT::v2i64)
4858        // Fall through to expand this.  It is not legal.
4859        return SDValue();
4860      else
4861        // Other vector multiplications are legal.
4862        return Op;
4863    }
4864  }
4865
4866  // Legalize to a VMULL instruction.
4867  DebugLoc DL = Op.getDebugLoc();
4868  SDValue Op0;
4869  SDValue Op1 = SkipExtension(N1, DAG);
4870  if (!isMLA) {
4871    Op0 = SkipExtension(N0, DAG);
4872    assert(Op0.getValueType().is64BitVector() &&
4873           Op1.getValueType().is64BitVector() &&
4874           "unexpected types for extended operands to VMULL");
4875    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
4876  }
4877
4878  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
4879  // isel lowering to take advantage of no-stall back to back vmul + vmla.
4880  //   vmull q0, d4, d6
4881  //   vmlal q0, d5, d6
4882  // is faster than
4883  //   vaddl q0, d4, d5
4884  //   vmovl q1, d6
4885  //   vmul  q0, q0, q1
4886  SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
4887  SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
4888  EVT Op1VT = Op1.getValueType();
4889  return DAG.getNode(N0->getOpcode(), DL, VT,
4890                     DAG.getNode(NewOpc, DL, VT,
4891                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
4892                     DAG.getNode(NewOpc, DL, VT,
4893                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
4894}
4895
4896static SDValue
4897LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
4898  // Convert to float
4899  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
4900  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
4901  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
4902  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
4903  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
4904  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
4905  // Get reciprocal estimate.
4906  // float4 recip = vrecpeq_f32(yf);
4907  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4908                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
4909  // Because char has a smaller range than uchar, we can actually get away
4910  // without any newton steps.  This requires that we use a weird bias
4911  // of 0xb000, however (again, this has been exhaustively tested).
4912  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
4913  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
4914  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
4915  Y = DAG.getConstant(0xb000, MVT::i32);
4916  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
4917  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
4918  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
4919  // Convert back to short.
4920  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
4921  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
4922  return X;
4923}
4924
4925static SDValue
4926LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
4927  SDValue N2;
4928  // Convert to float.
4929  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
4930  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
4931  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
4932  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
4933  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
4934  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
4935
4936  // Use reciprocal estimate and one refinement step.
4937  // float4 recip = vrecpeq_f32(yf);
4938  // recip *= vrecpsq_f32(yf, recip);
4939  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4940                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
4941  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4942                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
4943                   N1, N2);
4944  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
4945  // Because short has a smaller range than ushort, we can actually get away
4946  // with only a single newton step.  This requires that we use a weird bias
4947  // of 89, however (again, this has been exhaustively tested).
4948  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
4949  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
4950  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
4951  N1 = DAG.getConstant(0x89, MVT::i32);
4952  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
4953  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
4954  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
4955  // Convert back to integer and return.
4956  // return vmovn_s32(vcvt_s32_f32(result));
4957  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
4958  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
4959  return N0;
4960}
4961
4962static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
4963  EVT VT = Op.getValueType();
4964  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
4965         "unexpected type for custom-lowering ISD::SDIV");
4966
4967  DebugLoc dl = Op.getDebugLoc();
4968  SDValue N0 = Op.getOperand(0);
4969  SDValue N1 = Op.getOperand(1);
4970  SDValue N2, N3;
4971
4972  if (VT == MVT::v8i8) {
4973    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
4974    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
4975
4976    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4977                     DAG.getIntPtrConstant(4));
4978    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4979                     DAG.getIntPtrConstant(4));
4980    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4981                     DAG.getIntPtrConstant(0));
4982    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4983                     DAG.getIntPtrConstant(0));
4984
4985    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
4986    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
4987
4988    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
4989    N0 = LowerCONCAT_VECTORS(N0, DAG);
4990
4991    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
4992    return N0;
4993  }
4994  return LowerSDIV_v4i16(N0, N1, dl, DAG);
4995}
4996
4997static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
4998  EVT VT = Op.getValueType();
4999  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
5000         "unexpected type for custom-lowering ISD::UDIV");
5001
5002  DebugLoc dl = Op.getDebugLoc();
5003  SDValue N0 = Op.getOperand(0);
5004  SDValue N1 = Op.getOperand(1);
5005  SDValue N2, N3;
5006
5007  if (VT == MVT::v8i8) {
5008    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
5009    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
5010
5011    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5012                     DAG.getIntPtrConstant(4));
5013    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5014                     DAG.getIntPtrConstant(4));
5015    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5016                     DAG.getIntPtrConstant(0));
5017    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5018                     DAG.getIntPtrConstant(0));
5019
5020    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
5021    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
5022
5023    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
5024    N0 = LowerCONCAT_VECTORS(N0, DAG);
5025
5026    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
5027                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
5028                     N0);
5029    return N0;
5030  }
5031
5032  // v4i16 sdiv ... Convert to float.
5033  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
5034  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
5035  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
5036  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
5037  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
5038  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
5039
5040  // Use reciprocal estimate and two refinement steps.
5041  // float4 recip = vrecpeq_f32(yf);
5042  // recip *= vrecpsq_f32(yf, recip);
5043  // recip *= vrecpsq_f32(yf, recip);
5044  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5045                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
5046  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5047                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5048                   BN1, N2);
5049  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
5050  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5051                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5052                   BN1, N2);
5053  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
5054  // Simply multiplying by the reciprocal estimate can leave us a few ulps
5055  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
5056  // and that it will never cause us to return an answer too large).
5057  // float4 result = as_float4(as_int4(xf*recip) + 2);
5058  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
5059  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
5060  N1 = DAG.getConstant(2, MVT::i32);
5061  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
5062  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
5063  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
5064  // Convert back to integer and return.
5065  // return vmovn_u32(vcvt_s32_f32(result));
5066  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
5067  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
5068  return N0;
5069}
5070
5071static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
5072  EVT VT = Op.getNode()->getValueType(0);
5073  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5074
5075  unsigned Opc;
5076  bool ExtraOp = false;
5077  switch (Op.getOpcode()) {
5078  default: llvm_unreachable("Invalid code");
5079  case ISD::ADDC: Opc = ARMISD::ADDC; break;
5080  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
5081  case ISD::SUBC: Opc = ARMISD::SUBC; break;
5082  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
5083  }
5084
5085  if (!ExtraOp)
5086    return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
5087                       Op.getOperand(1));
5088  return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
5089                     Op.getOperand(1), Op.getOperand(2));
5090}
5091
5092static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
5093  // Monotonic load/store is legal for all targets
5094  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
5095    return Op;
5096
5097  // Aquire/Release load/store is not legal for targets without a
5098  // dmb or equivalent available.
5099  return SDValue();
5100}
5101
5102
5103static void
5104ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
5105                    SelectionDAG &DAG, unsigned NewOp) {
5106  DebugLoc dl = Node->getDebugLoc();
5107  assert (Node->getValueType(0) == MVT::i64 &&
5108          "Only know how to expand i64 atomics");
5109
5110  SmallVector<SDValue, 6> Ops;
5111  Ops.push_back(Node->getOperand(0)); // Chain
5112  Ops.push_back(Node->getOperand(1)); // Ptr
5113  // Low part of Val1
5114  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5115                            Node->getOperand(2), DAG.getIntPtrConstant(0)));
5116  // High part of Val1
5117  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5118                            Node->getOperand(2), DAG.getIntPtrConstant(1)));
5119  if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
5120    // High part of Val1
5121    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5122                              Node->getOperand(3), DAG.getIntPtrConstant(0)));
5123    // High part of Val2
5124    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5125                              Node->getOperand(3), DAG.getIntPtrConstant(1)));
5126  }
5127  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
5128  SDValue Result =
5129    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
5130                            cast<MemSDNode>(Node)->getMemOperand());
5131  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
5132  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
5133  Results.push_back(Result.getValue(2));
5134}
5135
5136SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
5137  switch (Op.getOpcode()) {
5138  default: llvm_unreachable("Don't know how to custom lower this!");
5139  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
5140  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
5141  case ISD::GlobalAddress:
5142    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
5143      LowerGlobalAddressELF(Op, DAG);
5144  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
5145  case ISD::SELECT:        return LowerSELECT(Op, DAG);
5146  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
5147  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
5148  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
5149  case ISD::VASTART:       return LowerVASTART(Op, DAG);
5150  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
5151  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
5152  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
5153  case ISD::SINT_TO_FP:
5154  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
5155  case ISD::FP_TO_SINT:
5156  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
5157  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
5158  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
5159  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
5160  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
5161  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
5162  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
5163  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
5164                                                               Subtarget);
5165  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
5166  case ISD::SHL:
5167  case ISD::SRL:
5168  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
5169  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
5170  case ISD::SRL_PARTS:
5171  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
5172  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
5173  case ISD::SETCC:         return LowerVSETCC(Op, DAG);
5174  case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
5175  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
5176  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
5177  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
5178  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
5179  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
5180  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
5181  case ISD::MUL:           return LowerMUL(Op, DAG);
5182  case ISD::SDIV:          return LowerSDIV(Op, DAG);
5183  case ISD::UDIV:          return LowerUDIV(Op, DAG);
5184  case ISD::ADDC:
5185  case ISD::ADDE:
5186  case ISD::SUBC:
5187  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
5188  case ISD::ATOMIC_LOAD:
5189  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
5190  }
5191}
5192
5193/// ReplaceNodeResults - Replace the results of node with an illegal result
5194/// type with new values built out of custom code.
5195void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
5196                                           SmallVectorImpl<SDValue>&Results,
5197                                           SelectionDAG &DAG) const {
5198  SDValue Res;
5199  switch (N->getOpcode()) {
5200  default:
5201    llvm_unreachable("Don't know how to custom expand this!");
5202  case ISD::BITCAST:
5203    Res = ExpandBITCAST(N, DAG);
5204    break;
5205  case ISD::SRL:
5206  case ISD::SRA:
5207    Res = Expand64BitShift(N, DAG, Subtarget);
5208    break;
5209  case ISD::ATOMIC_LOAD_ADD:
5210    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
5211    return;
5212  case ISD::ATOMIC_LOAD_AND:
5213    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
5214    return;
5215  case ISD::ATOMIC_LOAD_NAND:
5216    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
5217    return;
5218  case ISD::ATOMIC_LOAD_OR:
5219    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
5220    return;
5221  case ISD::ATOMIC_LOAD_SUB:
5222    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
5223    return;
5224  case ISD::ATOMIC_LOAD_XOR:
5225    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
5226    return;
5227  case ISD::ATOMIC_SWAP:
5228    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
5229    return;
5230  case ISD::ATOMIC_CMP_SWAP:
5231    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
5232    return;
5233  }
5234  if (Res.getNode())
5235    Results.push_back(Res);
5236}
5237
5238//===----------------------------------------------------------------------===//
5239//                           ARM Scheduler Hooks
5240//===----------------------------------------------------------------------===//
5241
5242MachineBasicBlock *
5243ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
5244                                     MachineBasicBlock *BB,
5245                                     unsigned Size) const {
5246  unsigned dest    = MI->getOperand(0).getReg();
5247  unsigned ptr     = MI->getOperand(1).getReg();
5248  unsigned oldval  = MI->getOperand(2).getReg();
5249  unsigned newval  = MI->getOperand(3).getReg();
5250  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5251  DebugLoc dl = MI->getDebugLoc();
5252  bool isThumb2 = Subtarget->isThumb2();
5253
5254  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5255  unsigned scratch =
5256    MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
5257                                       : ARM::GPRRegisterClass);
5258
5259  if (isThumb2) {
5260    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
5261    MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
5262    MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
5263  }
5264
5265  unsigned ldrOpc, strOpc;
5266  switch (Size) {
5267  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
5268  case 1:
5269    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5270    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5271    break;
5272  case 2:
5273    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5274    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5275    break;
5276  case 4:
5277    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5278    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5279    break;
5280  }
5281
5282  MachineFunction *MF = BB->getParent();
5283  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5284  MachineFunction::iterator It = BB;
5285  ++It; // insert the new blocks after the current block
5286
5287  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
5288  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
5289  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5290  MF->insert(It, loop1MBB);
5291  MF->insert(It, loop2MBB);
5292  MF->insert(It, exitMBB);
5293
5294  // Transfer the remainder of BB and its successor edges to exitMBB.
5295  exitMBB->splice(exitMBB->begin(), BB,
5296                  llvm::next(MachineBasicBlock::iterator(MI)),
5297                  BB->end());
5298  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5299
5300  //  thisMBB:
5301  //   ...
5302  //   fallthrough --> loop1MBB
5303  BB->addSuccessor(loop1MBB);
5304
5305  // loop1MBB:
5306  //   ldrex dest, [ptr]
5307  //   cmp dest, oldval
5308  //   bne exitMBB
5309  BB = loop1MBB;
5310  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5311  if (ldrOpc == ARM::t2LDREX)
5312    MIB.addImm(0);
5313  AddDefaultPred(MIB);
5314  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
5315                 .addReg(dest).addReg(oldval));
5316  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5317    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5318  BB->addSuccessor(loop2MBB);
5319  BB->addSuccessor(exitMBB);
5320
5321  // loop2MBB:
5322  //   strex scratch, newval, [ptr]
5323  //   cmp scratch, #0
5324  //   bne loop1MBB
5325  BB = loop2MBB;
5326  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
5327  if (strOpc == ARM::t2STREX)
5328    MIB.addImm(0);
5329  AddDefaultPred(MIB);
5330  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5331                 .addReg(scratch).addImm(0));
5332  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5333    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5334  BB->addSuccessor(loop1MBB);
5335  BB->addSuccessor(exitMBB);
5336
5337  //  exitMBB:
5338  //   ...
5339  BB = exitMBB;
5340
5341  MI->eraseFromParent();   // The instruction is gone now.
5342
5343  return BB;
5344}
5345
5346MachineBasicBlock *
5347ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
5348                                    unsigned Size, unsigned BinOpcode) const {
5349  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
5350  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5351
5352  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5353  MachineFunction *MF = BB->getParent();
5354  MachineFunction::iterator It = BB;
5355  ++It;
5356
5357  unsigned dest = MI->getOperand(0).getReg();
5358  unsigned ptr = MI->getOperand(1).getReg();
5359  unsigned incr = MI->getOperand(2).getReg();
5360  DebugLoc dl = MI->getDebugLoc();
5361  bool isThumb2 = Subtarget->isThumb2();
5362
5363  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5364  if (isThumb2) {
5365    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
5366    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
5367  }
5368
5369  unsigned ldrOpc, strOpc;
5370  switch (Size) {
5371  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
5372  case 1:
5373    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5374    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5375    break;
5376  case 2:
5377    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5378    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5379    break;
5380  case 4:
5381    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5382    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5383    break;
5384  }
5385
5386  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5387  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5388  MF->insert(It, loopMBB);
5389  MF->insert(It, exitMBB);
5390
5391  // Transfer the remainder of BB and its successor edges to exitMBB.
5392  exitMBB->splice(exitMBB->begin(), BB,
5393                  llvm::next(MachineBasicBlock::iterator(MI)),
5394                  BB->end());
5395  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5396
5397  const TargetRegisterClass *TRC =
5398    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5399  unsigned scratch = MRI.createVirtualRegister(TRC);
5400  unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
5401
5402  //  thisMBB:
5403  //   ...
5404  //   fallthrough --> loopMBB
5405  BB->addSuccessor(loopMBB);
5406
5407  //  loopMBB:
5408  //   ldrex dest, ptr
5409  //   <binop> scratch2, dest, incr
5410  //   strex scratch, scratch2, ptr
5411  //   cmp scratch, #0
5412  //   bne- loopMBB
5413  //   fallthrough --> exitMBB
5414  BB = loopMBB;
5415  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5416  if (ldrOpc == ARM::t2LDREX)
5417    MIB.addImm(0);
5418  AddDefaultPred(MIB);
5419  if (BinOpcode) {
5420    // operand order needs to go the other way for NAND
5421    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
5422      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
5423                     addReg(incr).addReg(dest)).addReg(0);
5424    else
5425      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
5426                     addReg(dest).addReg(incr)).addReg(0);
5427  }
5428
5429  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
5430  if (strOpc == ARM::t2STREX)
5431    MIB.addImm(0);
5432  AddDefaultPred(MIB);
5433  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5434                 .addReg(scratch).addImm(0));
5435  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5436    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5437
5438  BB->addSuccessor(loopMBB);
5439  BB->addSuccessor(exitMBB);
5440
5441  //  exitMBB:
5442  //   ...
5443  BB = exitMBB;
5444
5445  MI->eraseFromParent();   // The instruction is gone now.
5446
5447  return BB;
5448}
5449
5450MachineBasicBlock *
5451ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
5452                                          MachineBasicBlock *BB,
5453                                          unsigned Size,
5454                                          bool signExtend,
5455                                          ARMCC::CondCodes Cond) const {
5456  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5457
5458  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5459  MachineFunction *MF = BB->getParent();
5460  MachineFunction::iterator It = BB;
5461  ++It;
5462
5463  unsigned dest = MI->getOperand(0).getReg();
5464  unsigned ptr = MI->getOperand(1).getReg();
5465  unsigned incr = MI->getOperand(2).getReg();
5466  unsigned oldval = dest;
5467  DebugLoc dl = MI->getDebugLoc();
5468  bool isThumb2 = Subtarget->isThumb2();
5469
5470  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5471  if (isThumb2) {
5472    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
5473    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
5474  }
5475
5476  unsigned ldrOpc, strOpc, extendOpc;
5477  switch (Size) {
5478  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
5479  case 1:
5480    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5481    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5482    extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
5483    break;
5484  case 2:
5485    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5486    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5487    extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
5488    break;
5489  case 4:
5490    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5491    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5492    extendOpc = 0;
5493    break;
5494  }
5495
5496  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5497  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5498  MF->insert(It, loopMBB);
5499  MF->insert(It, exitMBB);
5500
5501  // Transfer the remainder of BB and its successor edges to exitMBB.
5502  exitMBB->splice(exitMBB->begin(), BB,
5503                  llvm::next(MachineBasicBlock::iterator(MI)),
5504                  BB->end());
5505  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5506
5507  const TargetRegisterClass *TRC =
5508    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5509  unsigned scratch = MRI.createVirtualRegister(TRC);
5510  unsigned scratch2 = MRI.createVirtualRegister(TRC);
5511
5512  //  thisMBB:
5513  //   ...
5514  //   fallthrough --> loopMBB
5515  BB->addSuccessor(loopMBB);
5516
5517  //  loopMBB:
5518  //   ldrex dest, ptr
5519  //   (sign extend dest, if required)
5520  //   cmp dest, incr
5521  //   cmov.cond scratch2, dest, incr
5522  //   strex scratch, scratch2, ptr
5523  //   cmp scratch, #0
5524  //   bne- loopMBB
5525  //   fallthrough --> exitMBB
5526  BB = loopMBB;
5527  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5528  if (ldrOpc == ARM::t2LDREX)
5529    MIB.addImm(0);
5530  AddDefaultPred(MIB);
5531
5532  // Sign extend the value, if necessary.
5533  if (signExtend && extendOpc) {
5534    oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
5535    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
5536                     .addReg(dest)
5537                     .addImm(0));
5538  }
5539
5540  // Build compare and cmov instructions.
5541  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
5542                 .addReg(oldval).addReg(incr));
5543  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
5544         .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
5545
5546  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
5547  if (strOpc == ARM::t2STREX)
5548    MIB.addImm(0);
5549  AddDefaultPred(MIB);
5550  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5551                 .addReg(scratch).addImm(0));
5552  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5553    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5554
5555  BB->addSuccessor(loopMBB);
5556  BB->addSuccessor(exitMBB);
5557
5558  //  exitMBB:
5559  //   ...
5560  BB = exitMBB;
5561
5562  MI->eraseFromParent();   // The instruction is gone now.
5563
5564  return BB;
5565}
5566
5567MachineBasicBlock *
5568ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
5569                                      unsigned Op1, unsigned Op2,
5570                                      bool NeedsCarry, bool IsCmpxchg) const {
5571  // This also handles ATOMIC_SWAP, indicated by Op1==0.
5572  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5573
5574  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5575  MachineFunction *MF = BB->getParent();
5576  MachineFunction::iterator It = BB;
5577  ++It;
5578
5579  unsigned destlo = MI->getOperand(0).getReg();
5580  unsigned desthi = MI->getOperand(1).getReg();
5581  unsigned ptr = MI->getOperand(2).getReg();
5582  unsigned vallo = MI->getOperand(3).getReg();
5583  unsigned valhi = MI->getOperand(4).getReg();
5584  DebugLoc dl = MI->getDebugLoc();
5585  bool isThumb2 = Subtarget->isThumb2();
5586
5587  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5588  if (isThumb2) {
5589    MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
5590    MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
5591    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
5592  }
5593
5594  unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
5595  unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
5596
5597  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5598  MachineBasicBlock *contBB = 0, *cont2BB = 0;
5599  if (IsCmpxchg) {
5600    contBB = MF->CreateMachineBasicBlock(LLVM_BB);
5601    cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
5602  }
5603  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5604  MF->insert(It, loopMBB);
5605  if (IsCmpxchg) {
5606    MF->insert(It, contBB);
5607    MF->insert(It, cont2BB);
5608  }
5609  MF->insert(It, exitMBB);
5610
5611  // Transfer the remainder of BB and its successor edges to exitMBB.
5612  exitMBB->splice(exitMBB->begin(), BB,
5613                  llvm::next(MachineBasicBlock::iterator(MI)),
5614                  BB->end());
5615  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5616
5617  const TargetRegisterClass *TRC =
5618    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5619  unsigned storesuccess = MRI.createVirtualRegister(TRC);
5620
5621  //  thisMBB:
5622  //   ...
5623  //   fallthrough --> loopMBB
5624  BB->addSuccessor(loopMBB);
5625
5626  //  loopMBB:
5627  //   ldrexd r2, r3, ptr
5628  //   <binopa> r0, r2, incr
5629  //   <binopb> r1, r3, incr
5630  //   strexd storesuccess, r0, r1, ptr
5631  //   cmp storesuccess, #0
5632  //   bne- loopMBB
5633  //   fallthrough --> exitMBB
5634  //
5635  // Note that the registers are explicitly specified because there is not any
5636  // way to force the register allocator to allocate a register pair.
5637  //
5638  // FIXME: The hardcoded registers are not necessary for Thumb2, but we
5639  // need to properly enforce the restriction that the two output registers
5640  // for ldrexd must be different.
5641  BB = loopMBB;
5642  // Load
5643  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
5644                 .addReg(ARM::R2, RegState::Define)
5645                 .addReg(ARM::R3, RegState::Define).addReg(ptr));
5646  // Copy r2/r3 into dest.  (This copy will normally be coalesced.)
5647  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
5648  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
5649
5650  if (IsCmpxchg) {
5651    // Add early exit
5652    for (unsigned i = 0; i < 2; i++) {
5653      AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
5654                                                         ARM::CMPrr))
5655                     .addReg(i == 0 ? destlo : desthi)
5656                     .addReg(i == 0 ? vallo : valhi));
5657      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5658        .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5659      BB->addSuccessor(exitMBB);
5660      BB->addSuccessor(i == 0 ? contBB : cont2BB);
5661      BB = (i == 0 ? contBB : cont2BB);
5662    }
5663
5664    // Copy to physregs for strexd
5665    unsigned setlo = MI->getOperand(5).getReg();
5666    unsigned sethi = MI->getOperand(6).getReg();
5667    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
5668    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
5669  } else if (Op1) {
5670    // Perform binary operation
5671    AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
5672                   .addReg(destlo).addReg(vallo))
5673        .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
5674    AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
5675                   .addReg(desthi).addReg(valhi)).addReg(0);
5676  } else {
5677    // Copy to physregs for strexd
5678    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
5679    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
5680  }
5681
5682  // Store
5683  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
5684                 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
5685  // Cmp+jump
5686  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5687                 .addReg(storesuccess).addImm(0));
5688  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5689    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5690
5691  BB->addSuccessor(loopMBB);
5692  BB->addSuccessor(exitMBB);
5693
5694  //  exitMBB:
5695  //   ...
5696  BB = exitMBB;
5697
5698  MI->eraseFromParent();   // The instruction is gone now.
5699
5700  return BB;
5701}
5702
5703/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
5704/// registers the function context.
5705void ARMTargetLowering::
5706SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
5707                       MachineBasicBlock *DispatchBB, int FI) const {
5708  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5709  DebugLoc dl = MI->getDebugLoc();
5710  MachineFunction *MF = MBB->getParent();
5711  MachineRegisterInfo *MRI = &MF->getRegInfo();
5712  MachineConstantPool *MCP = MF->getConstantPool();
5713  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
5714  const Function *F = MF->getFunction();
5715
5716  bool isThumb = Subtarget->isThumb();
5717  bool isThumb2 = Subtarget->isThumb2();
5718
5719  unsigned PCLabelId = AFI->createPICLabelUId();
5720  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
5721  ARMConstantPoolValue *CPV =
5722    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
5723  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
5724
5725  const TargetRegisterClass *TRC =
5726    isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5727
5728  // Grab constant pool and fixed stack memory operands.
5729  MachineMemOperand *CPMMO =
5730    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
5731                             MachineMemOperand::MOLoad, 4, 4);
5732
5733  MachineMemOperand *FIMMOSt =
5734    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
5735                             MachineMemOperand::MOStore, 4, 4);
5736
5737  // Load the address of the dispatch MBB into the jump buffer.
5738  if (isThumb2) {
5739    // Incoming value: jbuf
5740    //   ldr.n  r5, LCPI1_1
5741    //   orr    r5, r5, #1
5742    //   add    r5, pc
5743    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
5744    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5745    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
5746                   .addConstantPoolIndex(CPI)
5747                   .addMemOperand(CPMMO));
5748    // Set the low bit because of thumb mode.
5749    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5750    AddDefaultCC(
5751      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
5752                     .addReg(NewVReg1, RegState::Kill)
5753                     .addImm(0x01)));
5754    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5755    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
5756      .addReg(NewVReg2, RegState::Kill)
5757      .addImm(PCLabelId);
5758    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
5759                   .addReg(NewVReg3, RegState::Kill)
5760                   .addFrameIndex(FI)
5761                   .addImm(36)  // &jbuf[1] :: pc
5762                   .addMemOperand(FIMMOSt));
5763  } else if (isThumb) {
5764    // Incoming value: jbuf
5765    //   ldr.n  r1, LCPI1_4
5766    //   add    r1, pc
5767    //   mov    r2, #1
5768    //   orrs   r1, r2
5769    //   add    r2, $jbuf, #+4 ; &jbuf[1]
5770    //   str    r1, [r2]
5771    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5772    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
5773                   .addConstantPoolIndex(CPI)
5774                   .addMemOperand(CPMMO));
5775    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5776    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
5777      .addReg(NewVReg1, RegState::Kill)
5778      .addImm(PCLabelId);
5779    // Set the low bit because of thumb mode.
5780    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5781    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
5782                   .addReg(ARM::CPSR, RegState::Define)
5783                   .addImm(1));
5784    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
5785    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
5786                   .addReg(ARM::CPSR, RegState::Define)
5787                   .addReg(NewVReg2, RegState::Kill)
5788                   .addReg(NewVReg3, RegState::Kill));
5789    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
5790    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
5791                   .addFrameIndex(FI)
5792                   .addImm(36)); // &jbuf[1] :: pc
5793    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
5794                   .addReg(NewVReg4, RegState::Kill)
5795                   .addReg(NewVReg5, RegState::Kill)
5796                   .addImm(0)
5797                   .addMemOperand(FIMMOSt));
5798  } else {
5799    // Incoming value: jbuf
5800    //   ldr  r1, LCPI1_1
5801    //   add  r1, pc, r1
5802    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
5803    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5804    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12),  NewVReg1)
5805                   .addConstantPoolIndex(CPI)
5806                   .addImm(0)
5807                   .addMemOperand(CPMMO));
5808    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5809    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
5810                   .addReg(NewVReg1, RegState::Kill)
5811                   .addImm(PCLabelId));
5812    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
5813                   .addReg(NewVReg2, RegState::Kill)
5814                   .addFrameIndex(FI)
5815                   .addImm(36)  // &jbuf[1] :: pc
5816                   .addMemOperand(FIMMOSt));
5817  }
5818}
5819
5820MachineBasicBlock *ARMTargetLowering::
5821EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
5822  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5823  DebugLoc dl = MI->getDebugLoc();
5824  MachineFunction *MF = MBB->getParent();
5825  MachineRegisterInfo *MRI = &MF->getRegInfo();
5826  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
5827  MachineFrameInfo *MFI = MF->getFrameInfo();
5828  int FI = MFI->getFunctionContextIndex();
5829
5830  const TargetRegisterClass *TRC =
5831    Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5832
5833  // Get a mapping of the call site numbers to all of the landing pads they're
5834  // associated with.
5835  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
5836  unsigned MaxCSNum = 0;
5837  MachineModuleInfo &MMI = MF->getMMI();
5838  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
5839       ++BB) {
5840    if (!BB->isLandingPad()) continue;
5841
5842    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
5843    // pad.
5844    for (MachineBasicBlock::iterator
5845           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
5846      if (!II->isEHLabel()) continue;
5847
5848      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
5849      if (!MMI.hasCallSiteLandingPad(Sym)) continue;
5850
5851      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
5852      for (SmallVectorImpl<unsigned>::iterator
5853             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
5854           CSI != CSE; ++CSI) {
5855        CallSiteNumToLPad[*CSI].push_back(BB);
5856        MaxCSNum = std::max(MaxCSNum, *CSI);
5857      }
5858      break;
5859    }
5860  }
5861
5862  // Get an ordered list of the machine basic blocks for the jump table.
5863  std::vector<MachineBasicBlock*> LPadList;
5864  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
5865  LPadList.reserve(CallSiteNumToLPad.size());
5866  for (unsigned I = 1; I <= MaxCSNum; ++I) {
5867    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
5868    for (SmallVectorImpl<MachineBasicBlock*>::iterator
5869           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
5870      LPadList.push_back(*II);
5871      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
5872    }
5873  }
5874
5875  assert(!LPadList.empty() &&
5876         "No landing pad destinations for the dispatch jump table!");
5877
5878  // Create the jump table and associated information.
5879  MachineJumpTableInfo *JTI =
5880    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
5881  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
5882  unsigned UId = AFI->createJumpTableUId();
5883
5884  // Create the MBBs for the dispatch code.
5885
5886  // Shove the dispatch's address into the return slot in the function context.
5887  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
5888  DispatchBB->setIsLandingPad();
5889
5890  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
5891  BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
5892  DispatchBB->addSuccessor(TrapBB);
5893
5894  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
5895  DispatchBB->addSuccessor(DispContBB);
5896
5897  // Insert and MBBs.
5898  MF->insert(MF->end(), DispatchBB);
5899  MF->insert(MF->end(), DispContBB);
5900  MF->insert(MF->end(), TrapBB);
5901
5902  // Insert code into the entry block that creates and registers the function
5903  // context.
5904  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
5905
5906  MachineMemOperand *FIMMOLd =
5907    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
5908                             MachineMemOperand::MOLoad |
5909                             MachineMemOperand::MOVolatile, 4, 4);
5910
5911  if (AFI->isThumb1OnlyFunction())
5912    BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
5913  else if (!Subtarget->hasVFP2())
5914    BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
5915  else
5916    BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
5917
5918  unsigned NumLPads = LPadList.size();
5919  if (Subtarget->isThumb2()) {
5920    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5921    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
5922                   .addFrameIndex(FI)
5923                   .addImm(4)
5924                   .addMemOperand(FIMMOLd));
5925
5926    if (NumLPads < 256) {
5927      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
5928                     .addReg(NewVReg1)
5929                     .addImm(LPadList.size()));
5930    } else {
5931      unsigned VReg1 = MRI->createVirtualRegister(TRC);
5932      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
5933                     .addImm(NumLPads & 0xFFFF));
5934
5935      unsigned VReg2 = VReg1;
5936      if ((NumLPads & 0xFFFF0000) != 0) {
5937        VReg2 = MRI->createVirtualRegister(TRC);
5938        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
5939                       .addReg(VReg1)
5940                       .addImm(NumLPads >> 16));
5941      }
5942
5943      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
5944                     .addReg(NewVReg1)
5945                     .addReg(VReg2));
5946    }
5947
5948    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
5949      .addMBB(TrapBB)
5950      .addImm(ARMCC::HI)
5951      .addReg(ARM::CPSR);
5952
5953    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5954    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
5955                   .addJumpTableIndex(MJTI)
5956                   .addImm(UId));
5957
5958    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
5959    AddDefaultCC(
5960      AddDefaultPred(
5961        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
5962        .addReg(NewVReg3, RegState::Kill)
5963        .addReg(NewVReg1)
5964        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
5965
5966    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
5967      .addReg(NewVReg4, RegState::Kill)
5968      .addReg(NewVReg1)
5969      .addJumpTableIndex(MJTI)
5970      .addImm(UId);
5971  } else if (Subtarget->isThumb()) {
5972    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5973    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
5974                   .addFrameIndex(FI)
5975                   .addImm(1)
5976                   .addMemOperand(FIMMOLd));
5977
5978    if (NumLPads < 256) {
5979      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
5980                     .addReg(NewVReg1)
5981                     .addImm(NumLPads));
5982    } else {
5983      MachineConstantPool *ConstantPool = MF->getConstantPool();
5984      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
5985      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
5986
5987      // MachineConstantPool wants an explicit alignment.
5988      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
5989      if (Align == 0)
5990        Align = getTargetData()->getTypeAllocSize(C->getType());
5991      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
5992
5993      unsigned VReg1 = MRI->createVirtualRegister(TRC);
5994      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
5995                     .addReg(VReg1, RegState::Define)
5996                     .addConstantPoolIndex(Idx));
5997      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
5998                     .addReg(NewVReg1)
5999                     .addReg(VReg1));
6000    }
6001
6002    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
6003      .addMBB(TrapBB)
6004      .addImm(ARMCC::HI)
6005      .addReg(ARM::CPSR);
6006
6007    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6008    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
6009                   .addReg(ARM::CPSR, RegState::Define)
6010                   .addReg(NewVReg1)
6011                   .addImm(2));
6012
6013    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6014    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
6015                   .addJumpTableIndex(MJTI)
6016                   .addImm(UId));
6017
6018    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6019    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
6020                   .addReg(ARM::CPSR, RegState::Define)
6021                   .addReg(NewVReg2, RegState::Kill)
6022                   .addReg(NewVReg3));
6023
6024    MachineMemOperand *JTMMOLd =
6025      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
6026                               MachineMemOperand::MOLoad, 4, 4);
6027
6028    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6029    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
6030                   .addReg(NewVReg4, RegState::Kill)
6031                   .addImm(0)
6032                   .addMemOperand(JTMMOLd));
6033
6034    unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
6035    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
6036                   .addReg(ARM::CPSR, RegState::Define)
6037                   .addReg(NewVReg5, RegState::Kill)
6038                   .addReg(NewVReg3));
6039
6040    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
6041      .addReg(NewVReg6, RegState::Kill)
6042      .addJumpTableIndex(MJTI)
6043      .addImm(UId);
6044  } else {
6045    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6046    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
6047                   .addFrameIndex(FI)
6048                   .addImm(4)
6049                   .addMemOperand(FIMMOLd));
6050
6051    if (NumLPads < 256) {
6052      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
6053                     .addReg(NewVReg1)
6054                     .addImm(NumLPads));
6055    } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
6056      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6057      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
6058                     .addImm(NumLPads & 0xFFFF));
6059
6060      unsigned VReg2 = VReg1;
6061      if ((NumLPads & 0xFFFF0000) != 0) {
6062        VReg2 = MRI->createVirtualRegister(TRC);
6063        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
6064                       .addReg(VReg1)
6065                       .addImm(NumLPads >> 16));
6066      }
6067
6068      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
6069                     .addReg(NewVReg1)
6070                     .addReg(VReg2));
6071    } else {
6072      MachineConstantPool *ConstantPool = MF->getConstantPool();
6073      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
6074      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
6075
6076      // MachineConstantPool wants an explicit alignment.
6077      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
6078      if (Align == 0)
6079        Align = getTargetData()->getTypeAllocSize(C->getType());
6080      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
6081
6082      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6083      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
6084                     .addReg(VReg1, RegState::Define)
6085                     .addConstantPoolIndex(Idx)
6086                     .addImm(0));
6087      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
6088                     .addReg(NewVReg1)
6089                     .addReg(VReg1, RegState::Kill));
6090    }
6091
6092    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
6093      .addMBB(TrapBB)
6094      .addImm(ARMCC::HI)
6095      .addReg(ARM::CPSR);
6096
6097    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6098    AddDefaultCC(
6099      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
6100                     .addReg(NewVReg1)
6101                     .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
6102    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6103    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
6104                   .addJumpTableIndex(MJTI)
6105                   .addImm(UId));
6106
6107    MachineMemOperand *JTMMOLd =
6108      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
6109                               MachineMemOperand::MOLoad, 4, 4);
6110    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6111    AddDefaultPred(
6112      BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
6113      .addReg(NewVReg3, RegState::Kill)
6114      .addReg(NewVReg4)
6115      .addImm(0)
6116      .addMemOperand(JTMMOLd));
6117
6118    BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
6119      .addReg(NewVReg5, RegState::Kill)
6120      .addReg(NewVReg4)
6121      .addJumpTableIndex(MJTI)
6122      .addImm(UId);
6123  }
6124
6125  // Add the jump table entries as successors to the MBB.
6126  MachineBasicBlock *PrevMBB = 0;
6127  for (std::vector<MachineBasicBlock*>::iterator
6128         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
6129    MachineBasicBlock *CurMBB = *I;
6130    if (PrevMBB != CurMBB)
6131      DispContBB->addSuccessor(CurMBB);
6132    PrevMBB = CurMBB;
6133  }
6134
6135  // N.B. the order the invoke BBs are processed in doesn't matter here.
6136  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
6137  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
6138  const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
6139  SmallVector<MachineBasicBlock*, 64> MBBLPads;
6140  for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
6141         I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
6142    MachineBasicBlock *BB = *I;
6143
6144    // Remove the landing pad successor from the invoke block and replace it
6145    // with the new dispatch block.
6146    SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
6147                                                  BB->succ_end());
6148    while (!Successors.empty()) {
6149      MachineBasicBlock *SMBB = Successors.pop_back_val();
6150      if (SMBB->isLandingPad()) {
6151        BB->removeSuccessor(SMBB);
6152        MBBLPads.push_back(SMBB);
6153      }
6154    }
6155
6156    BB->addSuccessor(DispatchBB);
6157
6158    // Find the invoke call and mark all of the callee-saved registers as
6159    // 'implicit defined' so that they're spilled. This prevents code from
6160    // moving instructions to before the EH block, where they will never be
6161    // executed.
6162    for (MachineBasicBlock::reverse_iterator
6163           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
6164      if (!II->isCall()) continue;
6165
6166      DenseMap<unsigned, bool> DefRegs;
6167      for (MachineInstr::mop_iterator
6168             OI = II->operands_begin(), OE = II->operands_end();
6169           OI != OE; ++OI) {
6170        if (!OI->isReg()) continue;
6171        DefRegs[OI->getReg()] = true;
6172      }
6173
6174      MachineInstrBuilder MIB(&*II);
6175
6176      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
6177        unsigned Reg = SavedRegs[i];
6178        if (Subtarget->isThumb2() &&
6179            !ARM::tGPRRegisterClass->contains(Reg) &&
6180            !ARM::hGPRRegisterClass->contains(Reg))
6181          continue;
6182        else if (Subtarget->isThumb1Only() &&
6183                 !ARM::tGPRRegisterClass->contains(Reg))
6184          continue;
6185        else if (!Subtarget->isThumb() &&
6186                 !ARM::GPRRegisterClass->contains(Reg))
6187          continue;
6188        if (!DefRegs[Reg])
6189          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
6190      }
6191
6192      break;
6193    }
6194  }
6195
6196  // Mark all former landing pads as non-landing pads. The dispatch is the only
6197  // landing pad now.
6198  for (SmallVectorImpl<MachineBasicBlock*>::iterator
6199         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
6200    (*I)->setIsLandingPad(false);
6201
6202  // The instruction is gone now.
6203  MI->eraseFromParent();
6204
6205  return MBB;
6206}
6207
6208static
6209MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
6210  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
6211       E = MBB->succ_end(); I != E; ++I)
6212    if (*I != Succ)
6213      return *I;
6214  llvm_unreachable("Expecting a BB with two successors!");
6215}
6216
6217MachineBasicBlock *
6218ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
6219                                               MachineBasicBlock *BB) const {
6220  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6221  DebugLoc dl = MI->getDebugLoc();
6222  bool isThumb2 = Subtarget->isThumb2();
6223  switch (MI->getOpcode()) {
6224  default: {
6225    MI->dump();
6226    llvm_unreachable("Unexpected instr type to insert");
6227  }
6228  // The Thumb2 pre-indexed stores have the same MI operands, they just
6229  // define them differently in the .td files from the isel patterns, so
6230  // they need pseudos.
6231  case ARM::t2STR_preidx:
6232    MI->setDesc(TII->get(ARM::t2STR_PRE));
6233    return BB;
6234  case ARM::t2STRB_preidx:
6235    MI->setDesc(TII->get(ARM::t2STRB_PRE));
6236    return BB;
6237  case ARM::t2STRH_preidx:
6238    MI->setDesc(TII->get(ARM::t2STRH_PRE));
6239    return BB;
6240
6241  case ARM::STRi_preidx:
6242  case ARM::STRBi_preidx: {
6243    unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
6244      ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
6245    // Decode the offset.
6246    unsigned Offset = MI->getOperand(4).getImm();
6247    bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
6248    Offset = ARM_AM::getAM2Offset(Offset);
6249    if (isSub)
6250      Offset = -Offset;
6251
6252    MachineMemOperand *MMO = *MI->memoperands_begin();
6253    BuildMI(*BB, MI, dl, TII->get(NewOpc))
6254      .addOperand(MI->getOperand(0))  // Rn_wb
6255      .addOperand(MI->getOperand(1))  // Rt
6256      .addOperand(MI->getOperand(2))  // Rn
6257      .addImm(Offset)                 // offset (skip GPR==zero_reg)
6258      .addOperand(MI->getOperand(5))  // pred
6259      .addOperand(MI->getOperand(6))
6260      .addMemOperand(MMO);
6261    MI->eraseFromParent();
6262    return BB;
6263  }
6264  case ARM::STRr_preidx:
6265  case ARM::STRBr_preidx:
6266  case ARM::STRH_preidx: {
6267    unsigned NewOpc;
6268    switch (MI->getOpcode()) {
6269    default: llvm_unreachable("unexpected opcode!");
6270    case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
6271    case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
6272    case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
6273    }
6274    MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
6275    for (unsigned i = 0; i < MI->getNumOperands(); ++i)
6276      MIB.addOperand(MI->getOperand(i));
6277    MI->eraseFromParent();
6278    return BB;
6279  }
6280  case ARM::ATOMIC_LOAD_ADD_I8:
6281     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6282  case ARM::ATOMIC_LOAD_ADD_I16:
6283     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6284  case ARM::ATOMIC_LOAD_ADD_I32:
6285     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6286
6287  case ARM::ATOMIC_LOAD_AND_I8:
6288     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6289  case ARM::ATOMIC_LOAD_AND_I16:
6290     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6291  case ARM::ATOMIC_LOAD_AND_I32:
6292     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6293
6294  case ARM::ATOMIC_LOAD_OR_I8:
6295     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6296  case ARM::ATOMIC_LOAD_OR_I16:
6297     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6298  case ARM::ATOMIC_LOAD_OR_I32:
6299     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6300
6301  case ARM::ATOMIC_LOAD_XOR_I8:
6302     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6303  case ARM::ATOMIC_LOAD_XOR_I16:
6304     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6305  case ARM::ATOMIC_LOAD_XOR_I32:
6306     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6307
6308  case ARM::ATOMIC_LOAD_NAND_I8:
6309     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6310  case ARM::ATOMIC_LOAD_NAND_I16:
6311     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6312  case ARM::ATOMIC_LOAD_NAND_I32:
6313     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6314
6315  case ARM::ATOMIC_LOAD_SUB_I8:
6316     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6317  case ARM::ATOMIC_LOAD_SUB_I16:
6318     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6319  case ARM::ATOMIC_LOAD_SUB_I32:
6320     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6321
6322  case ARM::ATOMIC_LOAD_MIN_I8:
6323     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
6324  case ARM::ATOMIC_LOAD_MIN_I16:
6325     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
6326  case ARM::ATOMIC_LOAD_MIN_I32:
6327     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
6328
6329  case ARM::ATOMIC_LOAD_MAX_I8:
6330     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
6331  case ARM::ATOMIC_LOAD_MAX_I16:
6332     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
6333  case ARM::ATOMIC_LOAD_MAX_I32:
6334     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
6335
6336  case ARM::ATOMIC_LOAD_UMIN_I8:
6337     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
6338  case ARM::ATOMIC_LOAD_UMIN_I16:
6339     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
6340  case ARM::ATOMIC_LOAD_UMIN_I32:
6341     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
6342
6343  case ARM::ATOMIC_LOAD_UMAX_I8:
6344     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
6345  case ARM::ATOMIC_LOAD_UMAX_I16:
6346     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
6347  case ARM::ATOMIC_LOAD_UMAX_I32:
6348     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
6349
6350  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
6351  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
6352  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
6353
6354  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
6355  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
6356  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
6357
6358
6359  case ARM::ATOMADD6432:
6360    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
6361                              isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
6362                              /*NeedsCarry*/ true);
6363  case ARM::ATOMSUB6432:
6364    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
6365                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
6366                              /*NeedsCarry*/ true);
6367  case ARM::ATOMOR6432:
6368    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
6369                              isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6370  case ARM::ATOMXOR6432:
6371    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
6372                              isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6373  case ARM::ATOMAND6432:
6374    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
6375                              isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6376  case ARM::ATOMSWAP6432:
6377    return EmitAtomicBinary64(MI, BB, 0, 0, false);
6378  case ARM::ATOMCMPXCHG6432:
6379    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
6380                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
6381                              /*NeedsCarry*/ false, /*IsCmpxchg*/true);
6382
6383  case ARM::tMOVCCr_pseudo: {
6384    // To "insert" a SELECT_CC instruction, we actually have to insert the
6385    // diamond control-flow pattern.  The incoming instruction knows the
6386    // destination vreg to set, the condition code register to branch on, the
6387    // true/false values to select between, and a branch opcode to use.
6388    const BasicBlock *LLVM_BB = BB->getBasicBlock();
6389    MachineFunction::iterator It = BB;
6390    ++It;
6391
6392    //  thisMBB:
6393    //  ...
6394    //   TrueVal = ...
6395    //   cmpTY ccX, r1, r2
6396    //   bCC copy1MBB
6397    //   fallthrough --> copy0MBB
6398    MachineBasicBlock *thisMBB  = BB;
6399    MachineFunction *F = BB->getParent();
6400    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
6401    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
6402    F->insert(It, copy0MBB);
6403    F->insert(It, sinkMBB);
6404
6405    // Transfer the remainder of BB and its successor edges to sinkMBB.
6406    sinkMBB->splice(sinkMBB->begin(), BB,
6407                    llvm::next(MachineBasicBlock::iterator(MI)),
6408                    BB->end());
6409    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
6410
6411    BB->addSuccessor(copy0MBB);
6412    BB->addSuccessor(sinkMBB);
6413
6414    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
6415      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
6416
6417    //  copy0MBB:
6418    //   %FalseValue = ...
6419    //   # fallthrough to sinkMBB
6420    BB = copy0MBB;
6421
6422    // Update machine-CFG edges
6423    BB->addSuccessor(sinkMBB);
6424
6425    //  sinkMBB:
6426    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
6427    //  ...
6428    BB = sinkMBB;
6429    BuildMI(*BB, BB->begin(), dl,
6430            TII->get(ARM::PHI), MI->getOperand(0).getReg())
6431      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
6432      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
6433
6434    MI->eraseFromParent();   // The pseudo instruction is gone now.
6435    return BB;
6436  }
6437
6438  case ARM::BCCi64:
6439  case ARM::BCCZi64: {
6440    // If there is an unconditional branch to the other successor, remove it.
6441    BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
6442
6443    // Compare both parts that make up the double comparison separately for
6444    // equality.
6445    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
6446
6447    unsigned LHS1 = MI->getOperand(1).getReg();
6448    unsigned LHS2 = MI->getOperand(2).getReg();
6449    if (RHSisZero) {
6450      AddDefaultPred(BuildMI(BB, dl,
6451                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
6452                     .addReg(LHS1).addImm(0));
6453      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
6454        .addReg(LHS2).addImm(0)
6455        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
6456    } else {
6457      unsigned RHS1 = MI->getOperand(3).getReg();
6458      unsigned RHS2 = MI->getOperand(4).getReg();
6459      AddDefaultPred(BuildMI(BB, dl,
6460                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
6461                     .addReg(LHS1).addReg(RHS1));
6462      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
6463        .addReg(LHS2).addReg(RHS2)
6464        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
6465    }
6466
6467    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
6468    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
6469    if (MI->getOperand(0).getImm() == ARMCC::NE)
6470      std::swap(destMBB, exitMBB);
6471
6472    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
6473      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
6474    if (isThumb2)
6475      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
6476    else
6477      BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
6478
6479    MI->eraseFromParent();   // The pseudo instruction is gone now.
6480    return BB;
6481  }
6482
6483  case ARM::Int_eh_sjlj_setjmp:
6484  case ARM::Int_eh_sjlj_setjmp_nofp:
6485  case ARM::tInt_eh_sjlj_setjmp:
6486  case ARM::t2Int_eh_sjlj_setjmp:
6487  case ARM::t2Int_eh_sjlj_setjmp_nofp:
6488    EmitSjLjDispatchBlock(MI, BB);
6489    return BB;
6490
6491  case ARM::ABS:
6492  case ARM::t2ABS: {
6493    // To insert an ABS instruction, we have to insert the
6494    // diamond control-flow pattern.  The incoming instruction knows the
6495    // source vreg to test against 0, the destination vreg to set,
6496    // the condition code register to branch on, the
6497    // true/false values to select between, and a branch opcode to use.
6498    // It transforms
6499    //     V1 = ABS V0
6500    // into
6501    //     V2 = MOVS V0
6502    //     BCC                      (branch to SinkBB if V0 >= 0)
6503    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
6504    //     SinkBB: V1 = PHI(V2, V3)
6505    const BasicBlock *LLVM_BB = BB->getBasicBlock();
6506    MachineFunction::iterator BBI = BB;
6507    ++BBI;
6508    MachineFunction *Fn = BB->getParent();
6509    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
6510    MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
6511    Fn->insert(BBI, RSBBB);
6512    Fn->insert(BBI, SinkBB);
6513
6514    unsigned int ABSSrcReg = MI->getOperand(1).getReg();
6515    unsigned int ABSDstReg = MI->getOperand(0).getReg();
6516    bool isThumb2 = Subtarget->isThumb2();
6517    MachineRegisterInfo &MRI = Fn->getRegInfo();
6518    // In Thumb mode S must not be specified if source register is the SP or
6519    // PC and if destination register is the SP, so restrict register class
6520    unsigned NewMovDstReg = MRI.createVirtualRegister(
6521      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
6522    unsigned NewRsbDstReg = MRI.createVirtualRegister(
6523      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
6524
6525    // Transfer the remainder of BB and its successor edges to sinkMBB.
6526    SinkBB->splice(SinkBB->begin(), BB,
6527      llvm::next(MachineBasicBlock::iterator(MI)),
6528      BB->end());
6529    SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6530
6531    BB->addSuccessor(RSBBB);
6532    BB->addSuccessor(SinkBB);
6533
6534    // fall through to SinkMBB
6535    RSBBB->addSuccessor(SinkBB);
6536
6537    // insert a movs at the end of BB
6538    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
6539      NewMovDstReg)
6540      .addReg(ABSSrcReg, RegState::Kill)
6541      .addImm((unsigned)ARMCC::AL).addReg(0)
6542      .addReg(ARM::CPSR, RegState::Define);
6543
6544    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
6545    BuildMI(BB, dl,
6546      TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
6547      .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
6548
6549    // insert rsbri in RSBBB
6550    // Note: BCC and rsbri will be converted into predicated rsbmi
6551    // by if-conversion pass
6552    BuildMI(*RSBBB, RSBBB->begin(), dl,
6553      TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
6554      .addReg(NewMovDstReg, RegState::Kill)
6555      .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
6556
6557    // insert PHI in SinkBB,
6558    // reuse ABSDstReg to not change uses of ABS instruction
6559    BuildMI(*SinkBB, SinkBB->begin(), dl,
6560      TII->get(ARM::PHI), ABSDstReg)
6561      .addReg(NewRsbDstReg).addMBB(RSBBB)
6562      .addReg(NewMovDstReg).addMBB(BB);
6563
6564    // remove ABS instruction
6565    MI->eraseFromParent();
6566
6567    // return last added BB
6568    return SinkBB;
6569  }
6570  }
6571}
6572
6573void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
6574                                                      SDNode *Node) const {
6575  if (!MI->hasPostISelHook()) {
6576    assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
6577           "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
6578    return;
6579  }
6580
6581  const MCInstrDesc *MCID = &MI->getDesc();
6582  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
6583  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
6584  // operand is still set to noreg. If needed, set the optional operand's
6585  // register to CPSR, and remove the redundant implicit def.
6586  //
6587  // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
6588
6589  // Rename pseudo opcodes.
6590  unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
6591  if (NewOpc) {
6592    const ARMBaseInstrInfo *TII =
6593      static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
6594    MCID = &TII->get(NewOpc);
6595
6596    assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
6597           "converted opcode should be the same except for cc_out");
6598
6599    MI->setDesc(*MCID);
6600
6601    // Add the optional cc_out operand
6602    MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
6603  }
6604  unsigned ccOutIdx = MCID->getNumOperands() - 1;
6605
6606  // Any ARM instruction that sets the 's' bit should specify an optional
6607  // "cc_out" operand in the last operand position.
6608  if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
6609    assert(!NewOpc && "Optional cc_out operand required");
6610    return;
6611  }
6612  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
6613  // since we already have an optional CPSR def.
6614  bool definesCPSR = false;
6615  bool deadCPSR = false;
6616  for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
6617       i != e; ++i) {
6618    const MachineOperand &MO = MI->getOperand(i);
6619    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
6620      definesCPSR = true;
6621      if (MO.isDead())
6622        deadCPSR = true;
6623      MI->RemoveOperand(i);
6624      break;
6625    }
6626  }
6627  if (!definesCPSR) {
6628    assert(!NewOpc && "Optional cc_out operand required");
6629    return;
6630  }
6631  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
6632  if (deadCPSR) {
6633    assert(!MI->getOperand(ccOutIdx).getReg() &&
6634           "expect uninitialized optional cc_out operand");
6635    return;
6636  }
6637
6638  // If this instruction was defined with an optional CPSR def and its dag node
6639  // had a live implicit CPSR def, then activate the optional CPSR def.
6640  MachineOperand &MO = MI->getOperand(ccOutIdx);
6641  MO.setReg(ARM::CPSR);
6642  MO.setIsDef(true);
6643}
6644
6645//===----------------------------------------------------------------------===//
6646//                           ARM Optimization Hooks
6647//===----------------------------------------------------------------------===//
6648
6649static
6650SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
6651                            TargetLowering::DAGCombinerInfo &DCI) {
6652  SelectionDAG &DAG = DCI.DAG;
6653  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6654  EVT VT = N->getValueType(0);
6655  unsigned Opc = N->getOpcode();
6656  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
6657  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
6658  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
6659  ISD::CondCode CC = ISD::SETCC_INVALID;
6660
6661  if (isSlctCC) {
6662    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
6663  } else {
6664    SDValue CCOp = Slct.getOperand(0);
6665    if (CCOp.getOpcode() == ISD::SETCC)
6666      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
6667  }
6668
6669  bool DoXform = false;
6670  bool InvCC = false;
6671  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
6672          "Bad input!");
6673
6674  if (LHS.getOpcode() == ISD::Constant &&
6675      cast<ConstantSDNode>(LHS)->isNullValue()) {
6676    DoXform = true;
6677  } else if (CC != ISD::SETCC_INVALID &&
6678             RHS.getOpcode() == ISD::Constant &&
6679             cast<ConstantSDNode>(RHS)->isNullValue()) {
6680    std::swap(LHS, RHS);
6681    SDValue Op0 = Slct.getOperand(0);
6682    EVT OpVT = isSlctCC ? Op0.getValueType() :
6683                          Op0.getOperand(0).getValueType();
6684    bool isInt = OpVT.isInteger();
6685    CC = ISD::getSetCCInverse(CC, isInt);
6686
6687    if (!TLI.isCondCodeLegal(CC, OpVT))
6688      return SDValue();         // Inverse operator isn't legal.
6689
6690    DoXform = true;
6691    InvCC = true;
6692  }
6693
6694  if (DoXform) {
6695    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
6696    if (isSlctCC)
6697      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
6698                             Slct.getOperand(0), Slct.getOperand(1), CC);
6699    SDValue CCOp = Slct.getOperand(0);
6700    if (InvCC)
6701      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
6702                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
6703    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
6704                       CCOp, OtherOp, Result);
6705  }
6706  return SDValue();
6707}
6708
6709// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
6710// (only after legalization).
6711static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
6712                                 TargetLowering::DAGCombinerInfo &DCI,
6713                                 const ARMSubtarget *Subtarget) {
6714
6715  // Only perform optimization if after legalize, and if NEON is available. We
6716  // also expected both operands to be BUILD_VECTORs.
6717  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
6718      || N0.getOpcode() != ISD::BUILD_VECTOR
6719      || N1.getOpcode() != ISD::BUILD_VECTOR)
6720    return SDValue();
6721
6722  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
6723  EVT VT = N->getValueType(0);
6724  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
6725    return SDValue();
6726
6727  // Check that the vector operands are of the right form.
6728  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
6729  // operands, where N is the size of the formed vector.
6730  // Each EXTRACT_VECTOR should have the same input vector and odd or even
6731  // index such that we have a pair wise add pattern.
6732
6733  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
6734  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
6735    return SDValue();
6736  SDValue Vec = N0->getOperand(0)->getOperand(0);
6737  SDNode *V = Vec.getNode();
6738  unsigned nextIndex = 0;
6739
6740  // For each operands to the ADD which are BUILD_VECTORs,
6741  // check to see if each of their operands are an EXTRACT_VECTOR with
6742  // the same vector and appropriate index.
6743  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
6744    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
6745        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
6746
6747      SDValue ExtVec0 = N0->getOperand(i);
6748      SDValue ExtVec1 = N1->getOperand(i);
6749
6750      // First operand is the vector, verify its the same.
6751      if (V != ExtVec0->getOperand(0).getNode() ||
6752          V != ExtVec1->getOperand(0).getNode())
6753        return SDValue();
6754
6755      // Second is the constant, verify its correct.
6756      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
6757      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
6758
6759      // For the constant, we want to see all the even or all the odd.
6760      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
6761          || C1->getZExtValue() != nextIndex+1)
6762        return SDValue();
6763
6764      // Increment index.
6765      nextIndex+=2;
6766    } else
6767      return SDValue();
6768  }
6769
6770  // Create VPADDL node.
6771  SelectionDAG &DAG = DCI.DAG;
6772  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6773
6774  // Build operand list.
6775  SmallVector<SDValue, 8> Ops;
6776  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
6777                                TLI.getPointerTy()));
6778
6779  // Input is the vector.
6780  Ops.push_back(Vec);
6781
6782  // Get widened type and narrowed type.
6783  MVT widenType;
6784  unsigned numElem = VT.getVectorNumElements();
6785  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
6786    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
6787    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
6788    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
6789    default:
6790      llvm_unreachable("Invalid vector element type for padd optimization.");
6791  }
6792
6793  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
6794                            widenType, &Ops[0], Ops.size());
6795  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
6796}
6797
6798/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
6799/// operands N0 and N1.  This is a helper for PerformADDCombine that is
6800/// called with the default operands, and if that fails, with commuted
6801/// operands.
6802static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
6803                                          TargetLowering::DAGCombinerInfo &DCI,
6804                                          const ARMSubtarget *Subtarget){
6805
6806  // Attempt to create vpaddl for this add.
6807  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
6808  if (Result.getNode())
6809    return Result;
6810
6811  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
6812  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
6813    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
6814    if (Result.getNode()) return Result;
6815  }
6816  return SDValue();
6817}
6818
6819/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
6820///
6821static SDValue PerformADDCombine(SDNode *N,
6822                                 TargetLowering::DAGCombinerInfo &DCI,
6823                                 const ARMSubtarget *Subtarget) {
6824  SDValue N0 = N->getOperand(0);
6825  SDValue N1 = N->getOperand(1);
6826
6827  // First try with the default operand order.
6828  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
6829  if (Result.getNode())
6830    return Result;
6831
6832  // If that didn't work, try again with the operands commuted.
6833  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
6834}
6835
6836/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
6837///
6838static SDValue PerformSUBCombine(SDNode *N,
6839                                 TargetLowering::DAGCombinerInfo &DCI) {
6840  SDValue N0 = N->getOperand(0);
6841  SDValue N1 = N->getOperand(1);
6842
6843  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
6844  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
6845    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
6846    if (Result.getNode()) return Result;
6847  }
6848
6849  return SDValue();
6850}
6851
6852/// PerformVMULCombine
6853/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
6854/// special multiplier accumulator forwarding.
6855///   vmul d3, d0, d2
6856///   vmla d3, d1, d2
6857/// is faster than
6858///   vadd d3, d0, d1
6859///   vmul d3, d3, d2
6860static SDValue PerformVMULCombine(SDNode *N,
6861                                  TargetLowering::DAGCombinerInfo &DCI,
6862                                  const ARMSubtarget *Subtarget) {
6863  if (!Subtarget->hasVMLxForwarding())
6864    return SDValue();
6865
6866  SelectionDAG &DAG = DCI.DAG;
6867  SDValue N0 = N->getOperand(0);
6868  SDValue N1 = N->getOperand(1);
6869  unsigned Opcode = N0.getOpcode();
6870  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
6871      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
6872    Opcode = N1.getOpcode();
6873    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
6874        Opcode != ISD::FADD && Opcode != ISD::FSUB)
6875      return SDValue();
6876    std::swap(N0, N1);
6877  }
6878
6879  EVT VT = N->getValueType(0);
6880  DebugLoc DL = N->getDebugLoc();
6881  SDValue N00 = N0->getOperand(0);
6882  SDValue N01 = N0->getOperand(1);
6883  return DAG.getNode(Opcode, DL, VT,
6884                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
6885                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
6886}
6887
6888static SDValue PerformMULCombine(SDNode *N,
6889                                 TargetLowering::DAGCombinerInfo &DCI,
6890                                 const ARMSubtarget *Subtarget) {
6891  SelectionDAG &DAG = DCI.DAG;
6892
6893  if (Subtarget->isThumb1Only())
6894    return SDValue();
6895
6896  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
6897    return SDValue();
6898
6899  EVT VT = N->getValueType(0);
6900  if (VT.is64BitVector() || VT.is128BitVector())
6901    return PerformVMULCombine(N, DCI, Subtarget);
6902  if (VT != MVT::i32)
6903    return SDValue();
6904
6905  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
6906  if (!C)
6907    return SDValue();
6908
6909  int64_t MulAmt = C->getSExtValue();
6910  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
6911
6912  ShiftAmt = ShiftAmt & (32 - 1);
6913  SDValue V = N->getOperand(0);
6914  DebugLoc DL = N->getDebugLoc();
6915
6916  SDValue Res;
6917  MulAmt >>= ShiftAmt;
6918
6919  if (MulAmt >= 0) {
6920    if (isPowerOf2_32(MulAmt - 1)) {
6921      // (mul x, 2^N + 1) => (add (shl x, N), x)
6922      Res = DAG.getNode(ISD::ADD, DL, VT,
6923                        V,
6924                        DAG.getNode(ISD::SHL, DL, VT,
6925                                    V,
6926                                    DAG.getConstant(Log2_32(MulAmt - 1),
6927                                                    MVT::i32)));
6928    } else if (isPowerOf2_32(MulAmt + 1)) {
6929      // (mul x, 2^N - 1) => (sub (shl x, N), x)
6930      Res = DAG.getNode(ISD::SUB, DL, VT,
6931                        DAG.getNode(ISD::SHL, DL, VT,
6932                                    V,
6933                                    DAG.getConstant(Log2_32(MulAmt + 1),
6934                                                    MVT::i32)),
6935                        V);
6936    } else
6937      return SDValue();
6938  } else {
6939    uint64_t MulAmtAbs = -MulAmt;
6940    if (isPowerOf2_32(MulAmtAbs + 1)) {
6941      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
6942      Res = DAG.getNode(ISD::SUB, DL, VT,
6943                        V,
6944                        DAG.getNode(ISD::SHL, DL, VT,
6945                                    V,
6946                                    DAG.getConstant(Log2_32(MulAmtAbs + 1),
6947                                                    MVT::i32)));
6948    } else if (isPowerOf2_32(MulAmtAbs - 1)) {
6949      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
6950      Res = DAG.getNode(ISD::ADD, DL, VT,
6951                        V,
6952                        DAG.getNode(ISD::SHL, DL, VT,
6953                                    V,
6954                                    DAG.getConstant(Log2_32(MulAmtAbs-1),
6955                                                    MVT::i32)));
6956      Res = DAG.getNode(ISD::SUB, DL, VT,
6957                        DAG.getConstant(0, MVT::i32),Res);
6958
6959    } else
6960      return SDValue();
6961  }
6962
6963  if (ShiftAmt != 0)
6964    Res = DAG.getNode(ISD::SHL, DL, VT,
6965                      Res, DAG.getConstant(ShiftAmt, MVT::i32));
6966
6967  // Do not add new nodes to DAG combiner worklist.
6968  DCI.CombineTo(N, Res, false);
6969  return SDValue();
6970}
6971
6972static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
6973  if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
6974    return false;
6975
6976  SDValue FalseVal = N.getOperand(0);
6977  ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
6978  if (!C)
6979    return false;
6980  if (AllOnes)
6981    return C->isAllOnesValue();
6982  return C->isNullValue();
6983}
6984
6985/// formConditionalOp - Combine an operation with a conditional move operand
6986/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y)
6987/// (and x, (cmov -1, y, cond)) => (and.cond, x, y)
6988static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
6989                                 bool Commutable) {
6990  SDValue N0 = N->getOperand(0);
6991  SDValue N1 = N->getOperand(1);
6992
6993  bool isAND = N->getOpcode() == ISD::AND;
6994  bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
6995  if (!isCand && Commutable) {
6996    isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
6997    if (isCand)
6998      std::swap(N0, N1);
6999  }
7000  if (!isCand)
7001    return SDValue();
7002
7003  unsigned Opc = 0;
7004  switch (N->getOpcode()) {
7005  default: llvm_unreachable("Unexpected node");
7006  case ISD::AND: Opc = ARMISD::CAND; break;
7007  case ISD::OR:  Opc = ARMISD::COR; break;
7008  case ISD::XOR: Opc = ARMISD::CXOR; break;
7009  }
7010  return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
7011                     N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
7012                     N1.getOperand(4));
7013}
7014
7015static SDValue PerformANDCombine(SDNode *N,
7016                                 TargetLowering::DAGCombinerInfo &DCI,
7017                                 const ARMSubtarget *Subtarget) {
7018
7019  // Attempt to use immediate-form VBIC
7020  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
7021  DebugLoc dl = N->getDebugLoc();
7022  EVT VT = N->getValueType(0);
7023  SelectionDAG &DAG = DCI.DAG;
7024
7025  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7026    return SDValue();
7027
7028  APInt SplatBits, SplatUndef;
7029  unsigned SplatBitSize;
7030  bool HasAnyUndefs;
7031  if (BVN &&
7032      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7033    if (SplatBitSize <= 64) {
7034      EVT VbicVT;
7035      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
7036                                      SplatUndef.getZExtValue(), SplatBitSize,
7037                                      DAG, VbicVT, VT.is128BitVector(),
7038                                      OtherModImm);
7039      if (Val.getNode()) {
7040        SDValue Input =
7041          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
7042        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
7043        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
7044      }
7045    }
7046  }
7047
7048  if (!Subtarget->isThumb1Only()) {
7049    // (and x, (cmov -1, y, cond)) => (and.cond x, y)
7050    SDValue CAND = formConditionalOp(N, DAG, true);
7051    if (CAND.getNode())
7052      return CAND;
7053  }
7054
7055  return SDValue();
7056}
7057
7058/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
7059static SDValue PerformORCombine(SDNode *N,
7060                                TargetLowering::DAGCombinerInfo &DCI,
7061                                const ARMSubtarget *Subtarget) {
7062  // Attempt to use immediate-form VORR
7063  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
7064  DebugLoc dl = N->getDebugLoc();
7065  EVT VT = N->getValueType(0);
7066  SelectionDAG &DAG = DCI.DAG;
7067
7068  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7069    return SDValue();
7070
7071  APInt SplatBits, SplatUndef;
7072  unsigned SplatBitSize;
7073  bool HasAnyUndefs;
7074  if (BVN && Subtarget->hasNEON() &&
7075      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7076    if (SplatBitSize <= 64) {
7077      EVT VorrVT;
7078      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
7079                                      SplatUndef.getZExtValue(), SplatBitSize,
7080                                      DAG, VorrVT, VT.is128BitVector(),
7081                                      OtherModImm);
7082      if (Val.getNode()) {
7083        SDValue Input =
7084          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
7085        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
7086        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
7087      }
7088    }
7089  }
7090
7091  if (!Subtarget->isThumb1Only()) {
7092    // (or x, (cmov 0, y, cond)) => (or.cond x, y)
7093    SDValue COR = formConditionalOp(N, DAG, true);
7094    if (COR.getNode())
7095      return COR;
7096  }
7097
7098  SDValue N0 = N->getOperand(0);
7099  if (N0.getOpcode() != ISD::AND)
7100    return SDValue();
7101  SDValue N1 = N->getOperand(1);
7102
7103  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
7104  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
7105      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
7106    APInt SplatUndef;
7107    unsigned SplatBitSize;
7108    bool HasAnyUndefs;
7109
7110    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
7111    APInt SplatBits0;
7112    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
7113                                  HasAnyUndefs) && !HasAnyUndefs) {
7114      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
7115      APInt SplatBits1;
7116      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
7117                                    HasAnyUndefs) && !HasAnyUndefs &&
7118          SplatBits0 == ~SplatBits1) {
7119        // Canonicalize the vector type to make instruction selection simpler.
7120        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
7121        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
7122                                     N0->getOperand(1), N0->getOperand(0),
7123                                     N1->getOperand(0));
7124        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
7125      }
7126    }
7127  }
7128
7129  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
7130  // reasonable.
7131
7132  // BFI is only available on V6T2+
7133  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
7134    return SDValue();
7135
7136  DebugLoc DL = N->getDebugLoc();
7137  // 1) or (and A, mask), val => ARMbfi A, val, mask
7138  //      iff (val & mask) == val
7139  //
7140  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
7141  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
7142  //          && mask == ~mask2
7143  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
7144  //          && ~mask == mask2
7145  //  (i.e., copy a bitfield value into another bitfield of the same width)
7146
7147  if (VT != MVT::i32)
7148    return SDValue();
7149
7150  SDValue N00 = N0.getOperand(0);
7151
7152  // The value and the mask need to be constants so we can verify this is
7153  // actually a bitfield set. If the mask is 0xffff, we can do better
7154  // via a movt instruction, so don't use BFI in that case.
7155  SDValue MaskOp = N0.getOperand(1);
7156  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
7157  if (!MaskC)
7158    return SDValue();
7159  unsigned Mask = MaskC->getZExtValue();
7160  if (Mask == 0xffff)
7161    return SDValue();
7162  SDValue Res;
7163  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
7164  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7165  if (N1C) {
7166    unsigned Val = N1C->getZExtValue();
7167    if ((Val & ~Mask) != Val)
7168      return SDValue();
7169
7170    if (ARM::isBitFieldInvertedMask(Mask)) {
7171      Val >>= CountTrailingZeros_32(~Mask);
7172
7173      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
7174                        DAG.getConstant(Val, MVT::i32),
7175                        DAG.getConstant(Mask, MVT::i32));
7176
7177      // Do not add new nodes to DAG combiner worklist.
7178      DCI.CombineTo(N, Res, false);
7179      return SDValue();
7180    }
7181  } else if (N1.getOpcode() == ISD::AND) {
7182    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
7183    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7184    if (!N11C)
7185      return SDValue();
7186    unsigned Mask2 = N11C->getZExtValue();
7187
7188    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
7189    // as is to match.
7190    if (ARM::isBitFieldInvertedMask(Mask) &&
7191        (Mask == ~Mask2)) {
7192      // The pack halfword instruction works better for masks that fit it,
7193      // so use that when it's available.
7194      if (Subtarget->hasT2ExtractPack() &&
7195          (Mask == 0xffff || Mask == 0xffff0000))
7196        return SDValue();
7197      // 2a
7198      unsigned amt = CountTrailingZeros_32(Mask2);
7199      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
7200                        DAG.getConstant(amt, MVT::i32));
7201      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
7202                        DAG.getConstant(Mask, MVT::i32));
7203      // Do not add new nodes to DAG combiner worklist.
7204      DCI.CombineTo(N, Res, false);
7205      return SDValue();
7206    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
7207               (~Mask == Mask2)) {
7208      // The pack halfword instruction works better for masks that fit it,
7209      // so use that when it's available.
7210      if (Subtarget->hasT2ExtractPack() &&
7211          (Mask2 == 0xffff || Mask2 == 0xffff0000))
7212        return SDValue();
7213      // 2b
7214      unsigned lsb = CountTrailingZeros_32(Mask);
7215      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
7216                        DAG.getConstant(lsb, MVT::i32));
7217      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
7218                        DAG.getConstant(Mask2, MVT::i32));
7219      // Do not add new nodes to DAG combiner worklist.
7220      DCI.CombineTo(N, Res, false);
7221      return SDValue();
7222    }
7223  }
7224
7225  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
7226      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
7227      ARM::isBitFieldInvertedMask(~Mask)) {
7228    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
7229    // where lsb(mask) == #shamt and masked bits of B are known zero.
7230    SDValue ShAmt = N00.getOperand(1);
7231    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7232    unsigned LSB = CountTrailingZeros_32(Mask);
7233    if (ShAmtC != LSB)
7234      return SDValue();
7235
7236    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
7237                      DAG.getConstant(~Mask, MVT::i32));
7238
7239    // Do not add new nodes to DAG combiner worklist.
7240    DCI.CombineTo(N, Res, false);
7241  }
7242
7243  return SDValue();
7244}
7245
7246static SDValue PerformXORCombine(SDNode *N,
7247                                 TargetLowering::DAGCombinerInfo &DCI,
7248                                 const ARMSubtarget *Subtarget) {
7249  EVT VT = N->getValueType(0);
7250  SelectionDAG &DAG = DCI.DAG;
7251
7252  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7253    return SDValue();
7254
7255  if (!Subtarget->isThumb1Only()) {
7256    // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
7257    SDValue CXOR = formConditionalOp(N, DAG, true);
7258    if (CXOR.getNode())
7259      return CXOR;
7260  }
7261
7262  return SDValue();
7263}
7264
7265/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
7266/// the bits being cleared by the AND are not demanded by the BFI.
7267static SDValue PerformBFICombine(SDNode *N,
7268                                 TargetLowering::DAGCombinerInfo &DCI) {
7269  SDValue N1 = N->getOperand(1);
7270  if (N1.getOpcode() == ISD::AND) {
7271    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7272    if (!N11C)
7273      return SDValue();
7274    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
7275    unsigned LSB = CountTrailingZeros_32(~InvMask);
7276    unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
7277    unsigned Mask = (1 << Width)-1;
7278    unsigned Mask2 = N11C->getZExtValue();
7279    if ((Mask & (~Mask2)) == 0)
7280      return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
7281                             N->getOperand(0), N1.getOperand(0),
7282                             N->getOperand(2));
7283  }
7284  return SDValue();
7285}
7286
7287/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
7288/// ARMISD::VMOVRRD.
7289static SDValue PerformVMOVRRDCombine(SDNode *N,
7290                                     TargetLowering::DAGCombinerInfo &DCI) {
7291  // vmovrrd(vmovdrr x, y) -> x,y
7292  SDValue InDouble = N->getOperand(0);
7293  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
7294    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
7295
7296  // vmovrrd(load f64) -> (load i32), (load i32)
7297  SDNode *InNode = InDouble.getNode();
7298  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
7299      InNode->getValueType(0) == MVT::f64 &&
7300      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
7301      !cast<LoadSDNode>(InNode)->isVolatile()) {
7302    // TODO: Should this be done for non-FrameIndex operands?
7303    LoadSDNode *LD = cast<LoadSDNode>(InNode);
7304
7305    SelectionDAG &DAG = DCI.DAG;
7306    DebugLoc DL = LD->getDebugLoc();
7307    SDValue BasePtr = LD->getBasePtr();
7308    SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
7309                                 LD->getPointerInfo(), LD->isVolatile(),
7310                                 LD->isNonTemporal(), LD->isInvariant(),
7311                                 LD->getAlignment());
7312
7313    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
7314                                    DAG.getConstant(4, MVT::i32));
7315    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
7316                                 LD->getPointerInfo(), LD->isVolatile(),
7317                                 LD->isNonTemporal(), LD->isInvariant(),
7318                                 std::min(4U, LD->getAlignment() / 2));
7319
7320    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
7321    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
7322    DCI.RemoveFromWorklist(LD);
7323    DAG.DeleteNode(LD);
7324    return Result;
7325  }
7326
7327  return SDValue();
7328}
7329
7330/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
7331/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
7332static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
7333  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
7334  SDValue Op0 = N->getOperand(0);
7335  SDValue Op1 = N->getOperand(1);
7336  if (Op0.getOpcode() == ISD::BITCAST)
7337    Op0 = Op0.getOperand(0);
7338  if (Op1.getOpcode() == ISD::BITCAST)
7339    Op1 = Op1.getOperand(0);
7340  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
7341      Op0.getNode() == Op1.getNode() &&
7342      Op0.getResNo() == 0 && Op1.getResNo() == 1)
7343    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
7344                       N->getValueType(0), Op0.getOperand(0));
7345  return SDValue();
7346}
7347
7348/// PerformSTORECombine - Target-specific dag combine xforms for
7349/// ISD::STORE.
7350static SDValue PerformSTORECombine(SDNode *N,
7351                                   TargetLowering::DAGCombinerInfo &DCI) {
7352  StoreSDNode *St = cast<StoreSDNode>(N);
7353  if (St->isVolatile())
7354    return SDValue();
7355
7356  // Optimize trunc store (of multiple scalars) to shuffle and store.  First,
7357  // pack all of the elements in one place.  Next, store to memory in fewer
7358  // chunks.
7359  SDValue StVal = St->getValue();
7360  EVT VT = StVal.getValueType();
7361  if (St->isTruncatingStore() && VT.isVector()) {
7362    SelectionDAG &DAG = DCI.DAG;
7363    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7364    EVT StVT = St->getMemoryVT();
7365    unsigned NumElems = VT.getVectorNumElements();
7366    assert(StVT != VT && "Cannot truncate to the same type");
7367    unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
7368    unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
7369
7370    // From, To sizes and ElemCount must be pow of two
7371    if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
7372
7373    // We are going to use the original vector elt for storing.
7374    // Accumulated smaller vector elements must be a multiple of the store size.
7375    if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
7376
7377    unsigned SizeRatio  = FromEltSz / ToEltSz;
7378    assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
7379
7380    // Create a type on which we perform the shuffle.
7381    EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
7382                                     NumElems*SizeRatio);
7383    assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
7384
7385    DebugLoc DL = St->getDebugLoc();
7386    SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
7387    SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
7388    for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
7389
7390    // Can't shuffle using an illegal type.
7391    if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
7392
7393    SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
7394                                DAG.getUNDEF(WideVec.getValueType()),
7395                                ShuffleVec.data());
7396    // At this point all of the data is stored at the bottom of the
7397    // register. We now need to save it to mem.
7398
7399    // Find the largest store unit
7400    MVT StoreType = MVT::i8;
7401    for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
7402         tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
7403      MVT Tp = (MVT::SimpleValueType)tp;
7404      if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
7405        StoreType = Tp;
7406    }
7407    // Didn't find a legal store type.
7408    if (!TLI.isTypeLegal(StoreType))
7409      return SDValue();
7410
7411    // Bitcast the original vector into a vector of store-size units
7412    EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
7413            StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
7414    assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
7415    SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
7416    SmallVector<SDValue, 8> Chains;
7417    SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
7418                                        TLI.getPointerTy());
7419    SDValue BasePtr = St->getBasePtr();
7420
7421    // Perform one or more big stores into memory.
7422    unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
7423    for (unsigned I = 0; I < E; I++) {
7424      SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
7425                                   StoreType, ShuffWide,
7426                                   DAG.getIntPtrConstant(I));
7427      SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
7428                                St->getPointerInfo(), St->isVolatile(),
7429                                St->isNonTemporal(), St->getAlignment());
7430      BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
7431                            Increment);
7432      Chains.push_back(Ch);
7433    }
7434    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
7435                       Chains.size());
7436  }
7437
7438  if (!ISD::isNormalStore(St))
7439    return SDValue();
7440
7441  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
7442  // ARM stores of arguments in the same cache line.
7443  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
7444      StVal.getNode()->hasOneUse()) {
7445    SelectionDAG  &DAG = DCI.DAG;
7446    DebugLoc DL = St->getDebugLoc();
7447    SDValue BasePtr = St->getBasePtr();
7448    SDValue NewST1 = DAG.getStore(St->getChain(), DL,
7449                                  StVal.getNode()->getOperand(0), BasePtr,
7450                                  St->getPointerInfo(), St->isVolatile(),
7451                                  St->isNonTemporal(), St->getAlignment());
7452
7453    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
7454                                    DAG.getConstant(4, MVT::i32));
7455    return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
7456                        OffsetPtr, St->getPointerInfo(), St->isVolatile(),
7457                        St->isNonTemporal(),
7458                        std::min(4U, St->getAlignment() / 2));
7459  }
7460
7461  if (StVal.getValueType() != MVT::i64 ||
7462      StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7463    return SDValue();
7464
7465  // Bitcast an i64 store extracted from a vector to f64.
7466  // Otherwise, the i64 value will be legalized to a pair of i32 values.
7467  SelectionDAG &DAG = DCI.DAG;
7468  DebugLoc dl = StVal.getDebugLoc();
7469  SDValue IntVec = StVal.getOperand(0);
7470  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
7471                                 IntVec.getValueType().getVectorNumElements());
7472  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
7473  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
7474                               Vec, StVal.getOperand(1));
7475  dl = N->getDebugLoc();
7476  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
7477  // Make the DAGCombiner fold the bitcasts.
7478  DCI.AddToWorklist(Vec.getNode());
7479  DCI.AddToWorklist(ExtElt.getNode());
7480  DCI.AddToWorklist(V.getNode());
7481  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
7482                      St->getPointerInfo(), St->isVolatile(),
7483                      St->isNonTemporal(), St->getAlignment(),
7484                      St->getTBAAInfo());
7485}
7486
7487/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
7488/// are normal, non-volatile loads.  If so, it is profitable to bitcast an
7489/// i64 vector to have f64 elements, since the value can then be loaded
7490/// directly into a VFP register.
7491static bool hasNormalLoadOperand(SDNode *N) {
7492  unsigned NumElts = N->getValueType(0).getVectorNumElements();
7493  for (unsigned i = 0; i < NumElts; ++i) {
7494    SDNode *Elt = N->getOperand(i).getNode();
7495    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
7496      return true;
7497  }
7498  return false;
7499}
7500
7501/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
7502/// ISD::BUILD_VECTOR.
7503static SDValue PerformBUILD_VECTORCombine(SDNode *N,
7504                                          TargetLowering::DAGCombinerInfo &DCI){
7505  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
7506  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
7507  // into a pair of GPRs, which is fine when the value is used as a scalar,
7508  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
7509  SelectionDAG &DAG = DCI.DAG;
7510  if (N->getNumOperands() == 2) {
7511    SDValue RV = PerformVMOVDRRCombine(N, DAG);
7512    if (RV.getNode())
7513      return RV;
7514  }
7515
7516  // Load i64 elements as f64 values so that type legalization does not split
7517  // them up into i32 values.
7518  EVT VT = N->getValueType(0);
7519  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
7520    return SDValue();
7521  DebugLoc dl = N->getDebugLoc();
7522  SmallVector<SDValue, 8> Ops;
7523  unsigned NumElts = VT.getVectorNumElements();
7524  for (unsigned i = 0; i < NumElts; ++i) {
7525    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
7526    Ops.push_back(V);
7527    // Make the DAGCombiner fold the bitcast.
7528    DCI.AddToWorklist(V.getNode());
7529  }
7530  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
7531  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
7532  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
7533}
7534
7535/// PerformInsertEltCombine - Target-specific dag combine xforms for
7536/// ISD::INSERT_VECTOR_ELT.
7537static SDValue PerformInsertEltCombine(SDNode *N,
7538                                       TargetLowering::DAGCombinerInfo &DCI) {
7539  // Bitcast an i64 load inserted into a vector to f64.
7540  // Otherwise, the i64 value will be legalized to a pair of i32 values.
7541  EVT VT = N->getValueType(0);
7542  SDNode *Elt = N->getOperand(1).getNode();
7543  if (VT.getVectorElementType() != MVT::i64 ||
7544      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
7545    return SDValue();
7546
7547  SelectionDAG &DAG = DCI.DAG;
7548  DebugLoc dl = N->getDebugLoc();
7549  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
7550                                 VT.getVectorNumElements());
7551  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
7552  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
7553  // Make the DAGCombiner fold the bitcasts.
7554  DCI.AddToWorklist(Vec.getNode());
7555  DCI.AddToWorklist(V.getNode());
7556  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
7557                               Vec, V, N->getOperand(2));
7558  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
7559}
7560
7561/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
7562/// ISD::VECTOR_SHUFFLE.
7563static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
7564  // The LLVM shufflevector instruction does not require the shuffle mask
7565  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
7566  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
7567  // operands do not match the mask length, they are extended by concatenating
7568  // them with undef vectors.  That is probably the right thing for other
7569  // targets, but for NEON it is better to concatenate two double-register
7570  // size vector operands into a single quad-register size vector.  Do that
7571  // transformation here:
7572  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
7573  //   shuffle(concat(v1, v2), undef)
7574  SDValue Op0 = N->getOperand(0);
7575  SDValue Op1 = N->getOperand(1);
7576  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
7577      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
7578      Op0.getNumOperands() != 2 ||
7579      Op1.getNumOperands() != 2)
7580    return SDValue();
7581  SDValue Concat0Op1 = Op0.getOperand(1);
7582  SDValue Concat1Op1 = Op1.getOperand(1);
7583  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
7584      Concat1Op1.getOpcode() != ISD::UNDEF)
7585    return SDValue();
7586  // Skip the transformation if any of the types are illegal.
7587  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7588  EVT VT = N->getValueType(0);
7589  if (!TLI.isTypeLegal(VT) ||
7590      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
7591      !TLI.isTypeLegal(Concat1Op1.getValueType()))
7592    return SDValue();
7593
7594  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
7595                                  Op0.getOperand(0), Op1.getOperand(0));
7596  // Translate the shuffle mask.
7597  SmallVector<int, 16> NewMask;
7598  unsigned NumElts = VT.getVectorNumElements();
7599  unsigned HalfElts = NumElts/2;
7600  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7601  for (unsigned n = 0; n < NumElts; ++n) {
7602    int MaskElt = SVN->getMaskElt(n);
7603    int NewElt = -1;
7604    if (MaskElt < (int)HalfElts)
7605      NewElt = MaskElt;
7606    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
7607      NewElt = HalfElts + MaskElt - NumElts;
7608    NewMask.push_back(NewElt);
7609  }
7610  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
7611                              DAG.getUNDEF(VT), NewMask.data());
7612}
7613
7614/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
7615/// NEON load/store intrinsics to merge base address updates.
7616static SDValue CombineBaseUpdate(SDNode *N,
7617                                 TargetLowering::DAGCombinerInfo &DCI) {
7618  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
7619    return SDValue();
7620
7621  SelectionDAG &DAG = DCI.DAG;
7622  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
7623                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
7624  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
7625  SDValue Addr = N->getOperand(AddrOpIdx);
7626
7627  // Search for a use of the address operand that is an increment.
7628  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
7629         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
7630    SDNode *User = *UI;
7631    if (User->getOpcode() != ISD::ADD ||
7632        UI.getUse().getResNo() != Addr.getResNo())
7633      continue;
7634
7635    // Check that the add is independent of the load/store.  Otherwise, folding
7636    // it would create a cycle.
7637    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
7638      continue;
7639
7640    // Find the new opcode for the updating load/store.
7641    bool isLoad = true;
7642    bool isLaneOp = false;
7643    unsigned NewOpc = 0;
7644    unsigned NumVecs = 0;
7645    if (isIntrinsic) {
7646      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
7647      switch (IntNo) {
7648      default: llvm_unreachable("unexpected intrinsic for Neon base update");
7649      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
7650        NumVecs = 1; break;
7651      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
7652        NumVecs = 2; break;
7653      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
7654        NumVecs = 3; break;
7655      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
7656        NumVecs = 4; break;
7657      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
7658        NumVecs = 2; isLaneOp = true; break;
7659      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
7660        NumVecs = 3; isLaneOp = true; break;
7661      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
7662        NumVecs = 4; isLaneOp = true; break;
7663      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
7664        NumVecs = 1; isLoad = false; break;
7665      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
7666        NumVecs = 2; isLoad = false; break;
7667      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
7668        NumVecs = 3; isLoad = false; break;
7669      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
7670        NumVecs = 4; isLoad = false; break;
7671      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
7672        NumVecs = 2; isLoad = false; isLaneOp = true; break;
7673      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
7674        NumVecs = 3; isLoad = false; isLaneOp = true; break;
7675      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
7676        NumVecs = 4; isLoad = false; isLaneOp = true; break;
7677      }
7678    } else {
7679      isLaneOp = true;
7680      switch (N->getOpcode()) {
7681      default: llvm_unreachable("unexpected opcode for Neon base update");
7682      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
7683      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
7684      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
7685      }
7686    }
7687
7688    // Find the size of memory referenced by the load/store.
7689    EVT VecTy;
7690    if (isLoad)
7691      VecTy = N->getValueType(0);
7692    else
7693      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
7694    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
7695    if (isLaneOp)
7696      NumBytes /= VecTy.getVectorNumElements();
7697
7698    // If the increment is a constant, it must match the memory ref size.
7699    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
7700    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
7701      uint64_t IncVal = CInc->getZExtValue();
7702      if (IncVal != NumBytes)
7703        continue;
7704    } else if (NumBytes >= 3 * 16) {
7705      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
7706      // separate instructions that make it harder to use a non-constant update.
7707      continue;
7708    }
7709
7710    // Create the new updating load/store node.
7711    EVT Tys[6];
7712    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
7713    unsigned n;
7714    for (n = 0; n < NumResultVecs; ++n)
7715      Tys[n] = VecTy;
7716    Tys[n++] = MVT::i32;
7717    Tys[n] = MVT::Other;
7718    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
7719    SmallVector<SDValue, 8> Ops;
7720    Ops.push_back(N->getOperand(0)); // incoming chain
7721    Ops.push_back(N->getOperand(AddrOpIdx));
7722    Ops.push_back(Inc);
7723    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
7724      Ops.push_back(N->getOperand(i));
7725    }
7726    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
7727    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
7728                                           Ops.data(), Ops.size(),
7729                                           MemInt->getMemoryVT(),
7730                                           MemInt->getMemOperand());
7731
7732    // Update the uses.
7733    std::vector<SDValue> NewResults;
7734    for (unsigned i = 0; i < NumResultVecs; ++i) {
7735      NewResults.push_back(SDValue(UpdN.getNode(), i));
7736    }
7737    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
7738    DCI.CombineTo(N, NewResults);
7739    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
7740
7741    break;
7742  }
7743  return SDValue();
7744}
7745
7746/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
7747/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
7748/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
7749/// return true.
7750static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
7751  SelectionDAG &DAG = DCI.DAG;
7752  EVT VT = N->getValueType(0);
7753  // vldN-dup instructions only support 64-bit vectors for N > 1.
7754  if (!VT.is64BitVector())
7755    return false;
7756
7757  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
7758  SDNode *VLD = N->getOperand(0).getNode();
7759  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
7760    return false;
7761  unsigned NumVecs = 0;
7762  unsigned NewOpc = 0;
7763  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
7764  if (IntNo == Intrinsic::arm_neon_vld2lane) {
7765    NumVecs = 2;
7766    NewOpc = ARMISD::VLD2DUP;
7767  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
7768    NumVecs = 3;
7769    NewOpc = ARMISD::VLD3DUP;
7770  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
7771    NumVecs = 4;
7772    NewOpc = ARMISD::VLD4DUP;
7773  } else {
7774    return false;
7775  }
7776
7777  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
7778  // numbers match the load.
7779  unsigned VLDLaneNo =
7780    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
7781  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
7782       UI != UE; ++UI) {
7783    // Ignore uses of the chain result.
7784    if (UI.getUse().getResNo() == NumVecs)
7785      continue;
7786    SDNode *User = *UI;
7787    if (User->getOpcode() != ARMISD::VDUPLANE ||
7788        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
7789      return false;
7790  }
7791
7792  // Create the vldN-dup node.
7793  EVT Tys[5];
7794  unsigned n;
7795  for (n = 0; n < NumVecs; ++n)
7796    Tys[n] = VT;
7797  Tys[n] = MVT::Other;
7798  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
7799  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
7800  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
7801  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
7802                                           Ops, 2, VLDMemInt->getMemoryVT(),
7803                                           VLDMemInt->getMemOperand());
7804
7805  // Update the uses.
7806  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
7807       UI != UE; ++UI) {
7808    unsigned ResNo = UI.getUse().getResNo();
7809    // Ignore uses of the chain result.
7810    if (ResNo == NumVecs)
7811      continue;
7812    SDNode *User = *UI;
7813    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
7814  }
7815
7816  // Now the vldN-lane intrinsic is dead except for its chain result.
7817  // Update uses of the chain.
7818  std::vector<SDValue> VLDDupResults;
7819  for (unsigned n = 0; n < NumVecs; ++n)
7820    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
7821  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
7822  DCI.CombineTo(VLD, VLDDupResults);
7823
7824  return true;
7825}
7826
7827/// PerformVDUPLANECombine - Target-specific dag combine xforms for
7828/// ARMISD::VDUPLANE.
7829static SDValue PerformVDUPLANECombine(SDNode *N,
7830                                      TargetLowering::DAGCombinerInfo &DCI) {
7831  SDValue Op = N->getOperand(0);
7832
7833  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
7834  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
7835  if (CombineVLDDUP(N, DCI))
7836    return SDValue(N, 0);
7837
7838  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
7839  // redundant.  Ignore bit_converts for now; element sizes are checked below.
7840  while (Op.getOpcode() == ISD::BITCAST)
7841    Op = Op.getOperand(0);
7842  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
7843    return SDValue();
7844
7845  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
7846  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
7847  // The canonical VMOV for a zero vector uses a 32-bit element size.
7848  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7849  unsigned EltBits;
7850  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
7851    EltSize = 8;
7852  EVT VT = N->getValueType(0);
7853  if (EltSize > VT.getVectorElementType().getSizeInBits())
7854    return SDValue();
7855
7856  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
7857}
7858
7859// isConstVecPow2 - Return true if each vector element is a power of 2, all
7860// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
7861static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
7862{
7863  integerPart cN;
7864  integerPart c0 = 0;
7865  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
7866       I != E; I++) {
7867    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
7868    if (!C)
7869      return false;
7870
7871    bool isExact;
7872    APFloat APF = C->getValueAPF();
7873    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
7874        != APFloat::opOK || !isExact)
7875      return false;
7876
7877    c0 = (I == 0) ? cN : c0;
7878    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
7879      return false;
7880  }
7881  C = c0;
7882  return true;
7883}
7884
7885/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
7886/// can replace combinations of VMUL and VCVT (floating-point to integer)
7887/// when the VMUL has a constant operand that is a power of 2.
7888///
7889/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
7890///  vmul.f32        d16, d17, d16
7891///  vcvt.s32.f32    d16, d16
7892/// becomes:
7893///  vcvt.s32.f32    d16, d16, #3
7894static SDValue PerformVCVTCombine(SDNode *N,
7895                                  TargetLowering::DAGCombinerInfo &DCI,
7896                                  const ARMSubtarget *Subtarget) {
7897  SelectionDAG &DAG = DCI.DAG;
7898  SDValue Op = N->getOperand(0);
7899
7900  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
7901      Op.getOpcode() != ISD::FMUL)
7902    return SDValue();
7903
7904  uint64_t C;
7905  SDValue N0 = Op->getOperand(0);
7906  SDValue ConstVec = Op->getOperand(1);
7907  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
7908
7909  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
7910      !isConstVecPow2(ConstVec, isSigned, C))
7911    return SDValue();
7912
7913  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
7914    Intrinsic::arm_neon_vcvtfp2fxu;
7915  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
7916                     N->getValueType(0),
7917                     DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
7918                     DAG.getConstant(Log2_64(C), MVT::i32));
7919}
7920
7921/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
7922/// can replace combinations of VCVT (integer to floating-point) and VDIV
7923/// when the VDIV has a constant operand that is a power of 2.
7924///
7925/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
7926///  vcvt.f32.s32    d16, d16
7927///  vdiv.f32        d16, d17, d16
7928/// becomes:
7929///  vcvt.f32.s32    d16, d16, #3
7930static SDValue PerformVDIVCombine(SDNode *N,
7931                                  TargetLowering::DAGCombinerInfo &DCI,
7932                                  const ARMSubtarget *Subtarget) {
7933  SelectionDAG &DAG = DCI.DAG;
7934  SDValue Op = N->getOperand(0);
7935  unsigned OpOpcode = Op.getNode()->getOpcode();
7936
7937  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
7938      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
7939    return SDValue();
7940
7941  uint64_t C;
7942  SDValue ConstVec = N->getOperand(1);
7943  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
7944
7945  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
7946      !isConstVecPow2(ConstVec, isSigned, C))
7947    return SDValue();
7948
7949  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
7950    Intrinsic::arm_neon_vcvtfxu2fp;
7951  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
7952                     Op.getValueType(),
7953                     DAG.getConstant(IntrinsicOpcode, MVT::i32),
7954                     Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
7955}
7956
7957/// Getvshiftimm - Check if this is a valid build_vector for the immediate
7958/// operand of a vector shift operation, where all the elements of the
7959/// build_vector must have the same constant integer value.
7960static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
7961  // Ignore bit_converts.
7962  while (Op.getOpcode() == ISD::BITCAST)
7963    Op = Op.getOperand(0);
7964  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7965  APInt SplatBits, SplatUndef;
7966  unsigned SplatBitSize;
7967  bool HasAnyUndefs;
7968  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
7969                                      HasAnyUndefs, ElementBits) ||
7970      SplatBitSize > ElementBits)
7971    return false;
7972  Cnt = SplatBits.getSExtValue();
7973  return true;
7974}
7975
7976/// isVShiftLImm - Check if this is a valid build_vector for the immediate
7977/// operand of a vector shift left operation.  That value must be in the range:
7978///   0 <= Value < ElementBits for a left shift; or
7979///   0 <= Value <= ElementBits for a long left shift.
7980static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
7981  assert(VT.isVector() && "vector shift count is not a vector type");
7982  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
7983  if (! getVShiftImm(Op, ElementBits, Cnt))
7984    return false;
7985  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
7986}
7987
7988/// isVShiftRImm - Check if this is a valid build_vector for the immediate
7989/// operand of a vector shift right operation.  For a shift opcode, the value
7990/// is positive, but for an intrinsic the value count must be negative. The
7991/// absolute value must be in the range:
7992///   1 <= |Value| <= ElementBits for a right shift; or
7993///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
7994static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
7995                         int64_t &Cnt) {
7996  assert(VT.isVector() && "vector shift count is not a vector type");
7997  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
7998  if (! getVShiftImm(Op, ElementBits, Cnt))
7999    return false;
8000  if (isIntrinsic)
8001    Cnt = -Cnt;
8002  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
8003}
8004
8005/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
8006static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
8007  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
8008  switch (IntNo) {
8009  default:
8010    // Don't do anything for most intrinsics.
8011    break;
8012
8013  // Vector shifts: check for immediate versions and lower them.
8014  // Note: This is done during DAG combining instead of DAG legalizing because
8015  // the build_vectors for 64-bit vector element shift counts are generally
8016  // not legal, and it is hard to see their values after they get legalized to
8017  // loads from a constant pool.
8018  case Intrinsic::arm_neon_vshifts:
8019  case Intrinsic::arm_neon_vshiftu:
8020  case Intrinsic::arm_neon_vshiftls:
8021  case Intrinsic::arm_neon_vshiftlu:
8022  case Intrinsic::arm_neon_vshiftn:
8023  case Intrinsic::arm_neon_vrshifts:
8024  case Intrinsic::arm_neon_vrshiftu:
8025  case Intrinsic::arm_neon_vrshiftn:
8026  case Intrinsic::arm_neon_vqshifts:
8027  case Intrinsic::arm_neon_vqshiftu:
8028  case Intrinsic::arm_neon_vqshiftsu:
8029  case Intrinsic::arm_neon_vqshiftns:
8030  case Intrinsic::arm_neon_vqshiftnu:
8031  case Intrinsic::arm_neon_vqshiftnsu:
8032  case Intrinsic::arm_neon_vqrshiftns:
8033  case Intrinsic::arm_neon_vqrshiftnu:
8034  case Intrinsic::arm_neon_vqrshiftnsu: {
8035    EVT VT = N->getOperand(1).getValueType();
8036    int64_t Cnt;
8037    unsigned VShiftOpc = 0;
8038
8039    switch (IntNo) {
8040    case Intrinsic::arm_neon_vshifts:
8041    case Intrinsic::arm_neon_vshiftu:
8042      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
8043        VShiftOpc = ARMISD::VSHL;
8044        break;
8045      }
8046      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
8047        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
8048                     ARMISD::VSHRs : ARMISD::VSHRu);
8049        break;
8050      }
8051      return SDValue();
8052
8053    case Intrinsic::arm_neon_vshiftls:
8054    case Intrinsic::arm_neon_vshiftlu:
8055      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
8056        break;
8057      llvm_unreachable("invalid shift count for vshll intrinsic");
8058
8059    case Intrinsic::arm_neon_vrshifts:
8060    case Intrinsic::arm_neon_vrshiftu:
8061      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
8062        break;
8063      return SDValue();
8064
8065    case Intrinsic::arm_neon_vqshifts:
8066    case Intrinsic::arm_neon_vqshiftu:
8067      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
8068        break;
8069      return SDValue();
8070
8071    case Intrinsic::arm_neon_vqshiftsu:
8072      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
8073        break;
8074      llvm_unreachable("invalid shift count for vqshlu intrinsic");
8075
8076    case Intrinsic::arm_neon_vshiftn:
8077    case Intrinsic::arm_neon_vrshiftn:
8078    case Intrinsic::arm_neon_vqshiftns:
8079    case Intrinsic::arm_neon_vqshiftnu:
8080    case Intrinsic::arm_neon_vqshiftnsu:
8081    case Intrinsic::arm_neon_vqrshiftns:
8082    case Intrinsic::arm_neon_vqrshiftnu:
8083    case Intrinsic::arm_neon_vqrshiftnsu:
8084      // Narrowing shifts require an immediate right shift.
8085      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
8086        break;
8087      llvm_unreachable("invalid shift count for narrowing vector shift "
8088                       "intrinsic");
8089
8090    default:
8091      llvm_unreachable("unhandled vector shift");
8092    }
8093
8094    switch (IntNo) {
8095    case Intrinsic::arm_neon_vshifts:
8096    case Intrinsic::arm_neon_vshiftu:
8097      // Opcode already set above.
8098      break;
8099    case Intrinsic::arm_neon_vshiftls:
8100    case Intrinsic::arm_neon_vshiftlu:
8101      if (Cnt == VT.getVectorElementType().getSizeInBits())
8102        VShiftOpc = ARMISD::VSHLLi;
8103      else
8104        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
8105                     ARMISD::VSHLLs : ARMISD::VSHLLu);
8106      break;
8107    case Intrinsic::arm_neon_vshiftn:
8108      VShiftOpc = ARMISD::VSHRN; break;
8109    case Intrinsic::arm_neon_vrshifts:
8110      VShiftOpc = ARMISD::VRSHRs; break;
8111    case Intrinsic::arm_neon_vrshiftu:
8112      VShiftOpc = ARMISD::VRSHRu; break;
8113    case Intrinsic::arm_neon_vrshiftn:
8114      VShiftOpc = ARMISD::VRSHRN; break;
8115    case Intrinsic::arm_neon_vqshifts:
8116      VShiftOpc = ARMISD::VQSHLs; break;
8117    case Intrinsic::arm_neon_vqshiftu:
8118      VShiftOpc = ARMISD::VQSHLu; break;
8119    case Intrinsic::arm_neon_vqshiftsu:
8120      VShiftOpc = ARMISD::VQSHLsu; break;
8121    case Intrinsic::arm_neon_vqshiftns:
8122      VShiftOpc = ARMISD::VQSHRNs; break;
8123    case Intrinsic::arm_neon_vqshiftnu:
8124      VShiftOpc = ARMISD::VQSHRNu; break;
8125    case Intrinsic::arm_neon_vqshiftnsu:
8126      VShiftOpc = ARMISD::VQSHRNsu; break;
8127    case Intrinsic::arm_neon_vqrshiftns:
8128      VShiftOpc = ARMISD::VQRSHRNs; break;
8129    case Intrinsic::arm_neon_vqrshiftnu:
8130      VShiftOpc = ARMISD::VQRSHRNu; break;
8131    case Intrinsic::arm_neon_vqrshiftnsu:
8132      VShiftOpc = ARMISD::VQRSHRNsu; break;
8133    }
8134
8135    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
8136                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
8137  }
8138
8139  case Intrinsic::arm_neon_vshiftins: {
8140    EVT VT = N->getOperand(1).getValueType();
8141    int64_t Cnt;
8142    unsigned VShiftOpc = 0;
8143
8144    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
8145      VShiftOpc = ARMISD::VSLI;
8146    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
8147      VShiftOpc = ARMISD::VSRI;
8148    else {
8149      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
8150    }
8151
8152    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
8153                       N->getOperand(1), N->getOperand(2),
8154                       DAG.getConstant(Cnt, MVT::i32));
8155  }
8156
8157  case Intrinsic::arm_neon_vqrshifts:
8158  case Intrinsic::arm_neon_vqrshiftu:
8159    // No immediate versions of these to check for.
8160    break;
8161  }
8162
8163  return SDValue();
8164}
8165
8166/// PerformShiftCombine - Checks for immediate versions of vector shifts and
8167/// lowers them.  As with the vector shift intrinsics, this is done during DAG
8168/// combining instead of DAG legalizing because the build_vectors for 64-bit
8169/// vector element shift counts are generally not legal, and it is hard to see
8170/// their values after they get legalized to loads from a constant pool.
8171static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
8172                                   const ARMSubtarget *ST) {
8173  EVT VT = N->getValueType(0);
8174  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
8175    // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
8176    // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
8177    SDValue N1 = N->getOperand(1);
8178    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
8179      SDValue N0 = N->getOperand(0);
8180      if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
8181          DAG.MaskedValueIsZero(N0.getOperand(0),
8182                                APInt::getHighBitsSet(32, 16)))
8183        return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
8184    }
8185  }
8186
8187  // Nothing to be done for scalar shifts.
8188  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8189  if (!VT.isVector() || !TLI.isTypeLegal(VT))
8190    return SDValue();
8191
8192  assert(ST->hasNEON() && "unexpected vector shift");
8193  int64_t Cnt;
8194
8195  switch (N->getOpcode()) {
8196  default: llvm_unreachable("unexpected shift opcode");
8197
8198  case ISD::SHL:
8199    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
8200      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
8201                         DAG.getConstant(Cnt, MVT::i32));
8202    break;
8203
8204  case ISD::SRA:
8205  case ISD::SRL:
8206    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
8207      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
8208                            ARMISD::VSHRs : ARMISD::VSHRu);
8209      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
8210                         DAG.getConstant(Cnt, MVT::i32));
8211    }
8212  }
8213  return SDValue();
8214}
8215
8216/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
8217/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
8218static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
8219                                    const ARMSubtarget *ST) {
8220  SDValue N0 = N->getOperand(0);
8221
8222  // Check for sign- and zero-extensions of vector extract operations of 8-
8223  // and 16-bit vector elements.  NEON supports these directly.  They are
8224  // handled during DAG combining because type legalization will promote them
8225  // to 32-bit types and it is messy to recognize the operations after that.
8226  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
8227    SDValue Vec = N0.getOperand(0);
8228    SDValue Lane = N0.getOperand(1);
8229    EVT VT = N->getValueType(0);
8230    EVT EltVT = N0.getValueType();
8231    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8232
8233    if (VT == MVT::i32 &&
8234        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
8235        TLI.isTypeLegal(Vec.getValueType()) &&
8236        isa<ConstantSDNode>(Lane)) {
8237
8238      unsigned Opc = 0;
8239      switch (N->getOpcode()) {
8240      default: llvm_unreachable("unexpected opcode");
8241      case ISD::SIGN_EXTEND:
8242        Opc = ARMISD::VGETLANEs;
8243        break;
8244      case ISD::ZERO_EXTEND:
8245      case ISD::ANY_EXTEND:
8246        Opc = ARMISD::VGETLANEu;
8247        break;
8248      }
8249      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
8250    }
8251  }
8252
8253  return SDValue();
8254}
8255
8256/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
8257/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
8258static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
8259                                       const ARMSubtarget *ST) {
8260  // If the target supports NEON, try to use vmax/vmin instructions for f32
8261  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
8262  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
8263  // a NaN; only do the transformation when it matches that behavior.
8264
8265  // For now only do this when using NEON for FP operations; if using VFP, it
8266  // is not obvious that the benefit outweighs the cost of switching to the
8267  // NEON pipeline.
8268  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
8269      N->getValueType(0) != MVT::f32)
8270    return SDValue();
8271
8272  SDValue CondLHS = N->getOperand(0);
8273  SDValue CondRHS = N->getOperand(1);
8274  SDValue LHS = N->getOperand(2);
8275  SDValue RHS = N->getOperand(3);
8276  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
8277
8278  unsigned Opcode = 0;
8279  bool IsReversed;
8280  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
8281    IsReversed = false; // x CC y ? x : y
8282  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
8283    IsReversed = true ; // x CC y ? y : x
8284  } else {
8285    return SDValue();
8286  }
8287
8288  bool IsUnordered;
8289  switch (CC) {
8290  default: break;
8291  case ISD::SETOLT:
8292  case ISD::SETOLE:
8293  case ISD::SETLT:
8294  case ISD::SETLE:
8295  case ISD::SETULT:
8296  case ISD::SETULE:
8297    // If LHS is NaN, an ordered comparison will be false and the result will
8298    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
8299    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
8300    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
8301    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
8302      break;
8303    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
8304    // will return -0, so vmin can only be used for unsafe math or if one of
8305    // the operands is known to be nonzero.
8306    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
8307        !DAG.getTarget().Options.UnsafeFPMath &&
8308        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
8309      break;
8310    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
8311    break;
8312
8313  case ISD::SETOGT:
8314  case ISD::SETOGE:
8315  case ISD::SETGT:
8316  case ISD::SETGE:
8317  case ISD::SETUGT:
8318  case ISD::SETUGE:
8319    // If LHS is NaN, an ordered comparison will be false and the result will
8320    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
8321    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
8322    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
8323    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
8324      break;
8325    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
8326    // will return +0, so vmax can only be used for unsafe math or if one of
8327    // the operands is known to be nonzero.
8328    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
8329        !DAG.getTarget().Options.UnsafeFPMath &&
8330        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
8331      break;
8332    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
8333    break;
8334  }
8335
8336  if (!Opcode)
8337    return SDValue();
8338  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
8339}
8340
8341/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
8342SDValue
8343ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
8344  SDValue Cmp = N->getOperand(4);
8345  if (Cmp.getOpcode() != ARMISD::CMPZ)
8346    // Only looking at EQ and NE cases.
8347    return SDValue();
8348
8349  EVT VT = N->getValueType(0);
8350  DebugLoc dl = N->getDebugLoc();
8351  SDValue LHS = Cmp.getOperand(0);
8352  SDValue RHS = Cmp.getOperand(1);
8353  SDValue FalseVal = N->getOperand(0);
8354  SDValue TrueVal = N->getOperand(1);
8355  SDValue ARMcc = N->getOperand(2);
8356  ARMCC::CondCodes CC =
8357    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
8358
8359  // Simplify
8360  //   mov     r1, r0
8361  //   cmp     r1, x
8362  //   mov     r0, y
8363  //   moveq   r0, x
8364  // to
8365  //   cmp     r0, x
8366  //   movne   r0, y
8367  //
8368  //   mov     r1, r0
8369  //   cmp     r1, x
8370  //   mov     r0, x
8371  //   movne   r0, y
8372  // to
8373  //   cmp     r0, x
8374  //   movne   r0, y
8375  /// FIXME: Turn this into a target neutral optimization?
8376  SDValue Res;
8377  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
8378    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
8379                      N->getOperand(3), Cmp);
8380  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
8381    SDValue ARMcc;
8382    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
8383    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
8384                      N->getOperand(3), NewCmp);
8385  }
8386
8387  if (Res.getNode()) {
8388    APInt KnownZero, KnownOne;
8389    DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
8390    // Capture demanded bits information that would be otherwise lost.
8391    if (KnownZero == 0xfffffffe)
8392      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8393                        DAG.getValueType(MVT::i1));
8394    else if (KnownZero == 0xffffff00)
8395      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8396                        DAG.getValueType(MVT::i8));
8397    else if (KnownZero == 0xffff0000)
8398      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8399                        DAG.getValueType(MVT::i16));
8400  }
8401
8402  return Res;
8403}
8404
8405SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
8406                                             DAGCombinerInfo &DCI) const {
8407  switch (N->getOpcode()) {
8408  default: break;
8409  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
8410  case ISD::SUB:        return PerformSUBCombine(N, DCI);
8411  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
8412  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
8413  case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
8414  case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
8415  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
8416  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
8417  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
8418  case ISD::STORE:      return PerformSTORECombine(N, DCI);
8419  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
8420  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
8421  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
8422  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
8423  case ISD::FP_TO_SINT:
8424  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
8425  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
8426  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
8427  case ISD::SHL:
8428  case ISD::SRA:
8429  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
8430  case ISD::SIGN_EXTEND:
8431  case ISD::ZERO_EXTEND:
8432  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
8433  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
8434  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
8435  case ARMISD::VLD2DUP:
8436  case ARMISD::VLD3DUP:
8437  case ARMISD::VLD4DUP:
8438    return CombineBaseUpdate(N, DCI);
8439  case ISD::INTRINSIC_VOID:
8440  case ISD::INTRINSIC_W_CHAIN:
8441    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
8442    case Intrinsic::arm_neon_vld1:
8443    case Intrinsic::arm_neon_vld2:
8444    case Intrinsic::arm_neon_vld3:
8445    case Intrinsic::arm_neon_vld4:
8446    case Intrinsic::arm_neon_vld2lane:
8447    case Intrinsic::arm_neon_vld3lane:
8448    case Intrinsic::arm_neon_vld4lane:
8449    case Intrinsic::arm_neon_vst1:
8450    case Intrinsic::arm_neon_vst2:
8451    case Intrinsic::arm_neon_vst3:
8452    case Intrinsic::arm_neon_vst4:
8453    case Intrinsic::arm_neon_vst2lane:
8454    case Intrinsic::arm_neon_vst3lane:
8455    case Intrinsic::arm_neon_vst4lane:
8456      return CombineBaseUpdate(N, DCI);
8457    default: break;
8458    }
8459    break;
8460  }
8461  return SDValue();
8462}
8463
8464bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
8465                                                          EVT VT) const {
8466  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
8467}
8468
8469bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
8470  if (!Subtarget->allowsUnalignedMem())
8471    return false;
8472
8473  switch (VT.getSimpleVT().SimpleTy) {
8474  default:
8475    return false;
8476  case MVT::i8:
8477  case MVT::i16:
8478  case MVT::i32:
8479    return true;
8480  // FIXME: VLD1 etc with standard alignment is legal.
8481  }
8482}
8483
8484static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
8485                       unsigned AlignCheck) {
8486  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
8487          (DstAlign == 0 || DstAlign % AlignCheck == 0));
8488}
8489
8490EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
8491                                           unsigned DstAlign, unsigned SrcAlign,
8492                                           bool IsZeroVal,
8493                                           bool MemcpyStrSrc,
8494                                           MachineFunction &MF) const {
8495  const Function *F = MF.getFunction();
8496
8497  // See if we can use NEON instructions for this...
8498  if (IsZeroVal &&
8499      !F->hasFnAttr(Attribute::NoImplicitFloat) &&
8500      Subtarget->hasNEON()) {
8501    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
8502      return MVT::v4i32;
8503    } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
8504      return MVT::v2i32;
8505    }
8506  }
8507
8508  // Lowering to i32/i16 if the size permits.
8509  if (Size >= 4) {
8510    return MVT::i32;
8511  } else if (Size >= 2) {
8512    return MVT::i16;
8513  }
8514
8515  // Let the target-independent logic figure it out.
8516  return MVT::Other;
8517}
8518
8519static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
8520  if (V < 0)
8521    return false;
8522
8523  unsigned Scale = 1;
8524  switch (VT.getSimpleVT().SimpleTy) {
8525  default: return false;
8526  case MVT::i1:
8527  case MVT::i8:
8528    // Scale == 1;
8529    break;
8530  case MVT::i16:
8531    // Scale == 2;
8532    Scale = 2;
8533    break;
8534  case MVT::i32:
8535    // Scale == 4;
8536    Scale = 4;
8537    break;
8538  }
8539
8540  if ((V & (Scale - 1)) != 0)
8541    return false;
8542  V /= Scale;
8543  return V == (V & ((1LL << 5) - 1));
8544}
8545
8546static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
8547                                      const ARMSubtarget *Subtarget) {
8548  bool isNeg = false;
8549  if (V < 0) {
8550    isNeg = true;
8551    V = - V;
8552  }
8553
8554  switch (VT.getSimpleVT().SimpleTy) {
8555  default: return false;
8556  case MVT::i1:
8557  case MVT::i8:
8558  case MVT::i16:
8559  case MVT::i32:
8560    // + imm12 or - imm8
8561    if (isNeg)
8562      return V == (V & ((1LL << 8) - 1));
8563    return V == (V & ((1LL << 12) - 1));
8564  case MVT::f32:
8565  case MVT::f64:
8566    // Same as ARM mode. FIXME: NEON?
8567    if (!Subtarget->hasVFP2())
8568      return false;
8569    if ((V & 3) != 0)
8570      return false;
8571    V >>= 2;
8572    return V == (V & ((1LL << 8) - 1));
8573  }
8574}
8575
8576/// isLegalAddressImmediate - Return true if the integer value can be used
8577/// as the offset of the target addressing mode for load / store of the
8578/// given type.
8579static bool isLegalAddressImmediate(int64_t V, EVT VT,
8580                                    const ARMSubtarget *Subtarget) {
8581  if (V == 0)
8582    return true;
8583
8584  if (!VT.isSimple())
8585    return false;
8586
8587  if (Subtarget->isThumb1Only())
8588    return isLegalT1AddressImmediate(V, VT);
8589  else if (Subtarget->isThumb2())
8590    return isLegalT2AddressImmediate(V, VT, Subtarget);
8591
8592  // ARM mode.
8593  if (V < 0)
8594    V = - V;
8595  switch (VT.getSimpleVT().SimpleTy) {
8596  default: return false;
8597  case MVT::i1:
8598  case MVT::i8:
8599  case MVT::i32:
8600    // +- imm12
8601    return V == (V & ((1LL << 12) - 1));
8602  case MVT::i16:
8603    // +- imm8
8604    return V == (V & ((1LL << 8) - 1));
8605  case MVT::f32:
8606  case MVT::f64:
8607    if (!Subtarget->hasVFP2()) // FIXME: NEON?
8608      return false;
8609    if ((V & 3) != 0)
8610      return false;
8611    V >>= 2;
8612    return V == (V & ((1LL << 8) - 1));
8613  }
8614}
8615
8616bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
8617                                                      EVT VT) const {
8618  int Scale = AM.Scale;
8619  if (Scale < 0)
8620    return false;
8621
8622  switch (VT.getSimpleVT().SimpleTy) {
8623  default: return false;
8624  case MVT::i1:
8625  case MVT::i8:
8626  case MVT::i16:
8627  case MVT::i32:
8628    if (Scale == 1)
8629      return true;
8630    // r + r << imm
8631    Scale = Scale & ~1;
8632    return Scale == 2 || Scale == 4 || Scale == 8;
8633  case MVT::i64:
8634    // r + r
8635    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
8636      return true;
8637    return false;
8638  case MVT::isVoid:
8639    // Note, we allow "void" uses (basically, uses that aren't loads or
8640    // stores), because arm allows folding a scale into many arithmetic
8641    // operations.  This should be made more precise and revisited later.
8642
8643    // Allow r << imm, but the imm has to be a multiple of two.
8644    if (Scale & 1) return false;
8645    return isPowerOf2_32(Scale);
8646  }
8647}
8648
8649/// isLegalAddressingMode - Return true if the addressing mode represented
8650/// by AM is legal for this target, for a load/store of the specified type.
8651bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
8652                                              Type *Ty) const {
8653  EVT VT = getValueType(Ty, true);
8654  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
8655    return false;
8656
8657  // Can never fold addr of global into load/store.
8658  if (AM.BaseGV)
8659    return false;
8660
8661  switch (AM.Scale) {
8662  case 0:  // no scale reg, must be "r+i" or "r", or "i".
8663    break;
8664  case 1:
8665    if (Subtarget->isThumb1Only())
8666      return false;
8667    // FALL THROUGH.
8668  default:
8669    // ARM doesn't support any R+R*scale+imm addr modes.
8670    if (AM.BaseOffs)
8671      return false;
8672
8673    if (!VT.isSimple())
8674      return false;
8675
8676    if (Subtarget->isThumb2())
8677      return isLegalT2ScaledAddressingMode(AM, VT);
8678
8679    int Scale = AM.Scale;
8680    switch (VT.getSimpleVT().SimpleTy) {
8681    default: return false;
8682    case MVT::i1:
8683    case MVT::i8:
8684    case MVT::i32:
8685      if (Scale < 0) Scale = -Scale;
8686      if (Scale == 1)
8687        return true;
8688      // r + r << imm
8689      return isPowerOf2_32(Scale & ~1);
8690    case MVT::i16:
8691    case MVT::i64:
8692      // r + r
8693      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
8694        return true;
8695      return false;
8696
8697    case MVT::isVoid:
8698      // Note, we allow "void" uses (basically, uses that aren't loads or
8699      // stores), because arm allows folding a scale into many arithmetic
8700      // operations.  This should be made more precise and revisited later.
8701
8702      // Allow r << imm, but the imm has to be a multiple of two.
8703      if (Scale & 1) return false;
8704      return isPowerOf2_32(Scale);
8705    }
8706  }
8707  return true;
8708}
8709
8710/// isLegalICmpImmediate - Return true if the specified immediate is legal
8711/// icmp immediate, that is the target has icmp instructions which can compare
8712/// a register against the immediate without having to materialize the
8713/// immediate into a register.
8714bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8715  // Thumb2 and ARM modes can use cmn for negative immediates.
8716  if (!Subtarget->isThumb())
8717    return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
8718  if (Subtarget->isThumb2())
8719    return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
8720  // Thumb1 doesn't have cmn, and only 8-bit immediates.
8721  return Imm >= 0 && Imm <= 255;
8722}
8723
8724/// isLegalAddImmediate - Return true if the specified immediate is legal
8725/// add immediate, that is the target has add instructions which can add
8726/// a register with the immediate without having to materialize the
8727/// immediate into a register.
8728bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8729  return ARM_AM::getSOImmVal(Imm) != -1;
8730}
8731
8732static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
8733                                      bool isSEXTLoad, SDValue &Base,
8734                                      SDValue &Offset, bool &isInc,
8735                                      SelectionDAG &DAG) {
8736  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
8737    return false;
8738
8739  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
8740    // AddressingMode 3
8741    Base = Ptr->getOperand(0);
8742    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
8743      int RHSC = (int)RHS->getZExtValue();
8744      if (RHSC < 0 && RHSC > -256) {
8745        assert(Ptr->getOpcode() == ISD::ADD);
8746        isInc = false;
8747        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
8748        return true;
8749      }
8750    }
8751    isInc = (Ptr->getOpcode() == ISD::ADD);
8752    Offset = Ptr->getOperand(1);
8753    return true;
8754  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
8755    // AddressingMode 2
8756    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
8757      int RHSC = (int)RHS->getZExtValue();
8758      if (RHSC < 0 && RHSC > -0x1000) {
8759        assert(Ptr->getOpcode() == ISD::ADD);
8760        isInc = false;
8761        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
8762        Base = Ptr->getOperand(0);
8763        return true;
8764      }
8765    }
8766
8767    if (Ptr->getOpcode() == ISD::ADD) {
8768      isInc = true;
8769      ARM_AM::ShiftOpc ShOpcVal=
8770        ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
8771      if (ShOpcVal != ARM_AM::no_shift) {
8772        Base = Ptr->getOperand(1);
8773        Offset = Ptr->getOperand(0);
8774      } else {
8775        Base = Ptr->getOperand(0);
8776        Offset = Ptr->getOperand(1);
8777      }
8778      return true;
8779    }
8780
8781    isInc = (Ptr->getOpcode() == ISD::ADD);
8782    Base = Ptr->getOperand(0);
8783    Offset = Ptr->getOperand(1);
8784    return true;
8785  }
8786
8787  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
8788  return false;
8789}
8790
8791static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
8792                                     bool isSEXTLoad, SDValue &Base,
8793                                     SDValue &Offset, bool &isInc,
8794                                     SelectionDAG &DAG) {
8795  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
8796    return false;
8797
8798  Base = Ptr->getOperand(0);
8799  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
8800    int RHSC = (int)RHS->getZExtValue();
8801    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
8802      assert(Ptr->getOpcode() == ISD::ADD);
8803      isInc = false;
8804      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
8805      return true;
8806    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
8807      isInc = Ptr->getOpcode() == ISD::ADD;
8808      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
8809      return true;
8810    }
8811  }
8812
8813  return false;
8814}
8815
8816/// getPreIndexedAddressParts - returns true by value, base pointer and
8817/// offset pointer and addressing mode by reference if the node's address
8818/// can be legally represented as pre-indexed load / store address.
8819bool
8820ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
8821                                             SDValue &Offset,
8822                                             ISD::MemIndexedMode &AM,
8823                                             SelectionDAG &DAG) const {
8824  if (Subtarget->isThumb1Only())
8825    return false;
8826
8827  EVT VT;
8828  SDValue Ptr;
8829  bool isSEXTLoad = false;
8830  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
8831    Ptr = LD->getBasePtr();
8832    VT  = LD->getMemoryVT();
8833    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
8834  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
8835    Ptr = ST->getBasePtr();
8836    VT  = ST->getMemoryVT();
8837  } else
8838    return false;
8839
8840  bool isInc;
8841  bool isLegal = false;
8842  if (Subtarget->isThumb2())
8843    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
8844                                       Offset, isInc, DAG);
8845  else
8846    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
8847                                        Offset, isInc, DAG);
8848  if (!isLegal)
8849    return false;
8850
8851  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
8852  return true;
8853}
8854
8855/// getPostIndexedAddressParts - returns true by value, base pointer and
8856/// offset pointer and addressing mode by reference if this node can be
8857/// combined with a load / store to form a post-indexed load / store.
8858bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
8859                                                   SDValue &Base,
8860                                                   SDValue &Offset,
8861                                                   ISD::MemIndexedMode &AM,
8862                                                   SelectionDAG &DAG) const {
8863  if (Subtarget->isThumb1Only())
8864    return false;
8865
8866  EVT VT;
8867  SDValue Ptr;
8868  bool isSEXTLoad = false;
8869  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
8870    VT  = LD->getMemoryVT();
8871    Ptr = LD->getBasePtr();
8872    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
8873  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
8874    VT  = ST->getMemoryVT();
8875    Ptr = ST->getBasePtr();
8876  } else
8877    return false;
8878
8879  bool isInc;
8880  bool isLegal = false;
8881  if (Subtarget->isThumb2())
8882    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
8883                                       isInc, DAG);
8884  else
8885    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
8886                                        isInc, DAG);
8887  if (!isLegal)
8888    return false;
8889
8890  if (Ptr != Base) {
8891    // Swap base ptr and offset to catch more post-index load / store when
8892    // it's legal. In Thumb2 mode, offset must be an immediate.
8893    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
8894        !Subtarget->isThumb2())
8895      std::swap(Base, Offset);
8896
8897    // Post-indexed load / store update the base pointer.
8898    if (Ptr != Base)
8899      return false;
8900  }
8901
8902  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
8903  return true;
8904}
8905
8906void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
8907                                                       APInt &KnownZero,
8908                                                       APInt &KnownOne,
8909                                                       const SelectionDAG &DAG,
8910                                                       unsigned Depth) const {
8911  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
8912  switch (Op.getOpcode()) {
8913  default: break;
8914  case ARMISD::CMOV: {
8915    // Bits are known zero/one if known on the LHS and RHS.
8916    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
8917    if (KnownZero == 0 && KnownOne == 0) return;
8918
8919    APInt KnownZeroRHS, KnownOneRHS;
8920    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
8921    KnownZero &= KnownZeroRHS;
8922    KnownOne  &= KnownOneRHS;
8923    return;
8924  }
8925  }
8926}
8927
8928//===----------------------------------------------------------------------===//
8929//                           ARM Inline Assembly Support
8930//===----------------------------------------------------------------------===//
8931
8932bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
8933  // Looking for "rev" which is V6+.
8934  if (!Subtarget->hasV6Ops())
8935    return false;
8936
8937  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
8938  std::string AsmStr = IA->getAsmString();
8939  SmallVector<StringRef, 4> AsmPieces;
8940  SplitString(AsmStr, AsmPieces, ";\n");
8941
8942  switch (AsmPieces.size()) {
8943  default: return false;
8944  case 1:
8945    AsmStr = AsmPieces[0];
8946    AsmPieces.clear();
8947    SplitString(AsmStr, AsmPieces, " \t,");
8948
8949    // rev $0, $1
8950    if (AsmPieces.size() == 3 &&
8951        AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
8952        IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
8953      IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
8954      if (Ty && Ty->getBitWidth() == 32)
8955        return IntrinsicLowering::LowerToByteSwap(CI);
8956    }
8957    break;
8958  }
8959
8960  return false;
8961}
8962
8963/// getConstraintType - Given a constraint letter, return the type of
8964/// constraint it is for this target.
8965ARMTargetLowering::ConstraintType
8966ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
8967  if (Constraint.size() == 1) {
8968    switch (Constraint[0]) {
8969    default:  break;
8970    case 'l': return C_RegisterClass;
8971    case 'w': return C_RegisterClass;
8972    case 'h': return C_RegisterClass;
8973    case 'x': return C_RegisterClass;
8974    case 't': return C_RegisterClass;
8975    case 'j': return C_Other; // Constant for movw.
8976      // An address with a single base register. Due to the way we
8977      // currently handle addresses it is the same as an 'r' memory constraint.
8978    case 'Q': return C_Memory;
8979    }
8980  } else if (Constraint.size() == 2) {
8981    switch (Constraint[0]) {
8982    default: break;
8983    // All 'U+' constraints are addresses.
8984    case 'U': return C_Memory;
8985    }
8986  }
8987  return TargetLowering::getConstraintType(Constraint);
8988}
8989
8990/// Examine constraint type and operand type and determine a weight value.
8991/// This object must already have been set up with the operand type
8992/// and the current alternative constraint selected.
8993TargetLowering::ConstraintWeight
8994ARMTargetLowering::getSingleConstraintMatchWeight(
8995    AsmOperandInfo &info, const char *constraint) const {
8996  ConstraintWeight weight = CW_Invalid;
8997  Value *CallOperandVal = info.CallOperandVal;
8998    // If we don't have a value, we can't do a match,
8999    // but allow it at the lowest weight.
9000  if (CallOperandVal == NULL)
9001    return CW_Default;
9002  Type *type = CallOperandVal->getType();
9003  // Look at the constraint type.
9004  switch (*constraint) {
9005  default:
9006    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
9007    break;
9008  case 'l':
9009    if (type->isIntegerTy()) {
9010      if (Subtarget->isThumb())
9011        weight = CW_SpecificReg;
9012      else
9013        weight = CW_Register;
9014    }
9015    break;
9016  case 'w':
9017    if (type->isFloatingPointTy())
9018      weight = CW_Register;
9019    break;
9020  }
9021  return weight;
9022}
9023
9024typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
9025RCPair
9026ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
9027                                                EVT VT) const {
9028  if (Constraint.size() == 1) {
9029    // GCC ARM Constraint Letters
9030    switch (Constraint[0]) {
9031    case 'l': // Low regs or general regs.
9032      if (Subtarget->isThumb())
9033        return RCPair(0U, ARM::tGPRRegisterClass);
9034      else
9035        return RCPair(0U, ARM::GPRRegisterClass);
9036    case 'h': // High regs or no regs.
9037      if (Subtarget->isThumb())
9038        return RCPair(0U, ARM::hGPRRegisterClass);
9039      break;
9040    case 'r':
9041      return RCPair(0U, ARM::GPRRegisterClass);
9042    case 'w':
9043      if (VT == MVT::f32)
9044        return RCPair(0U, ARM::SPRRegisterClass);
9045      if (VT.getSizeInBits() == 64)
9046        return RCPair(0U, ARM::DPRRegisterClass);
9047      if (VT.getSizeInBits() == 128)
9048        return RCPair(0U, ARM::QPRRegisterClass);
9049      break;
9050    case 'x':
9051      if (VT == MVT::f32)
9052        return RCPair(0U, ARM::SPR_8RegisterClass);
9053      if (VT.getSizeInBits() == 64)
9054        return RCPair(0U, ARM::DPR_8RegisterClass);
9055      if (VT.getSizeInBits() == 128)
9056        return RCPair(0U, ARM::QPR_8RegisterClass);
9057      break;
9058    case 't':
9059      if (VT == MVT::f32)
9060        return RCPair(0U, ARM::SPRRegisterClass);
9061      break;
9062    }
9063  }
9064  if (StringRef("{cc}").equals_lower(Constraint))
9065    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
9066
9067  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
9068}
9069
9070/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
9071/// vector.  If it is invalid, don't add anything to Ops.
9072void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
9073                                                     std::string &Constraint,
9074                                                     std::vector<SDValue>&Ops,
9075                                                     SelectionDAG &DAG) const {
9076  SDValue Result(0, 0);
9077
9078  // Currently only support length 1 constraints.
9079  if (Constraint.length() != 1) return;
9080
9081  char ConstraintLetter = Constraint[0];
9082  switch (ConstraintLetter) {
9083  default: break;
9084  case 'j':
9085  case 'I': case 'J': case 'K': case 'L':
9086  case 'M': case 'N': case 'O':
9087    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
9088    if (!C)
9089      return;
9090
9091    int64_t CVal64 = C->getSExtValue();
9092    int CVal = (int) CVal64;
9093    // None of these constraints allow values larger than 32 bits.  Check
9094    // that the value fits in an int.
9095    if (CVal != CVal64)
9096      return;
9097
9098    switch (ConstraintLetter) {
9099      case 'j':
9100        // Constant suitable for movw, must be between 0 and
9101        // 65535.
9102        if (Subtarget->hasV6T2Ops())
9103          if (CVal >= 0 && CVal <= 65535)
9104            break;
9105        return;
9106      case 'I':
9107        if (Subtarget->isThumb1Only()) {
9108          // This must be a constant between 0 and 255, for ADD
9109          // immediates.
9110          if (CVal >= 0 && CVal <= 255)
9111            break;
9112        } else if (Subtarget->isThumb2()) {
9113          // A constant that can be used as an immediate value in a
9114          // data-processing instruction.
9115          if (ARM_AM::getT2SOImmVal(CVal) != -1)
9116            break;
9117        } else {
9118          // A constant that can be used as an immediate value in a
9119          // data-processing instruction.
9120          if (ARM_AM::getSOImmVal(CVal) != -1)
9121            break;
9122        }
9123        return;
9124
9125      case 'J':
9126        if (Subtarget->isThumb()) {  // FIXME thumb2
9127          // This must be a constant between -255 and -1, for negated ADD
9128          // immediates. This can be used in GCC with an "n" modifier that
9129          // prints the negated value, for use with SUB instructions. It is
9130          // not useful otherwise but is implemented for compatibility.
9131          if (CVal >= -255 && CVal <= -1)
9132            break;
9133        } else {
9134          // This must be a constant between -4095 and 4095. It is not clear
9135          // what this constraint is intended for. Implemented for
9136          // compatibility with GCC.
9137          if (CVal >= -4095 && CVal <= 4095)
9138            break;
9139        }
9140        return;
9141
9142      case 'K':
9143        if (Subtarget->isThumb1Only()) {
9144          // A 32-bit value where only one byte has a nonzero value. Exclude
9145          // zero to match GCC. This constraint is used by GCC internally for
9146          // constants that can be loaded with a move/shift combination.
9147          // It is not useful otherwise but is implemented for compatibility.
9148          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
9149            break;
9150        } else if (Subtarget->isThumb2()) {
9151          // A constant whose bitwise inverse can be used as an immediate
9152          // value in a data-processing instruction. This can be used in GCC
9153          // with a "B" modifier that prints the inverted value, for use with
9154          // BIC and MVN instructions. It is not useful otherwise but is
9155          // implemented for compatibility.
9156          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
9157            break;
9158        } else {
9159          // A constant whose bitwise inverse can be used as an immediate
9160          // value in a data-processing instruction. This can be used in GCC
9161          // with a "B" modifier that prints the inverted value, for use with
9162          // BIC and MVN instructions. It is not useful otherwise but is
9163          // implemented for compatibility.
9164          if (ARM_AM::getSOImmVal(~CVal) != -1)
9165            break;
9166        }
9167        return;
9168
9169      case 'L':
9170        if (Subtarget->isThumb1Only()) {
9171          // This must be a constant between -7 and 7,
9172          // for 3-operand ADD/SUB immediate instructions.
9173          if (CVal >= -7 && CVal < 7)
9174            break;
9175        } else if (Subtarget->isThumb2()) {
9176          // A constant whose negation can be used as an immediate value in a
9177          // data-processing instruction. This can be used in GCC with an "n"
9178          // modifier that prints the negated value, for use with SUB
9179          // instructions. It is not useful otherwise but is implemented for
9180          // compatibility.
9181          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
9182            break;
9183        } else {
9184          // A constant whose negation can be used as an immediate value in a
9185          // data-processing instruction. This can be used in GCC with an "n"
9186          // modifier that prints the negated value, for use with SUB
9187          // instructions. It is not useful otherwise but is implemented for
9188          // compatibility.
9189          if (ARM_AM::getSOImmVal(-CVal) != -1)
9190            break;
9191        }
9192        return;
9193
9194      case 'M':
9195        if (Subtarget->isThumb()) { // FIXME thumb2
9196          // This must be a multiple of 4 between 0 and 1020, for
9197          // ADD sp + immediate.
9198          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
9199            break;
9200        } else {
9201          // A power of two or a constant between 0 and 32.  This is used in
9202          // GCC for the shift amount on shifted register operands, but it is
9203          // useful in general for any shift amounts.
9204          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
9205            break;
9206        }
9207        return;
9208
9209      case 'N':
9210        if (Subtarget->isThumb()) {  // FIXME thumb2
9211          // This must be a constant between 0 and 31, for shift amounts.
9212          if (CVal >= 0 && CVal <= 31)
9213            break;
9214        }
9215        return;
9216
9217      case 'O':
9218        if (Subtarget->isThumb()) {  // FIXME thumb2
9219          // This must be a multiple of 4 between -508 and 508, for
9220          // ADD/SUB sp = sp + immediate.
9221          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
9222            break;
9223        }
9224        return;
9225    }
9226    Result = DAG.getTargetConstant(CVal, Op.getValueType());
9227    break;
9228  }
9229
9230  if (Result.getNode()) {
9231    Ops.push_back(Result);
9232    return;
9233  }
9234  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
9235}
9236
9237bool
9238ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
9239  // The ARM target isn't yet aware of offsets.
9240  return false;
9241}
9242
9243bool ARM::isBitFieldInvertedMask(unsigned v) {
9244  if (v == 0xffffffff)
9245    return 0;
9246  // there can be 1's on either or both "outsides", all the "inside"
9247  // bits must be 0's
9248  unsigned int lsb = 0, msb = 31;
9249  while (v & (1 << msb)) --msb;
9250  while (v & (1 << lsb)) ++lsb;
9251  for (unsigned int i = lsb; i <= msb; ++i) {
9252    if (v & (1 << i))
9253      return 0;
9254  }
9255  return 1;
9256}
9257
9258/// isFPImmLegal - Returns true if the target can instruction select the
9259/// specified FP immediate natively. If false, the legalizer will
9260/// materialize the FP immediate as a load from a constant pool.
9261bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
9262  if (!Subtarget->hasVFP3())
9263    return false;
9264  if (VT == MVT::f32)
9265    return ARM_AM::getFP32Imm(Imm) != -1;
9266  if (VT == MVT::f64)
9267    return ARM_AM::getFP64Imm(Imm) != -1;
9268  return false;
9269}
9270
9271/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
9272/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
9273/// specified in the intrinsic calls.
9274bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
9275                                           const CallInst &I,
9276                                           unsigned Intrinsic) const {
9277  switch (Intrinsic) {
9278  case Intrinsic::arm_neon_vld1:
9279  case Intrinsic::arm_neon_vld2:
9280  case Intrinsic::arm_neon_vld3:
9281  case Intrinsic::arm_neon_vld4:
9282  case Intrinsic::arm_neon_vld2lane:
9283  case Intrinsic::arm_neon_vld3lane:
9284  case Intrinsic::arm_neon_vld4lane: {
9285    Info.opc = ISD::INTRINSIC_W_CHAIN;
9286    // Conservatively set memVT to the entire set of vectors loaded.
9287    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
9288    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
9289    Info.ptrVal = I.getArgOperand(0);
9290    Info.offset = 0;
9291    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
9292    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
9293    Info.vol = false; // volatile loads with NEON intrinsics not supported
9294    Info.readMem = true;
9295    Info.writeMem = false;
9296    return true;
9297  }
9298  case Intrinsic::arm_neon_vst1:
9299  case Intrinsic::arm_neon_vst2:
9300  case Intrinsic::arm_neon_vst3:
9301  case Intrinsic::arm_neon_vst4:
9302  case Intrinsic::arm_neon_vst2lane:
9303  case Intrinsic::arm_neon_vst3lane:
9304  case Intrinsic::arm_neon_vst4lane: {
9305    Info.opc = ISD::INTRINSIC_VOID;
9306    // Conservatively set memVT to the entire set of vectors stored.
9307    unsigned NumElts = 0;
9308    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
9309      Type *ArgTy = I.getArgOperand(ArgI)->getType();
9310      if (!ArgTy->isVectorTy())
9311        break;
9312      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
9313    }
9314    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
9315    Info.ptrVal = I.getArgOperand(0);
9316    Info.offset = 0;
9317    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
9318    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
9319    Info.vol = false; // volatile stores with NEON intrinsics not supported
9320    Info.readMem = false;
9321    Info.writeMem = true;
9322    return true;
9323  }
9324  case Intrinsic::arm_strexd: {
9325    Info.opc = ISD::INTRINSIC_W_CHAIN;
9326    Info.memVT = MVT::i64;
9327    Info.ptrVal = I.getArgOperand(2);
9328    Info.offset = 0;
9329    Info.align = 8;
9330    Info.vol = true;
9331    Info.readMem = false;
9332    Info.writeMem = true;
9333    return true;
9334  }
9335  case Intrinsic::arm_ldrexd: {
9336    Info.opc = ISD::INTRINSIC_W_CHAIN;
9337    Info.memVT = MVT::i64;
9338    Info.ptrVal = I.getArgOperand(0);
9339    Info.offset = 0;
9340    Info.align = 8;
9341    Info.vol = true;
9342    Info.readMem = true;
9343    Info.writeMem = false;
9344    return true;
9345  }
9346  default:
9347    break;
9348  }
9349
9350  return false;
9351}
9352