ARMISelLowering.cpp revision 8ea60bc4c322910d48ce1b1a1cf416e90268cc98
1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-isel"
16#include "ARM.h"
17#include "ARMAddressingModes.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMISelLowering.h"
21#include "ARMMachineFunctionInfo.h"
22#include "ARMPerfectShuffle.h"
23#include "ARMRegisterInfo.h"
24#include "ARMSubtarget.h"
25#include "ARMTargetMachine.h"
26#include "ARMTargetObjectFile.h"
27#include "llvm/CallingConv.h"
28#include "llvm/Constants.h"
29#include "llvm/Function.h"
30#include "llvm/GlobalValue.h"
31#include "llvm/Instruction.h"
32#include "llvm/Instructions.h"
33#include "llvm/Intrinsics.h"
34#include "llvm/Type.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/PseudoSourceValue.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/MC/MCSectionMachO.h"
44#include "llvm/Target/TargetOptions.h"
45#include "llvm/ADT/VectorExtras.h"
46#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/ErrorHandling.h"
49#include "llvm/Support/MathExtras.h"
50#include "llvm/Support/raw_ostream.h"
51#include <sstream>
52using namespace llvm;
53
54STATISTIC(NumTailCalls, "Number of tail calls");
55
56// This option should go away when tail calls fully work.
57static cl::opt<bool>
58EnableARMTailCalls("arm-tail-calls", cl::Hidden,
59  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
60  cl::init(false));
61
62static cl::opt<bool>
63EnableARMLongCalls("arm-long-calls", cl::Hidden,
64  cl::desc("Generate calls via indirect call instructions"),
65  cl::init(false));
66
67static cl::opt<bool>
68ARMInterworking("arm-interworking", cl::Hidden,
69  cl::desc("Enable / disable ARM interworking (for debugging only)"),
70  cl::init(true));
71
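/// addTypeForNEON - Set the operation actions for a NEON vector type: promote
/// loads/stores and bitwise ops to the given types, custom-lower the operations
/// NEON handles specially, and expand everything it cannot do directly.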
72void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
73                                       EVT PromotedBitwiseVT) {
74  if (VT != PromotedLdStVT) {
75    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
76    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
77                       PromotedLdStVT.getSimpleVT());
78
79    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
80    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
81                       PromotedLdStVT.getSimpleVT());
82  }
83
84  EVT ElemTy = VT.getVectorElementType();
85  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
86    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
87  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
88    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
89  if (ElemTy != MVT::i32) {
90    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
91    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
92    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
93    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
94  }
95  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
96  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
97  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
98  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
99  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
100  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
101  if (VT.isInteger()) {
102    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
103    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
104    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
105    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
106    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
107    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
108         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
109      setTruncStoreAction(VT.getSimpleVT(),
110                          (MVT::SimpleValueType)InnerVT, Expand);
111  }
112  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
113
114  // Promote all bit-wise operations.
115  if (VT.isInteger() && VT != PromotedBitwiseVT) {
116    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
117    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
118                       PromotedBitwiseVT.getSimpleVT());
119    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
120    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
121                       PromotedBitwiseVT.getSimpleVT());
122    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
123    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
124                       PromotedBitwiseVT.getSimpleVT());
125  }
126
127  // Neon does not support vector divide/remainder operations.
128  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
129  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
130  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
131  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
132  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
133  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
134}
135
136void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
137  addRegisterClass(VT, ARM::DPRRegisterClass);
138  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
139}
140
141void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
142  addRegisterClass(VT, ARM::QPRRegisterClass);
143  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
144}
145
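/// createTLOF - Darwin targets lower to Mach-O; all other ARM targets use the
/// ELF object-file lowering.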
146static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
147  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
148    return new TargetLoweringObjectFileMachO();
149
150  return new ARMElfTargetObjectFile();
151}
152
153ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
154    : TargetLowering(TM, createTLOF(TM)) {
155  Subtarget = &TM.getSubtarget<ARMSubtarget>();
156  RegInfo = TM.getRegisterInfo();
157  Itins = TM.getInstrItineraryData();
158
159  if (Subtarget->isTargetDarwin()) {
160    // Uses VFP for Thumb libfuncs if available.
161    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
162      // Single-precision floating-point arithmetic.
163      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
164      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
165      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
166      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
167
168      // Double-precision floating-point arithmetic.
169      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
170      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
171      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
172      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
173
174      // Single-precision comparisons.
175      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
176      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
177      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
178      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
179      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
180      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
181      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
182      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
183
184      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
185      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
186      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
187      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
188      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
189      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
190      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
191      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
192
193      // Double-precision comparisons.
194      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
195      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
196      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
197      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
198      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
199      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
200      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
201      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
202
203      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
204      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
205      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
206      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
207      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
208      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
209      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
210      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
211
212      // Floating-point to integer conversions.
213      // i64 conversions are done via library routines even when generating VFP
214      // instructions, so use the same ones.
215      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
216      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
217      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
218      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
219
220      // Conversions between floating types.
221      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
222      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
223
224      // Integer to floating-point conversions.
225      // i64 conversions are done via library routines even when generating VFP
226      // instructions, so use the same ones.
227      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
228      // e.g., __floatunsidf vs. __floatunssidfvfp.
229      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
230      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
231      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
232      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
233    }
234  }
235
236  // These libcalls are not available on 32-bit targets.
237  setLibcallName(RTLIB::SHL_I128, 0);
238  setLibcallName(RTLIB::SRL_I128, 0);
239  setLibcallName(RTLIB::SRA_I128, 0);
240
241  if (Subtarget->isAAPCS_ABI()) {
242    // Double-precision floating-point arithmetic helper functions
243    // RTABI chapter 4.1.2, Table 2
244    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
245    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
246    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
247    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
248    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
249    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
250    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
251    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
252
253    // Double-precision floating-point comparison helper functions
254    // RTABI chapter 4.1.2, Table 3
255    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
256    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
257    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
258    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
259    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
260    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
261    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
262    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
263    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
264    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
265    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
266    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
267    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
268    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
269    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
270    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
271    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
272    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
273    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
274    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
275    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
276    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
277    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
278    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
279
280    // Single-precision floating-point arithmetic helper functions
281    // RTABI chapter 4.1.2, Table 4
282    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
283    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
284    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
285    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
286    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
287    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
288    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
289    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
290
291    // Single-precision floating-point comparison helper functions
292    // RTABI chapter 4.1.2, Table 5
293    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
294    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
295    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
296    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
297    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
298    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
299    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
300    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
301    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
302    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
303    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
304    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
305    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
306    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
307    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
308    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
309    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
310    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
311    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
312    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
313    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
314    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
315    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
316    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
317
318    // Floating-point to integer conversions.
319    // RTABI chapter 4.1.2, Table 6
320    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
321    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
322    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
323    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
324    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
325    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
326    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
327    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
328    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
329    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
330    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
331    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
332    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
333    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
334    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
335    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
336
337    // Conversions between floating types.
338    // RTABI chapter 4.1.2, Table 7
339    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
340    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
341    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
342    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
343
344    // Integer to floating-point conversions.
345    // RTABI chapter 4.1.2, Table 8
346    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
347    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
348    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
349    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
350    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
351    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
352    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
353    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
354    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
355    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
356    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
357    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
358    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
359    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
360    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
361    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
362
363    // Long long helper functions
364    // RTABI chapter 4.2, Table 9
365    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
366    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
367    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
368    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
369    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
370    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
371    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
372    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
373    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
374    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
375    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
376    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
377
378    // Integer division functions
379    // RTABI chapter 4.3.1
380    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
381    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
382    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
383    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
384    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
385    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
386    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
387    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
388    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
389    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
390    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
391    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
392  }
393
394  if (Subtarget->isThumb1Only())
395    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
396  else
397    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
398  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
399    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
400    if (!Subtarget->isFPOnlySP())
401      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
402
403    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
404  }
405
406  if (Subtarget->hasNEON()) {
407    addDRTypeForNEON(MVT::v2f32);
408    addDRTypeForNEON(MVT::v8i8);
409    addDRTypeForNEON(MVT::v4i16);
410    addDRTypeForNEON(MVT::v2i32);
411    addDRTypeForNEON(MVT::v1i64);
412
413    addQRTypeForNEON(MVT::v4f32);
414    addQRTypeForNEON(MVT::v2f64);
415    addQRTypeForNEON(MVT::v16i8);
416    addQRTypeForNEON(MVT::v8i16);
417    addQRTypeForNEON(MVT::v4i32);
418    addQRTypeForNEON(MVT::v2i64);
419
420    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
421    // neither Neon nor VFP supports any arithmetic operations on it.
422    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
423    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
424    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
425    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
426    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
427    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
428    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
429    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
430    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
431    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
432    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
433    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
434    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
435    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
436    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
437    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
438    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
439    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
440    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
441    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
442    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
443    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
444    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
445    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
446
447    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
448
449    // Neon does not support some operations on v1i64 and v2i64 types.
450    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
451    // Custom handling for some quad-vector types to detect VMULL.
452    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
453    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
454    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
455    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
456    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
457
458    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
459    setTargetDAGCombine(ISD::SHL);
460    setTargetDAGCombine(ISD::SRL);
461    setTargetDAGCombine(ISD::SRA);
462    setTargetDAGCombine(ISD::SIGN_EXTEND);
463    setTargetDAGCombine(ISD::ZERO_EXTEND);
464    setTargetDAGCombine(ISD::ANY_EXTEND);
465    setTargetDAGCombine(ISD::SELECT_CC);
466    setTargetDAGCombine(ISD::BUILD_VECTOR);
467    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
468  }
469
470  computeRegisterProperties();
471
472  // ARM does not have f32 extending load.
473  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
474
475  // ARM does not have i1 sign extending load.
476  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
477
478  // ARM supports all 4 flavors of integer indexed load / store.
479  if (!Subtarget->isThumb1Only()) {
480    for (unsigned im = (unsigned)ISD::PRE_INC;
481         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
482      setIndexedLoadAction(im,  MVT::i1,  Legal);
483      setIndexedLoadAction(im,  MVT::i8,  Legal);
484      setIndexedLoadAction(im,  MVT::i16, Legal);
485      setIndexedLoadAction(im,  MVT::i32, Legal);
486      setIndexedStoreAction(im, MVT::i1,  Legal);
487      setIndexedStoreAction(im, MVT::i8,  Legal);
488      setIndexedStoreAction(im, MVT::i16, Legal);
489      setIndexedStoreAction(im, MVT::i32, Legal);
490    }
491  }
492
493  // i64 operation support.
494  if (Subtarget->isThumb1Only()) {
495    setOperationAction(ISD::MUL,     MVT::i64, Expand);
496    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
497    setOperationAction(ISD::MULHS,   MVT::i32, Expand);
498    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
499    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
500  } else {
501    setOperationAction(ISD::MUL,     MVT::i64, Expand);
502    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
503    if (!Subtarget->hasV6Ops())
504      setOperationAction(ISD::MULHS, MVT::i32, Expand);
505  }
506  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
507  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
508  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
509  setOperationAction(ISD::SRL,       MVT::i64, Custom);
510  setOperationAction(ISD::SRA,       MVT::i64, Custom);
511
512  // ARM does not have ROTL.
513  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
514  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
515  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
516  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
517    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
518
519  // BSWAP is only available on ARMv6 and later.
520  if (!Subtarget->hasV6Ops())
521    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
522
523  // These are expanded into libcalls.
524  if (!Subtarget->hasDivide()) {
525    // v7M has a hardware divider
526    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
527    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
528  }
529  setOperationAction(ISD::SREM,  MVT::i32, Expand);
530  setOperationAction(ISD::UREM,  MVT::i32, Expand);
531  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
532  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
533
534  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
535  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
536  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
537  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
538  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
539
540  setOperationAction(ISD::TRAP, MVT::Other, Legal);
541
542  // Use the default implementation.
543  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
544  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
545  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
546  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
547  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
548  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
549  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
550  // FIXME: Shouldn't need this, since no register is used, but the legalizer
551  // doesn't yet know how to not do that for SjLj.
552  setExceptionSelectorRegister(ARM::R0);
553  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
554  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
555  // the default expansion.
556  if (Subtarget->hasDataBarrier() ||
557      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
558    // membarrier needs custom lowering; the rest are legal and handled
559    // normally.
560    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
561  } else {
562    // Set them all for expansion, which will force libcalls.
563    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
564    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
565    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
566    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
567    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
568    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
569    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
570    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
571    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
572    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
573    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
574    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
575    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
576    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
577    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
578    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
579    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
580    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
581    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
582    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
583    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
584    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
585    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
586    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
587    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
588    // Since the libcalls include locking, fold in the fences
589    setShouldFoldAtomicFences(true);
590  }
591  // 64-bit versions are always libcalls (for now)
592  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
593  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
594  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
595  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
596  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
597  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
598  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
599  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
600
601  // ARM v5TE+ and Thumb2 have preload instructions.
602  if (Subtarget->isThumb2() ||
603      (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))
604    setOperationAction(ISD::PREFETCH,   MVT::Other, Legal);
605
606  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
607  if (!Subtarget->hasV6Ops()) {
608    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
609    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
610  }
611  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
612
613  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
614    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
615    // iff target supports vfp2.
616    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
617    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
618  }
619
620  // We want to custom lower some of our intrinsics.
621  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
622  if (Subtarget->isTargetDarwin()) {
623    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
624    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
625    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
626  }
627
628  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
629  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
630  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
631  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
632  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
633  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
634  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
635  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
636  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
637
638  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
639  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
640  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
641  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
642  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
643
644  // We don't support sin/cos/fmod/copysign/pow
645  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
646  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
647  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
648  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
649  setOperationAction(ISD::FREM,      MVT::f64, Expand);
650  setOperationAction(ISD::FREM,      MVT::f32, Expand);
651  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
652    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
653    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
654  }
655  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
656  setOperationAction(ISD::FPOW,      MVT::f32, Expand);
657
658  // Various VFP goodness
659  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
660    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
661    if (Subtarget->hasVFP2()) {
662      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
663      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
664      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
665      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
666    }
667    // Special handling for half-precision FP.
668    if (!Subtarget->hasFP16()) {
669      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
670      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
671    }
672  }
673
674  // We have target-specific dag combine patterns for the following nodes:
675  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
676  setTargetDAGCombine(ISD::ADD);
677  setTargetDAGCombine(ISD::SUB);
678  setTargetDAGCombine(ISD::MUL);
679
680  if (Subtarget->hasV6T2Ops())
681    setTargetDAGCombine(ISD::OR);
682
683  setStackPointerRegisterToSaveRestore(ARM::SP);
684
685  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
686    setSchedulingPreference(Sched::RegPressure);
687  else
688    setSchedulingPreference(Sched::Hybrid);
689
690  maxStoresPerMemcpy = 1;   // temporary - rewrite interface to use type
691
692  // On ARM, arguments smaller than 4 bytes are extended, so all arguments
693  // are at least 4-byte aligned.
694  setMinStackArgumentAlignment(4);
695
696  benefitFromCodePlacementOpt = true;
697}
698
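/// findRepresentativeClass - Pick the register class used to estimate register
/// pressure for a value type, together with a relative cost (wide NEON types
/// count as multiple D registers).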
699std::pair<const TargetRegisterClass*, uint8_t>
700ARMTargetLowering::findRepresentativeClass(EVT VT) const {
701  const TargetRegisterClass *RRC = 0;
702  uint8_t Cost = 1;
703  switch (VT.getSimpleVT().SimpleTy) {
704  default:
705    return TargetLowering::findRepresentativeClass(VT);
706  // Use DPR as representative register class for all floating point
707  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
708  // the cost is 1 for both f32 and f64.
709  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
710  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
711    RRC = ARM::DPRRegisterClass;
712    break;
713  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
714  case MVT::v4f32: case MVT::v2f64:
715    RRC = ARM::DPRRegisterClass;
716    Cost = 2;
717    break;
718  case MVT::v4i64:
719    RRC = ARM::DPRRegisterClass;
720    Cost = 4;
721    break;
722  case MVT::v8i64:
723    RRC = ARM::DPRRegisterClass;
724    Cost = 8;
725    break;
726  }
727  return std::make_pair(RRC, Cost);
728}
729
730const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
731  switch (Opcode) {
732  default: return 0;
733  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
734  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
735  case ARMISD::CALL:          return "ARMISD::CALL";
736  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
737  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
738  case ARMISD::tCALL:         return "ARMISD::tCALL";
739  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
740  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
741  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
742  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
743  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
744  case ARMISD::CMP:           return "ARMISD::CMP";
745  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
746  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
747  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
748  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
749  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
750  case ARMISD::CMOV:          return "ARMISD::CMOV";
751  case ARMISD::CNEG:          return "ARMISD::CNEG";
752
753  case ARMISD::RBIT:          return "ARMISD::RBIT";
754
755  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
756  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
757  case ARMISD::SITOF:         return "ARMISD::SITOF";
758  case ARMISD::UITOF:         return "ARMISD::UITOF";
759
760  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
761  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
762  case ARMISD::RRX:           return "ARMISD::RRX";
763
764  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
765  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
766
767  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
768  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
769  case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
770
771  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
772
773  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
774
775  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
776
777  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
778  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
779
780  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
781  case ARMISD::VCGE:          return "ARMISD::VCGE";
782  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
783  case ARMISD::VCGT:          return "ARMISD::VCGT";
784  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
785  case ARMISD::VTST:          return "ARMISD::VTST";
786
787  case ARMISD::VSHL:          return "ARMISD::VSHL";
788  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
789  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
790  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
791  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
792  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
793  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
794  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
795  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
796  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
797  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
798  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
799  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
800  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
801  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
802  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
803  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
804  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
805  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
806  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
807  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
808  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
809  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
810  case ARMISD::VDUP:          return "ARMISD::VDUP";
811  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
812  case ARMISD::VEXT:          return "ARMISD::VEXT";
813  case ARMISD::VREV64:        return "ARMISD::VREV64";
814  case ARMISD::VREV32:        return "ARMISD::VREV32";
815  case ARMISD::VREV16:        return "ARMISD::VREV16";
816  case ARMISD::VZIP:          return "ARMISD::VZIP";
817  case ARMISD::VUZP:          return "ARMISD::VUZP";
818  case ARMISD::VTRN:          return "ARMISD::VTRN";
819  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
820  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
821  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
822  case ARMISD::FMAX:          return "ARMISD::FMAX";
823  case ARMISD::FMIN:          return "ARMISD::FMIN";
824  case ARMISD::BFI:           return "ARMISD::BFI";
825  }
826}
827
828/// getRegClassFor - Return the register class that should be used for the
829/// specified value type.
830TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
831  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
832  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
833  // load / store 4 to 8 consecutive D registers.
834  if (Subtarget->hasNEON()) {
835    if (VT == MVT::v4i64)
836      return ARM::QQPRRegisterClass;
837    else if (VT == MVT::v8i64)
838      return ARM::QQQQPRRegisterClass;
839  }
840  return TargetLowering::getRegClassFor(VT);
841}
842
843// Create a fast isel object.
844FastISel *
845ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
846  return ARM::createFastISel(funcInfo);
847}
848
849/// getFunctionAlignment - Return the Log2 alignment of this function.
850unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
851  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
852}
853
854/// getMaximalGlobalOffset - Returns the maximal possible offset which can
855/// be used for loads / stores from the global.
856unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
857  return (Subtarget->isThumb1Only() ? 127 : 4095);
858}
859
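/// getSchedulingPreference - Nodes producing floating-point or vector values,
/// and machine instructions whose first result is available only after more
/// than two cycles, are scheduled for latency; everything else is scheduled
/// for register pressure.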
860Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
861  unsigned NumVals = N->getNumValues();
862  if (!NumVals)
863    return Sched::RegPressure;
864
865  for (unsigned i = 0; i != NumVals; ++i) {
866    EVT VT = N->getValueType(i);
867    if (VT == MVT::Flag || VT == MVT::Other)
868      continue;
869    if (VT.isFloatingPoint() || VT.isVector())
870      return Sched::Latency;
871  }
872
873  if (!N->isMachineOpcode())
874    return Sched::RegPressure;
875
876  // Loads are scheduled for latency even if the instruction itinerary
877  // is not available.
878  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
879  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
880
881  if (TID.getNumDefs() == 0)
882    return Sched::RegPressure;
883  if (!Itins->isEmpty() &&
884      Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
885    return Sched::Latency;
886
887  return Sched::RegPressure;
888}
889
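/// getRegPressureLimit - Approximate how many registers of the given class are
/// available for allocation; the GPR count shrinks when a frame pointer is
/// needed or R9 is reserved.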
890unsigned
891ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
892                                       MachineFunction &MF) const {
893  switch (RC->getID()) {
894  default:
895    return 0;
896  case ARM::tGPRRegClassID:
897    return RegInfo->hasFP(MF) ? 4 : 5;
898  case ARM::GPRRegClassID: {
899    unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
900    return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
901  }
902  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
903  case ARM::DPRRegClassID:
904    return 32 - 10;
905  }
906}
907
908//===----------------------------------------------------------------------===//
909// Lowering Code
910//===----------------------------------------------------------------------===//
911
912/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
913static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
914  switch (CC) {
915  default: llvm_unreachable("Unknown condition code!");
916  case ISD::SETNE:  return ARMCC::NE;
917  case ISD::SETEQ:  return ARMCC::EQ;
918  case ISD::SETGT:  return ARMCC::GT;
919  case ISD::SETGE:  return ARMCC::GE;
920  case ISD::SETLT:  return ARMCC::LT;
921  case ISD::SETLE:  return ARMCC::LE;
922  case ISD::SETUGT: return ARMCC::HI;
923  case ISD::SETUGE: return ARMCC::HS;
924  case ISD::SETULT: return ARMCC::LO;
925  case ISD::SETULE: return ARMCC::LS;
926  }
927}
928
929/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
930static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
931                        ARMCC::CondCodes &CondCode2) {
932  CondCode2 = ARMCC::AL;
933  switch (CC) {
934  default: llvm_unreachable("Unknown FP condition!");
935  case ISD::SETEQ:
936  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
937  case ISD::SETGT:
938  case ISD::SETOGT: CondCode = ARMCC::GT; break;
939  case ISD::SETGE:
940  case ISD::SETOGE: CondCode = ARMCC::GE; break;
941  case ISD::SETOLT: CondCode = ARMCC::MI; break;
942  case ISD::SETOLE: CondCode = ARMCC::LS; break;
943  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
944  case ISD::SETO:   CondCode = ARMCC::VC; break;
945  case ISD::SETUO:  CondCode = ARMCC::VS; break;
946  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
947  case ISD::SETUGT: CondCode = ARMCC::HI; break;
948  case ISD::SETUGE: CondCode = ARMCC::PL; break;
949  case ISD::SETLT:
950  case ISD::SETULT: CondCode = ARMCC::LT; break;
951  case ISD::SETLE:
952  case ISD::SETULE: CondCode = ARMCC::LE; break;
953  case ISD::SETNE:
954  case ISD::SETUNE: CondCode = ARMCC::NE; break;
955  }
956}
957
958//===----------------------------------------------------------------------===//
959//                      Calling Convention Implementation
960//===----------------------------------------------------------------------===//
961
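// ARMGenCallingConv.inc is generated by TableGen from ARMCallingConv.td and
// provides the calling-convention assignment functions (CC_ARM_APCS,
// RetCC_ARM_AAPCS, etc.) used by CCAssignFnForNode below.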
962#include "ARMGenCallingConv.inc"
963
964/// CCAssignFnForNode - Selects the correct CCAssignFn for the
965/// given calling convention.
966CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
967                                                 bool Return,
968                                                 bool isVarArg) const {
969  switch (CC) {
970  default:
971    llvm_unreachable("Unsupported calling convention");
972  case CallingConv::Fast:
973    if (Subtarget->hasVFP2() && !isVarArg) {
974      if (!Subtarget->isAAPCS_ABI())
975        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
976      // For AAPCS ABI targets, just use VFP variant of the calling convention.
977      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
978    }
979    // Fallthrough
980  case CallingConv::C: {
981    // Use target triple & subtarget features to do actual dispatch.
982    if (!Subtarget->isAAPCS_ABI())
983      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
984    else if (Subtarget->hasVFP2() &&
985             FloatABIType == FloatABI::Hard && !isVarArg)
986      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
987    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
988  }
989  case CallingConv::ARM_AAPCS_VFP:
990    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
991  case CallingConv::ARM_AAPCS:
992    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
993  case CallingConv::ARM_APCS:
994    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
995  }
996}
997
998/// LowerCallResult - Lower the result values of a call into the
999/// appropriate copies out of appropriate physical registers.
1000SDValue
1001ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
1002                                   CallingConv::ID CallConv, bool isVarArg,
1003                                   const SmallVectorImpl<ISD::InputArg> &Ins,
1004                                   DebugLoc dl, SelectionDAG &DAG,
1005                                   SmallVectorImpl<SDValue> &InVals) const {
1006
1007  // Assign locations to each value returned by this call.
1008  SmallVector<CCValAssign, 16> RVLocs;
1009  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1010                 RVLocs, *DAG.getContext());
1011  CCInfo.AnalyzeCallResult(Ins,
1012                           CCAssignFnForNode(CallConv, /* Return*/ true,
1013                                             isVarArg));
1014
1015  // Copy all of the result registers out of their specified physreg.
1016  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1017    CCValAssign VA = RVLocs[i];
1018
1019    SDValue Val;
1020    if (VA.needsCustom()) {
1021      // Handle f64 or half of a v2f64.
1022      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1023                                      InFlag);
1024      Chain = Lo.getValue(1);
1025      InFlag = Lo.getValue(2);
1026      VA = RVLocs[++i]; // skip ahead to next loc
1027      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1028                                      InFlag);
1029      Chain = Hi.getValue(1);
1030      InFlag = Hi.getValue(2);
1031      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1032
1033      if (VA.getLocVT() == MVT::v2f64) {
1034        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1035        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1036                          DAG.getConstant(0, MVT::i32));
1037
1038        VA = RVLocs[++i]; // skip ahead to next loc
1039        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1040        Chain = Lo.getValue(1);
1041        InFlag = Lo.getValue(2);
1042        VA = RVLocs[++i]; // skip ahead to next loc
1043        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1044        Chain = Hi.getValue(1);
1045        InFlag = Hi.getValue(2);
1046        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1047        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1048                          DAG.getConstant(1, MVT::i32));
1049      }
1050    } else {
1051      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1052                               InFlag);
1053      Chain = Val.getValue(1);
1054      InFlag = Val.getValue(2);
1055    }
1056
1057    switch (VA.getLocInfo()) {
1058    default: llvm_unreachable("Unknown loc info!");
1059    case CCValAssign::Full: break;
1060    case CCValAssign::BCvt:
1061      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
1062      break;
1063    }
1064
1065    InVals.push_back(Val);
1066  }
1067
1068  return Chain;
1069}
1070
1071/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
1072/// specified by "Src" to address "Dst" of size "Size".  Alignment information is
1073/// specified by the specific parameter attribute.  The copy will be passed as
1074/// a byval function parameter.
1075/// Sometimes what we are copying is the end of a larger object, the part that
1076/// does not fit in registers.
1077static SDValue
1078CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1079                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
1080                          DebugLoc dl) {
1081  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1082  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
1083                       /*isVolatile=*/false, /*AlwaysInline=*/false,
1084                       MachinePointerInfo(0), MachinePointerInfo(0));
1085}
1086
1087/// LowerMemOpCallTo - Store the argument to the stack.
1088SDValue
1089ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1090                                    SDValue StackPtr, SDValue Arg,
1091                                    DebugLoc dl, SelectionDAG &DAG,
1092                                    const CCValAssign &VA,
1093                                    ISD::ArgFlagsTy Flags) const {
1094  unsigned LocMemOffset = VA.getLocMemOffset();
1095  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1096  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1097  if (Flags.isByVal())
1098    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1099
1100  return DAG.getStore(Chain, dl, Arg, PtrOff,
1101                      MachinePointerInfo::getStack(LocMemOffset),
1102                      false, false, 0);
1103}
1104
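/// PassF64ArgInRegs - Split an f64 argument into a pair of i32 values with
/// VMOVRRD and pass them in the assigned registers; if the second half was
/// assigned a stack slot instead, store it there.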
1105void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
1106                                         SDValue Chain, SDValue &Arg,
1107                                         RegsToPassVector &RegsToPass,
1108                                         CCValAssign &VA, CCValAssign &NextVA,
1109                                         SDValue &StackPtr,
1110                                         SmallVector<SDValue, 8> &MemOpChains,
1111                                         ISD::ArgFlagsTy Flags) const {
1112
1113  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1114                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
1115  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
1116
1117  if (NextVA.isRegLoc())
1118    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
1119  else {
1120    assert(NextVA.isMemLoc());
1121    if (StackPtr.getNode() == 0)
1122      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1123
1124    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
1125                                           dl, DAG, NextVA,
1126                                           Flags));
1127  }
1128}
1129
1130/// LowerCall - Lower a call into a callseq_start <-
1131/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1132/// nodes.
1133SDValue
1134ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1135                             CallingConv::ID CallConv, bool isVarArg,
1136                             bool &isTailCall,
1137                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1138                             const SmallVectorImpl<SDValue> &OutVals,
1139                             const SmallVectorImpl<ISD::InputArg> &Ins,
1140                             DebugLoc dl, SelectionDAG &DAG,
1141                             SmallVectorImpl<SDValue> &InVals) const {
1142  MachineFunction &MF = DAG.getMachineFunction();
1143  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1144  bool IsSibCall = false;
1145  // Temporarily disable tail calls so things don't break.
1146  if (!EnableARMTailCalls)
1147    isTailCall = false;
1148  if (isTailCall) {
1149    // Check if it's really possible to do a tail call.
1150    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1151                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1152                                                   Outs, OutVals, Ins, DAG);
1153    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1154    // detected sibcalls.
1155    if (isTailCall) {
1156      ++NumTailCalls;
1157      IsSibCall = true;
1158    }
1159  }
1160
1161  // Analyze operands of the call, assigning locations to each operand.
1162  SmallVector<CCValAssign, 16> ArgLocs;
1163  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1164                 *DAG.getContext());
1165  CCInfo.AnalyzeCallOperands(Outs,
1166                             CCAssignFnForNode(CallConv, /* Return*/ false,
1167                                               isVarArg));
1168
1169  // Get a count of how many bytes are to be pushed on the stack.
1170  unsigned NumBytes = CCInfo.getNextStackOffset();
1171
1172  // For tail calls, memory operands are available in our caller's stack.
1173  if (IsSibCall)
1174    NumBytes = 0;
1175
1176  // Adjust the stack pointer for the new arguments...
1177  // These operations are automatically eliminated by the prolog/epilog pass
1178  if (!IsSibCall)
1179    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1180
1181  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1182
1183  RegsToPassVector RegsToPass;
1184  SmallVector<SDValue, 8> MemOpChains;
1185
1186  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1187  // of tail call optimization, arguments are handled later.
1188  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1189       i != e;
1190       ++i, ++realArgIdx) {
1191    CCValAssign &VA = ArgLocs[i];
1192    SDValue Arg = OutVals[realArgIdx];
1193    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1194
1195    // Promote the value if needed.
1196    switch (VA.getLocInfo()) {
1197    default: llvm_unreachable("Unknown loc info!");
1198    case CCValAssign::Full: break;
1199    case CCValAssign::SExt:
1200      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1201      break;
1202    case CCValAssign::ZExt:
1203      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1204      break;
1205    case CCValAssign::AExt:
1206      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1207      break;
1208    case CCValAssign::BCvt:
1209      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1210      break;
1211    }
1212
1213    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1214    if (VA.needsCustom()) {
1215      if (VA.getLocVT() == MVT::v2f64) {
1216        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1217                                  DAG.getConstant(0, MVT::i32));
1218        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1219                                  DAG.getConstant(1, MVT::i32));
1220
1221        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1222                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1223
1224        VA = ArgLocs[++i]; // skip ahead to next loc
1225        if (VA.isRegLoc()) {
1226          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1227                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1228        } else {
1229          assert(VA.isMemLoc());
1230
1231          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1232                                                 dl, DAG, VA, Flags));
1233        }
1234      } else {
1235        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1236                         StackPtr, MemOpChains, Flags);
1237      }
1238    } else if (VA.isRegLoc()) {
1239      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1240    } else if (!IsSibCall) {
1241      assert(VA.isMemLoc());
1242
1243      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1244                                             dl, DAG, VA, Flags));
1245    }
1246  }
1247
1248  if (!MemOpChains.empty())
1249    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1250                        &MemOpChains[0], MemOpChains.size());
1251
1252  // Build a sequence of copy-to-reg nodes chained together with token chain
1253  // and flag operands which copy the outgoing args into the appropriate regs.
1254  SDValue InFlag;
1255  // Tail call byval lowering might overwrite argument registers so in case of
1256  // tail call optimization the copies to registers are lowered later.
1257  if (!isTailCall)
1258    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1259      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1260                               RegsToPass[i].second, InFlag);
1261      InFlag = Chain.getValue(1);
1262    }
1263
1264  // For tail calls lower the arguments to the 'real' stack slot.
1265  if (isTailCall) {
1266    // Force all the incoming stack arguments to be loaded from the stack
1267    // before any new outgoing arguments are stored to the stack, because the
1268    // outgoing stack slots may alias the incoming argument stack slots, and
1269    // the alias isn't otherwise explicit. This is slightly more conservative
1270    // than necessary, because it means that each store effectively depends
1271    // on every argument instead of just those arguments it would clobber.
1272
1273    // Do not flag the preceding CopyToReg nodes together with the following nodes.
1274    InFlag = SDValue();
1275    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1276      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1277                               RegsToPass[i].second, InFlag);
1278      InFlag = Chain.getValue(1);
1279    }
1280    InFlag = SDValue();
1281  }
1282
1283  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1284  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1285  // node so that legalize doesn't hack it.
1286  bool isDirect = false;
1287  bool isARMFunc = false;
1288  bool isLocalARMFunc = false;
1289  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1290
1291  if (EnableARMLongCalls) {
1292    assert (getTargetMachine().getRelocationModel() == Reloc::Static
1293            && "long-calls with non-static relocation model!");
1294    // Handle a global address or an external symbol. If it's not one of
1295    // those, the target's already in a register, so we don't need to do
1296    // anything extra.
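    // In both cases the address is materialized from the constant pool and
    // the call becomes an indirect call, so it is not limited by the range
    // of a direct BL (roughly +/-32MB in ARM mode).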
1297    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1298      const GlobalValue *GV = G->getGlobal();
1299      // Create a constant pool entry for the callee address
1300      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1301      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1302                                                           ARMPCLabelIndex,
1303                                                           ARMCP::CPValue, 0);
1304      // Get the address of the callee into a register
1305      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1306      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1307      Callee = DAG.getLoad(getPointerTy(), dl,
1308                           DAG.getEntryNode(), CPAddr,
1309                           MachinePointerInfo::getConstantPool(),
1310                           false, false, 0);
1311    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1312      const char *Sym = S->getSymbol();
1313
1314      // Create a constant pool entry for the callee address
1315      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1316      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1317                                                       Sym, ARMPCLabelIndex, 0);
1318      // Get the address of the callee into a register
1319      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1320      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1321      Callee = DAG.getLoad(getPointerTy(), dl,
1322                           DAG.getEntryNode(), CPAddr,
1323                           MachinePointerInfo::getConstantPool(),
1324                           false, false, 0);
1325    }
1326  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1327    const GlobalValue *GV = G->getGlobal();
1328    isDirect = true;
1329    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1330    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1331                   getTargetMachine().getRelocationModel() != Reloc::Static;
1332    isARMFunc = !Subtarget->isThumb() || isStub;
1333    // ARM call to a local ARM function is predicable.
1334    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1335    // tBX takes a register source operand.
1336    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1337      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1338      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1339                                                           ARMPCLabelIndex,
1340                                                           ARMCP::CPValue, 4);
1341      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1342      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1343      Callee = DAG.getLoad(getPointerTy(), dl,
1344                           DAG.getEntryNode(), CPAddr,
1345                           MachinePointerInfo::getConstantPool(),
1346                           false, false, 0);
1347      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1348      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1349                           getPointerTy(), Callee, PICLabel);
1350    } else {
1351      // On ELF targets for PIC code, direct calls should go through the PLT
1352      unsigned OpFlags = 0;
1353      if (Subtarget->isTargetELF() &&
1354                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
1355        OpFlags = ARMII::MO_PLT;
1356      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
1357    }
1358  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1359    isDirect = true;
1360    bool isStub = Subtarget->isTargetDarwin() &&
1361                  getTargetMachine().getRelocationModel() != Reloc::Static;
1362    isARMFunc = !Subtarget->isThumb() || isStub;
1363    // tBX takes a register source operand.
1364    const char *Sym = S->getSymbol();
1365    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1366      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1367      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1368                                                       Sym, ARMPCLabelIndex, 4);
1369      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1370      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1371      Callee = DAG.getLoad(getPointerTy(), dl,
1372                           DAG.getEntryNode(), CPAddr,
1373                           MachinePointerInfo::getConstantPool(),
1374                           false, false, 0);
1375      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1376      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1377                           getPointerTy(), Callee, PICLabel);
1378    } else {
1379      unsigned OpFlags = 0;
1380      // On ELF targets for PIC code, direct calls should go through the PLT
1381      if (Subtarget->isTargetELF() &&
1382                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
1383        OpFlags = ARMII::MO_PLT;
1384      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
1385    }
1386  }
1387
1388  // FIXME: handle tail calls differently.
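  // Pick the call opcode.  Pre-v5T cores have no BLX, so calls that go
  // through a register or may switch instruction sets cannot use a linking
  // branch there and fall back to CALL_NOLINK, with LR set up separately.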
1389  unsigned CallOpc;
1390  if (Subtarget->isThumb()) {
1391    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1392      CallOpc = ARMISD::CALL_NOLINK;
1393    else
1394      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1395  } else {
1396    CallOpc = (isDirect || Subtarget->hasV5TOps())
1397      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1398      : ARMISD::CALL_NOLINK;
1399  }
1400
1401  std::vector<SDValue> Ops;
1402  Ops.push_back(Chain);
1403  Ops.push_back(Callee);
1404
1405  // Add argument registers to the end of the list so that they are known live
1406  // into the call.
1407  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1408    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1409                                  RegsToPass[i].second.getValueType()));
1410
1411  if (InFlag.getNode())
1412    Ops.push_back(InFlag);
1413
1414  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1415  if (isTailCall)
1416    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1417
1418  // Returns a chain and a flag for retval copy to use.
1419  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1420  InFlag = Chain.getValue(1);
1421
1422  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1423                             DAG.getIntPtrConstant(0, true), InFlag);
1424  if (!Ins.empty())
1425    InFlag = Chain.getValue(1);
1426
1427  // Handle result values, copying them out of physregs into vregs that we
1428  // return.
1429  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1430                         dl, DAG, InVals);
1431}
1432
1433/// MatchingStackOffset - Return true if the given stack call argument is
1434/// already available in the same relative position in the caller's
1435/// incoming argument stack.
1436static
1437bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1438                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1439                         const ARMInstrInfo *TII) {
1440  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1441  int FI = INT_MAX;
1442  if (Arg.getOpcode() == ISD::CopyFromReg) {
1443    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1444    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
1445      return false;
1446    MachineInstr *Def = MRI->getVRegDef(VR);
1447    if (!Def)
1448      return false;
1449    if (!Flags.isByVal()) {
1450      if (!TII->isLoadFromStackSlot(Def, FI))
1451        return false;
1452    } else {
1453      return false;
1454    }
1455  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1456    if (Flags.isByVal())
1457      // ByVal argument is passed in as a pointer but it's now being
1458      // dereferenced. e.g.
1459      // define @foo(%struct.X* %A) {
1460      //   tail call @bar(%struct.X* byval %A)
1461      // }
1462      return false;
1463    SDValue Ptr = Ld->getBasePtr();
1464    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1465    if (!FINode)
1466      return false;
1467    FI = FINode->getIndex();
1468  } else
1469    return false;
1470
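  // The argument only matches if it already occupies the caller's fixed
  // incoming-argument slot at the same offset and with the same size.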
1471  assert(FI != INT_MAX);
1472  if (!MFI->isFixedObjectIndex(FI))
1473    return false;
1474  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1475}
1476
1477/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1478/// for tail call optimization. Targets which want to do tail call
1479/// optimization should implement this function.
1480bool
1481ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1482                                                     CallingConv::ID CalleeCC,
1483                                                     bool isVarArg,
1484                                                     bool isCalleeStructRet,
1485                                                     bool isCallerStructRet,
1486                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1487                                    const SmallVectorImpl<SDValue> &OutVals,
1488                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1489                                                     SelectionDAG& DAG) const {
1490  const Function *CallerF = DAG.getMachineFunction().getFunction();
1491  CallingConv::ID CallerCC = CallerF->getCallingConv();
1492  bool CCMatch = CallerCC == CalleeCC;
1493
1494  // Look for obvious safe cases to perform tail call optimization that do not
1495  // require ABI changes. This is what gcc calls sibcall.
1496
1497  // Do not sibcall optimize vararg calls unless the call site passes no
1498  // arguments.
1499  if (isVarArg && !Outs.empty())
1500    return false;
1501
1502  // Also avoid sibcall optimization if either caller or callee uses struct
1503  // return semantics.
1504  if (isCalleeStructRet || isCallerStructRet)
1505    return false;
1506
1507  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1508  // emitEpilogue is not ready for them.
1509  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1510  // LR.  This means if we need to reload LR, it takes an extra instruction,
1511  // which outweighs the value of the tail call; but here we don't know yet
1512  // whether LR is going to be used.  Probably the right approach is to
1513  // generate the tail call here and turn it back into CALL/RET in
1514  // emitEpilogue if LR is used.
1515  if (Subtarget->isThumb1Only())
1516    return false;
1517
1518  // For the moment, we can only do this to functions defined in this
1519  // compilation, or to indirect calls.  A Thumb B to an ARM function,
1520  // or vice versa, is not easily fixed up in the linker unlike BL.
1521  // (We could do this by loading the address of the callee into a register;
1522  // that is an extra instruction over the direct call and burns a register
1523  // as well, so is not likely to be a win.)
1524
1525  // It might be safe to remove this restriction on non-Darwin.
1526
1527  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
1528  // but we need to make sure there are enough registers; the only valid
1529  // registers are the 4 used for parameters.  We don't currently do this
1530  // case.
1531  if (isa<ExternalSymbolSDNode>(Callee))
1532    return false;
1533
1534  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1535    const GlobalValue *GV = G->getGlobal();
1536    if (GV->isDeclaration() || GV->isWeakForLinker())
1537      return false;
1538  }
1539
1540  // If the calling conventions do not match, then we'd better make sure the
1541  // results are returned in the same way as what the caller expects.
1542  if (!CCMatch) {
1543    SmallVector<CCValAssign, 16> RVLocs1;
1544    CCState CCInfo1(CalleeCC, false, getTargetMachine(),
1545                    RVLocs1, *DAG.getContext());
1546    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1547
1548    SmallVector<CCValAssign, 16> RVLocs2;
1549    CCState CCInfo2(CallerCC, false, getTargetMachine(),
1550                    RVLocs2, *DAG.getContext());
1551    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1552
1553    if (RVLocs1.size() != RVLocs2.size())
1554      return false;
1555    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1556      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1557        return false;
1558      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1559        return false;
1560      if (RVLocs1[i].isRegLoc()) {
1561        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1562          return false;
1563      } else {
1564        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1565          return false;
1566      }
1567    }
1568  }
1569
1570  // If the callee takes arguments, check whether any stack adjustment would
1571  // be needed; a call with no outgoing arguments needs no further checks.
1572  if (!Outs.empty()) {
1573    // Check if stack adjustment is needed. For now, do not do this if any
1574    // argument is passed on the stack.
1575    SmallVector<CCValAssign, 16> ArgLocs;
1576    CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
1577                   ArgLocs, *DAG.getContext());
1578    CCInfo.AnalyzeCallOperands(Outs,
1579                               CCAssignFnForNode(CalleeCC, false, isVarArg));
1580    if (CCInfo.getNextStackOffset()) {
1581      MachineFunction &MF = DAG.getMachineFunction();
1582
1583      // Check if the arguments are already laid out in the right way as
1584      // the caller's fixed stack objects.
1585      MachineFrameInfo *MFI = MF.getFrameInfo();
1586      const MachineRegisterInfo *MRI = &MF.getRegInfo();
1587      const ARMInstrInfo *TII =
1588        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
1589      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1590           i != e;
1591           ++i, ++realArgIdx) {
1592        CCValAssign &VA = ArgLocs[i];
1593        EVT RegVT = VA.getLocVT();
1594        SDValue Arg = OutVals[realArgIdx];
1595        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1596        if (VA.getLocInfo() == CCValAssign::Indirect)
1597          return false;
1598        if (VA.needsCustom()) {
1599          // f64 and vector types are split into multiple registers or
1600          // register/stack-slot combinations.  The types will not match
1601          // the registers; give up on memory f64 refs until we figure
1602          // out what to do about this.
1603          if (!VA.isRegLoc())
1604            return false;
1605          if (!ArgLocs[++i].isRegLoc())
1606            return false;
1607          if (RegVT == MVT::v2f64) {
1608            if (!ArgLocs[++i].isRegLoc())
1609              return false;
1610            if (!ArgLocs[++i].isRegLoc())
1611              return false;
1612          }
1613        } else if (!VA.isRegLoc()) {
1614          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1615                                   MFI, MRI, TII))
1616            return false;
1617        }
1618      }
1619    }
1620  }
1621
1622  return true;
1623}
1624
1625SDValue
1626ARMTargetLowering::LowerReturn(SDValue Chain,
1627                               CallingConv::ID CallConv, bool isVarArg,
1628                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1629                               const SmallVectorImpl<SDValue> &OutVals,
1630                               DebugLoc dl, SelectionDAG &DAG) const {
1631
1632  // CCValAssign - represent the assignment of the return value to a location.
1633  // CCValAssign - represents the assignment of the return value to a location.
1634
1635  // CCState - Info about the registers and stack slots.
1636  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1637                 *DAG.getContext());
1638
1639  // Analyze outgoing return values.
1640  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1641                                               isVarArg));
1642
1643  // If this is the first return lowered for this function, add
1644  // the regs to the liveout set for the function.
1645  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1646    for (unsigned i = 0; i != RVLocs.size(); ++i)
1647      if (RVLocs[i].isRegLoc())
1648        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1649  }
1650
1651  SDValue Flag;
1652
1653  // Copy the result values into the output registers.
1654  for (unsigned i = 0, realRVLocIdx = 0;
1655       i != RVLocs.size();
1656       ++i, ++realRVLocIdx) {
1657    CCValAssign &VA = RVLocs[i];
1658    assert(VA.isRegLoc() && "Can only return in registers!");
1659
1660    SDValue Arg = OutVals[realRVLocIdx];
1661
1662    switch (VA.getLocInfo()) {
1663    default: llvm_unreachable("Unknown loc info!");
1664    case CCValAssign::Full: break;
1665    case CCValAssign::BCvt:
1666      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1667      break;
1668    }
1669
1670    if (VA.needsCustom()) {
1671      if (VA.getLocVT() == MVT::v2f64) {
1672        // Extract the first half and return it in two registers.
1673        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1674                                   DAG.getConstant(0, MVT::i32));
1675        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1676                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
1677
1678        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1679        Flag = Chain.getValue(1);
1680        VA = RVLocs[++i]; // skip ahead to next loc
1681        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1682                                 HalfGPRs.getValue(1), Flag);
1683        Flag = Chain.getValue(1);
1684        VA = RVLocs[++i]; // skip ahead to next loc
1685
1686        // Extract the 2nd half and fall through to handle it as an f64 value.
1687        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1688                          DAG.getConstant(1, MVT::i32));
1689      }
1690      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1691      // available.
1692      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1693                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1694      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1695      Flag = Chain.getValue(1);
1696      VA = RVLocs[++i]; // skip ahead to next loc
1697      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1698                               Flag);
1699    } else
1700      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1701
1702    // Guarantee that all emitted copies are stuck together by the flag
1703    // operand so they cannot be scheduled apart.
1704    Flag = Chain.getValue(1);
1705  }
1706
1707  SDValue result;
1708  if (Flag.getNode())
1709    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1710  else // Return Void
1711    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1712
1713  return result;
1714}
1715
1716// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1717// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1718// one of the above mentioned nodes. It has to be wrapped because otherwise
1719// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1720// be used to form addressing modes. These wrapped nodes will be selected
1721// into MOVi.
1722static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1723  EVT PtrVT = Op.getValueType();
1724  // FIXME there is no actual debug info here
1725  DebugLoc dl = Op.getDebugLoc();
1726  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1727  SDValue Res;
1728  if (CP->isMachineConstantPoolEntry())
1729    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1730                                    CP->getAlignment());
1731  else
1732    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1733                                    CP->getAlignment());
1734  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1735}
1736
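// ARM emits jump tables inline: the table of destination addresses is placed
// in the text section immediately after the code that indexes into it.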
1737unsigned ARMTargetLowering::getJumpTableEncoding() const {
1738  return MachineJumpTableInfo::EK_Inline;
1739}
1740
1741SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1742                                             SelectionDAG &DAG) const {
1743  MachineFunction &MF = DAG.getMachineFunction();
1744  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1745  unsigned ARMPCLabelIndex = 0;
1746  DebugLoc DL = Op.getDebugLoc();
1747  EVT PtrVT = getPointerTy();
1748  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1749  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1750  SDValue CPAddr;
1751  if (RelocM == Reloc::Static) {
1752    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1753  } else {
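    // A PC read sees the current instruction plus 8 in ARM mode and plus 4 in
    // Thumb mode, so the PC-relative constant-pool entry is biased to match.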
1754    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1755    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1756    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
1757                                                         ARMCP::CPBlockAddress,
1758                                                         PCAdj);
1759    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1760  }
1761  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1762  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1763                               MachinePointerInfo::getConstantPool(),
1764                               false, false, 0);
1765  if (RelocM == Reloc::Static)
1766    return Result;
1767  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1768  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1769}
1770
1771// Lower ISD::GlobalTLSAddress using the "general dynamic" model
1772SDValue
1773ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1774                                                 SelectionDAG &DAG) const {
1775  DebugLoc dl = GA->getDebugLoc();
1776  EVT PtrVT = getPointerTy();
1777  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1778  MachineFunction &MF = DAG.getMachineFunction();
1779  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1780  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1781  ARMConstantPoolValue *CPV =
1782    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1783                             ARMCP::CPValue, PCAdj, "tlsgd", true);
1784  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1785  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1786  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1787                         MachinePointerInfo::getConstantPool(),
1788                         false, false, 0);
1789  SDValue Chain = Argument.getValue(1);
1790
1791  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1792  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1793
1794  // call __tls_get_addr.
1795  ArgListTy Args;
1796  ArgListEntry Entry;
1797  Entry.Node = Argument;
1798  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
1799  Args.push_back(Entry);
1800  // FIXME: is there useful debug info available here?
1801  std::pair<SDValue, SDValue> CallResult =
1802    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
1803                false, false, false, false,
1804                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1805                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1806  return CallResult.first;
1807}
1808
1809// Lower ISD::GlobalTLSAddress using the "initial exec" or
1810// "local exec" model.
1811SDValue
1812ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1813                                        SelectionDAG &DAG) const {
1814  const GlobalValue *GV = GA->getGlobal();
1815  DebugLoc dl = GA->getDebugLoc();
1816  SDValue Offset;
1817  SDValue Chain = DAG.getEntryNode();
1818  EVT PtrVT = getPointerTy();
1819  // Get the Thread Pointer
1820  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1821
1822  if (GV->isDeclaration()) {
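  // A variable that is only declared here may be defined in another module,
  // so use the initial-exec model: its offset from the thread pointer is
  // loaded indirectly through a GOT "gottpoff" entry.  A variable defined in
  // this module uses the local-exec model, whose "tpoff" offset is a
  // link-time constant read from the constant pool.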
1823    MachineFunction &MF = DAG.getMachineFunction();
1824    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1825    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1826    // Initial exec model.
1827    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1828    ARMConstantPoolValue *CPV =
1829      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1830                               ARMCP::CPValue, PCAdj, "gottpoff", true);
1831    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1832    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1833    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1834                         MachinePointerInfo::getConstantPool(),
1835                         false, false, 0);
1836    Chain = Offset.getValue(1);
1837
1838    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1839    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1840
1841    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1842                         MachinePointerInfo::getConstantPool(),
1843                         false, false, 0);
1844  } else {
1845    // local exec model
1846    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
1847    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1848    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1849    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1850                         MachinePointerInfo::getConstantPool(),
1851                         false, false, 0);
1852  }
1853
1854  // The address of the thread local variable is the add of the thread
1855  // pointer with the offset of the variable.
1856  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1857}
1858
1859SDValue
1860ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
1861  // TODO: implement the "local dynamic" model
1862  assert(Subtarget->isTargetELF() &&
1863         "TLS not implemented for non-ELF targets");
1864  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1865  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
1866  // otherwise use the "Local Exec" TLS Model
1867  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1868    return LowerToTLSGeneralDynamicModel(GA, DAG);
1869  else
1870    return LowerToTLSExecModels(GA, DAG);
1871}
1872
1873SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1874                                                 SelectionDAG &DAG) const {
1875  EVT PtrVT = getPointerTy();
1876  DebugLoc dl = Op.getDebugLoc();
1877  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1878  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1879  if (RelocM == Reloc::PIC_) {
1880    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
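    // Symbols that cannot be preempted (local linkage or hidden visibility)
    // are addressed GOT-relative via a GOTOFF offset; other symbols need a
    // GOT slot, which costs the extra load below.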
1881    ARMConstantPoolValue *CPV =
1882      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
1883    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1884    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1885    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1886                                 CPAddr,
1887                                 MachinePointerInfo::getConstantPool(),
1888                                 false, false, 0);
1889    SDValue Chain = Result.getValue(1);
1890    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1891    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1892    if (!UseGOTOFF)
1893      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1894                           MachinePointerInfo::getGOT(), false, false, 0);
1895    return Result;
1896  } else {
1897    // If we have T2 ops, we can materialize the address directly via movt/movw
1898    // pair. This is always cheaper.
1899    if (Subtarget->useMovt()) {
1900      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
1901                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));
1902    } else {
1903      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1904      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1905      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1906                         MachinePointerInfo::getConstantPool(),
1907                         false, false, 0);
1908    }
1909  }
1910}
1911
1912SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1913                                                    SelectionDAG &DAG) const {
1914  MachineFunction &MF = DAG.getMachineFunction();
1915  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1916  unsigned ARMPCLabelIndex = 0;
1917  EVT PtrVT = getPointerTy();
1918  DebugLoc dl = Op.getDebugLoc();
1919  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1920  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1921  SDValue CPAddr;
1922  if (RelocM == Reloc::Static)
1923    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1924  else {
1925    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1926    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
1927    ARMConstantPoolValue *CPV =
1928      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
1929    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1930  }
1931  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1932
1933  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1934                               MachinePointerInfo::getConstantPool(),
1935                               false, false, 0);
1936  SDValue Chain = Result.getValue(1);
1937
1938  if (RelocM == Reloc::PIC_) {
1939    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1940    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1941  }
1942
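  // Globals reached through a Mach-O non-lazy pointer need one more load to
  // get the actual address.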
1943  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
1944    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
1945                         false, false, 0);
1946
1947  return Result;
1948}
1949
1950SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1951                                                    SelectionDAG &DAG) const {
1952  assert(Subtarget->isTargetELF() &&
1953         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1954  MachineFunction &MF = DAG.getMachineFunction();
1955  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1956  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1957  EVT PtrVT = getPointerTy();
1958  DebugLoc dl = Op.getDebugLoc();
1959  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1960  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1961                                                       "_GLOBAL_OFFSET_TABLE_",
1962                                                       ARMPCLabelIndex, PCAdj);
1963  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1964  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1965  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1966                               MachinePointerInfo::getConstantPool(),
1967                               false, false, 0);
1968  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1969  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1970}
1971
1972SDValue
1973ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
1974  const {
1975  DebugLoc dl = Op.getDebugLoc();
1976  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
1977                     Op.getOperand(0), Op.getOperand(1));
1978}
1979
1980SDValue
1981ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
1982  DebugLoc dl = Op.getDebugLoc();
1983  SDValue Val = DAG.getConstant(0, MVT::i32);
1984  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
1985                     Op.getOperand(1), Val);
1986}
1987
1988SDValue
1989ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
1990  DebugLoc dl = Op.getDebugLoc();
1991  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
1992                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
1993}
1994
1995SDValue
1996ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
1997                                          const ARMSubtarget *Subtarget) const {
1998  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1999  DebugLoc dl = Op.getDebugLoc();
2000  switch (IntNo) {
2001  default: return SDValue();    // Don't custom lower most intrinsics.
2002  case Intrinsic::arm_thread_pointer: {
2003    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2004    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2005  }
2006  case Intrinsic::eh_sjlj_lsda: {
2007    MachineFunction &MF = DAG.getMachineFunction();
2008    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2009    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
2010    EVT PtrVT = getPointerTy();
2011    DebugLoc dl = Op.getDebugLoc();
2012    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2013    SDValue CPAddr;
2014    unsigned PCAdj = (RelocM != Reloc::PIC_)
2015      ? 0 : (Subtarget->isThumb() ? 4 : 8);
2016    ARMConstantPoolValue *CPV =
2017      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
2018                               ARMCP::CPLSDA, PCAdj);
2019    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2020    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2021    SDValue Result =
2022      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2023                  MachinePointerInfo::getConstantPool(),
2024                  false, false, 0);
2025
2026    if (RelocM == Reloc::PIC_) {
2027      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2028      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2029    }
2030    return Result;
2031  }
2032  }
2033}
2034
2035static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
2036                               const ARMSubtarget *Subtarget) {
2037  DebugLoc dl = Op.getDebugLoc();
2038  if (!Subtarget->hasDataBarrier()) {
2039    // Some ARMv6 cpus can support data barriers with an mcr instruction.
2040    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2041    // here.
2042    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
2043           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2044    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2045                       DAG.getConstant(0, MVT::i32));
2046  }
2047
2048  SDValue Op5 = Op.getOperand(5);
2049  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
2050  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2051  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2052  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
2053
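  // Map the barrier onto a DMB option: device barriers order against the
  // full system (SY/ST), others only need the inner-shareable domain
  // (ISH/ISHST); the *ST forms are used when only stores must be ordered.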
2054  ARM_MB::MemBOpt DMBOpt;
2055  if (isDeviceBarrier)
2056    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
2057  else
2058    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
2059  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
2060                     DAG.getConstant(DMBOpt, MVT::i32));
2061}
2062
2063static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2064  MachineFunction &MF = DAG.getMachineFunction();
2065  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2066
2067  // vastart just stores the address of the VarArgsFrameIndex slot into the
2068  // memory location argument.
2069  DebugLoc dl = Op.getDebugLoc();
2070  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2071  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2072  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2073  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2074                      MachinePointerInfo(SV), false, false, 0);
2075}
2076
2077SDValue
2078ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2079                                        SDValue &Root, SelectionDAG &DAG,
2080                                        DebugLoc dl) const {
2081  MachineFunction &MF = DAG.getMachineFunction();
2082  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2083
2084  TargetRegisterClass *RC;
2085  if (AFI->isThumb1OnlyFunction())
2086    RC = ARM::tGPRRegisterClass;
2087  else
2088    RC = ARM::GPRRegisterClass;
2089
2090  // Transform the arguments stored in physical registers into virtual ones.
2091  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2092  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2093
2094  SDValue ArgValue2;
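  // The second half of the f64 may have been assigned to the stack instead
  // of a register (e.g. when the pair straddles the last argument register),
  // in which case it is reloaded from its fixed stack object.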
2095  if (NextVA.isMemLoc()) {
2096    MachineFrameInfo *MFI = MF.getFrameInfo();
2097    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2098
2099    // Create load node to retrieve arguments from the stack.
2100    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2101    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2102                            MachinePointerInfo::getFixedStack(FI),
2103                            false, false, 0);
2104  } else {
2105    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2106    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2107  }
2108
2109  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2110}
2111
2112SDValue
2113ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2114                                        CallingConv::ID CallConv, bool isVarArg,
2115                                        const SmallVectorImpl<ISD::InputArg>
2116                                          &Ins,
2117                                        DebugLoc dl, SelectionDAG &DAG,
2118                                        SmallVectorImpl<SDValue> &InVals)
2119                                          const {
2120
2121  MachineFunction &MF = DAG.getMachineFunction();
2122  MachineFrameInfo *MFI = MF.getFrameInfo();
2123
2124  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2125
2126  // Assign locations to all of the incoming arguments.
2127  SmallVector<CCValAssign, 16> ArgLocs;
2128  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
2129                 *DAG.getContext());
2130  CCInfo.AnalyzeFormalArguments(Ins,
2131                                CCAssignFnForNode(CallConv, /* Return*/ false,
2132                                                  isVarArg));
2133
2134  SmallVector<SDValue, 16> ArgValues;
2135
2136  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2137    CCValAssign &VA = ArgLocs[i];
2138
2139    // Arguments stored in registers.
2140    if (VA.isRegLoc()) {
2141      EVT RegVT = VA.getLocVT();
2142
2143      SDValue ArgValue;
2144      if (VA.needsCustom()) {
2145        // f64 and vector types are split up into multiple registers or
2146        // combinations of registers and stack slots.
2147        if (VA.getLocVT() == MVT::v2f64) {
2148          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2149                                                   Chain, DAG, dl);
2150          VA = ArgLocs[++i]; // skip ahead to next loc
2151          SDValue ArgValue2;
2152          if (VA.isMemLoc()) {
2153            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
2154            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2155            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2156                                    MachinePointerInfo::getFixedStack(FI),
2157                                    false, false, 0);
2158          } else {
2159            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2160                                             Chain, DAG, dl);
2161          }
2162          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2163          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2164                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2165          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2166                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2167        } else
2168          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2169
2170      } else {
2171        TargetRegisterClass *RC;
2172
2173        if (RegVT == MVT::f32)
2174          RC = ARM::SPRRegisterClass;
2175        else if (RegVT == MVT::f64)
2176          RC = ARM::DPRRegisterClass;
2177        else if (RegVT == MVT::v2f64)
2178          RC = ARM::QPRRegisterClass;
2179        else if (RegVT == MVT::i32)
2180          RC = (AFI->isThumb1OnlyFunction() ?
2181                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2182        else
2183          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2184
2185        // Transform the arguments in physical registers into virtual ones.
2186        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2187        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2188      }
2189
2190      // If this is an 8 or 16-bit value, it is really passed promoted
2191      // to 32 bits.  Insert an assert[sz]ext to capture this, then
2192      // truncate to the right size.
2193      switch (VA.getLocInfo()) {
2194      default: llvm_unreachable("Unknown loc info!");
2195      case CCValAssign::Full: break;
2196      case CCValAssign::BCvt:
2197        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
2198        break;
2199      case CCValAssign::SExt:
2200        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2201                               DAG.getValueType(VA.getValVT()));
2202        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2203        break;
2204      case CCValAssign::ZExt:
2205        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2206                               DAG.getValueType(VA.getValVT()));
2207        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2208        break;
2209      }
2210
2211      InVals.push_back(ArgValue);
2212
2213    } else { // VA.isRegLoc()
2214
2215      // sanity check
2216      assert(VA.isMemLoc());
2217      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2218
2219      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
2220      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
2221
2222      // Create load nodes to retrieve arguments from the stack.
2223      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2224      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2225                                   MachinePointerInfo::getFixedStack(FI),
2226                                   false, false, 0));
2227    }
2228  }
2229
2230  // varargs
2231  if (isVarArg) {
2232    static const unsigned GPRArgRegs[] = {
2233      ARM::R0, ARM::R1, ARM::R2, ARM::R3
2234    };
2235
2236    unsigned NumGPRs = CCInfo.getFirstUnallocated
2237      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2238
2239    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
2240    unsigned VARegSize = (4 - NumGPRs) * 4;
2241    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2242    unsigned ArgOffset = CCInfo.getNextStackOffset();
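    // VARegSize is the number of bytes of r0-r3 left unused by the fixed
    // arguments; the save area is rounded up to the stack alignment and
    // placed next to the first stack-passed argument so va_arg sees the
    // variadic arguments contiguously.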
2243    if (VARegSaveSize) {
2244      // If this function is vararg, store any remaining integer argument regs
2245      // to their spots on the stack so that they may be loaded by dereferencing
2246      // the result of va_next.
2247      AFI->setVarArgsRegSaveSize(VARegSaveSize);
2248      AFI->setVarArgsFrameIndex(
2249        MFI->CreateFixedObject(VARegSaveSize,
2250                               ArgOffset + VARegSaveSize - VARegSize,
2251                               false));
2252      SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2253                                      getPointerTy());
2254
2255      SmallVector<SDValue, 4> MemOps;
2256      for (; NumGPRs < 4; ++NumGPRs) {
2257        TargetRegisterClass *RC;
2258        if (AFI->isThumb1OnlyFunction())
2259          RC = ARM::tGPRRegisterClass;
2260        else
2261          RC = ARM::GPRRegisterClass;
2262
2263        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
2264        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2265        SDValue Store =
2266          DAG.getStore(Val.getValue(1), dl, Val, FIN,
2267               MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
2268                       false, false, 0);
2269        MemOps.push_back(Store);
2270        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2271                          DAG.getConstant(4, getPointerTy()));
2272      }
2273      if (!MemOps.empty())
2274        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2275                            &MemOps[0], MemOps.size());
2276    } else
2277      // This will point to the next argument passed via stack.
2278      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
2279  }
2280
2281  return Chain;
2282}
2283
2284/// isFloatingPointZero - Return true if this is +0.0.
2285static bool isFloatingPointZero(SDValue Op) {
2286  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2287    return CFP->getValueAPF().isPosZero();
2288  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2289    // Maybe this has already been legalized into the constant pool?
2290    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2291      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2292      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2293        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2294          return CFP->getValueAPF().isPosZero();
2295    }
2296  }
2297  return false;
2298}
2299
2300/// Returns an appropriate ARM CMP (cmp) and the corresponding condition code
2301/// for the given operands.
2302SDValue
2303ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2304                             SDValue &ARMcc, SelectionDAG &DAG,
2305                             DebugLoc dl) const {
2306  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2307    unsigned C = RHSC->getZExtValue();
2308    if (!isLegalICmpImmediate(C)) {
2309      // Constant does not fit, try adjusting it by one?
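      // For example, 0x101 is not a valid ARM immediate, so an unsigned
      // "x < 0x101" is rewritten below as "x <= 0x100", which is encodable.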
2310      switch (CC) {
2311      default: break;
2312      case ISD::SETLT:
2313      case ISD::SETGE:
2314        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
2315          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2316          RHS = DAG.getConstant(C-1, MVT::i32);
2317        }
2318        break;
2319      case ISD::SETULT:
2320      case ISD::SETUGE:
2321        if (C != 0 && isLegalICmpImmediate(C-1)) {
2322          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2323          RHS = DAG.getConstant(C-1, MVT::i32);
2324        }
2325        break;
2326      case ISD::SETLE:
2327      case ISD::SETGT:
2328        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
2329          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2330          RHS = DAG.getConstant(C+1, MVT::i32);
2331        }
2332        break;
2333      case ISD::SETULE:
2334      case ISD::SETUGT:
2335        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
2336          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2337          RHS = DAG.getConstant(C+1, MVT::i32);
2338        }
2339        break;
2340      }
2341    }
2342  }
2343
2344  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2345  ARMISD::NodeType CompareType;
2346  switch (CondCode) {
2347  default:
2348    CompareType = ARMISD::CMP;
2349    break;
2350  case ARMCC::EQ:
2351  case ARMCC::NE:
2352    // Uses only Z Flag
2353    CompareType = ARMISD::CMPZ;
2354    break;
2355  }
2356  ARMcc = DAG.getConstant(CondCode, MVT::i32);
2357  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
2358}
2359
2360/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2361SDValue
2362ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2363                             DebugLoc dl) const {
2364  SDValue Cmp;
2365  if (!isFloatingPointZero(RHS))
2366    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
2367  else
2368    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
2369  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
2370}
2371
2372SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2373  SDValue Cond = Op.getOperand(0);
2374  SDValue SelectTrue = Op.getOperand(1);
2375  SDValue SelectFalse = Op.getOperand(2);
2376  DebugLoc dl = Op.getDebugLoc();
2377
2378  // Convert:
2379  //
2380  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
2381  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
2382  //
2383  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
2384    const ConstantSDNode *CMOVTrue =
2385      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
2386    const ConstantSDNode *CMOVFalse =
2387      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
2388
2389    if (CMOVTrue && CMOVFalse) {
2390      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
2391      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
2392
2393      SDValue True;
2394      SDValue False;
2395      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
2396        True = SelectTrue;
2397        False = SelectFalse;
2398      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
2399        True = SelectFalse;
2400        False = SelectTrue;
2401      }
2402
2403      if (True.getNode() && False.getNode()) {
2404        EVT VT = Cond.getValueType();
2405        SDValue ARMcc = Cond.getOperand(2);
2406        SDValue CCR = Cond.getOperand(3);
2407        SDValue Cmp = Cond.getOperand(4);
2408        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
2409      }
2410    }
2411  }
2412
2413  return DAG.getSelectCC(dl, Cond,
2414                         DAG.getConstant(0, Cond.getValueType()),
2415                         SelectTrue, SelectFalse, ISD::SETNE);
2416}
2417
2418SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2419  EVT VT = Op.getValueType();
2420  SDValue LHS = Op.getOperand(0);
2421  SDValue RHS = Op.getOperand(1);
2422  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2423  SDValue TrueVal = Op.getOperand(2);
2424  SDValue FalseVal = Op.getOperand(3);
2425  DebugLoc dl = Op.getDebugLoc();
2426
2427  if (LHS.getValueType() == MVT::i32) {
2428    SDValue ARMcc;
2429    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2430    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2431    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
2432  }
2433
2434  ARMCC::CondCodes CondCode, CondCode2;
2435  FPCCToARMCC(CC, CondCode, CondCode2);
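  // Some FP predicates (e.g. unordered-or-equal) have no single ARM condition
  // code; FPCCToARMCC then sets CondCode2 and a second CMOV is emitted below.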
2436
2437  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2438  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2439  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2440  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2441                               ARMcc, CCR, Cmp);
2442  if (CondCode2 != ARMCC::AL) {
2443    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
2444    // FIXME: Needs another CMP because flag can have but one use.
2445    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2446    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2447                         Result, TrueVal, ARMcc2, CCR, Cmp2);
2448  }
2449  return Result;
2450}
2451
2452/// canChangeToInt - Given the fp compare operand, return true if it is suitable
2453/// to morph to an integer compare sequence.
2454static bool canChangeToInt(SDValue Op, bool &SeenZero,
2455                           const ARMSubtarget *Subtarget) {
2456  SDNode *N = Op.getNode();
2457  if (!N->hasOneUse())
2458    // Otherwise it requires moving the value from fp to integer registers.
2459    return false;
2460  if (!N->getNumValues())
2461    return false;
2462  EVT VT = Op.getValueType();
2463  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
2464    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
2465    // vmrs are very slow, e.g. cortex-a8.
2466    return false;
2467
2468  if (isFloatingPointZero(Op)) {
2469    SeenZero = true;
2470    return true;
2471  }
2472  return ISD::isNormalLoad(N);
2473}
2474
2475static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2476  if (isFloatingPointZero(Op))
2477    return DAG.getConstant(0, MVT::i32);
2478
2479  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
2480    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2481                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
2482                       Ld->isVolatile(), Ld->isNonTemporal(),
2483                       Ld->getAlignment());
2484
2485  llvm_unreachable("Unknown VFP cmp argument!");
2486}
2487
2488static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2489                           SDValue &RetVal1, SDValue &RetVal2) {
2490  if (isFloatingPointZero(Op)) {
2491    RetVal1 = DAG.getConstant(0, MVT::i32);
2492    RetVal2 = DAG.getConstant(0, MVT::i32);
2493    return;
2494  }
2495
2496  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
2497    SDValue Ptr = Ld->getBasePtr();
2498    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2499                          Ld->getChain(), Ptr,
2500                          Ld->getPointerInfo(),
2501                          Ld->isVolatile(), Ld->isNonTemporal(),
2502                          Ld->getAlignment());
2503
2504    EVT PtrType = Ptr.getValueType();
2505    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2506    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2507                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
2508    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2509                          Ld->getChain(), NewPtr,
2510                          Ld->getPointerInfo().getWithOffset(4),
2511                          Ld->isVolatile(), Ld->isNonTemporal(),
2512                          NewAlign);
2513    return;
2514  }
2515
2516  llvm_unreachable("Unknown VFP cmp argument!");
2517}
2518
2519/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2520/// f32 and even f64 comparisons to integer ones.
2521SDValue
2522ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2523  SDValue Chain = Op.getOperand(0);
2524  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2525  SDValue LHS = Op.getOperand(2);
2526  SDValue RHS = Op.getOperand(3);
2527  SDValue Dest = Op.getOperand(4);
2528  DebugLoc dl = Op.getDebugLoc();
2529
2530  bool SeenZero = false;
2531  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2532      canChangeToInt(RHS, SeenZero, Subtarget) &&
2533      // If one of the operands is zero, it's safe to ignore the NaN case since
2534      // we only care about equality comparisons.
2535      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
2536    // If unsafe fp math optimization is enabled and there are no other uses of
2537    // the CMP operands, and the condition code is EQ or NE, we can optimize it
2538    // to an integer comparison.
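    // Illustrative example (exposition only): with unsafe fp math, a branch on
    // "float x == 0.0f" where x is a plain load can be emitted as an i32
    // compare of the loaded bits against zero, skipping the VCMP/VMRS pair.
    // Bit-pattern equality differs from IEEE equality only for -0.0 and NaN,
    // which the conditions checked above allow us to ignore.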
2539    if (CC == ISD::SETOEQ)
2540      CC = ISD::SETEQ;
2541    else if (CC == ISD::SETUNE)
2542      CC = ISD::SETNE;
2543
2544    SDValue ARMcc;
2545    if (LHS.getValueType() == MVT::f32) {
2546      LHS = bitcastf32Toi32(LHS, DAG);
2547      RHS = bitcastf32Toi32(RHS, DAG);
2548      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2549      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2550      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2551                         Chain, Dest, ARMcc, CCR, Cmp);
2552    }
2553
2554    SDValue LHS1, LHS2;
2555    SDValue RHS1, RHS2;
2556    expandf64Toi32(LHS, DAG, LHS1, LHS2);
2557    expandf64Toi32(RHS, DAG, RHS1, RHS2);
2558    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2559    ARMcc = DAG.getConstant(CondCode, MVT::i32);
2560    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2561    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
2562    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
2563  }
2564
2565  return SDValue();
2566}
2567
2568SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2569  SDValue Chain = Op.getOperand(0);
2570  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2571  SDValue LHS = Op.getOperand(2);
2572  SDValue RHS = Op.getOperand(3);
2573  SDValue Dest = Op.getOperand(4);
2574  DebugLoc dl = Op.getDebugLoc();
2575
2576  if (LHS.getValueType() == MVT::i32) {
2577    SDValue ARMcc;
2578    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2579    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2580    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2581                       Chain, Dest, ARMcc, CCR, Cmp);
2582  }
2583
2584  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2585
2586  if (UnsafeFPMath &&
2587      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2588       CC == ISD::SETNE || CC == ISD::SETUNE)) {
2589    SDValue Result = OptimizeVFPBrcond(Op, DAG);
2590    if (Result.getNode())
2591      return Result;
2592  }
2593
2594  ARMCC::CondCodes CondCode, CondCode2;
2595  FPCCToARMCC(CC, CondCode, CondCode2);
2596
2597  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2598  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2599  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2600  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2601  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
2602  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2603  if (CondCode2 != ARMCC::AL) {
2604    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
2605    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
2606    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2607  }
2608  return Res;
2609}
2610
2611SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2612  SDValue Chain = Op.getOperand(0);
2613  SDValue Table = Op.getOperand(1);
2614  SDValue Index = Op.getOperand(2);
2615  DebugLoc dl = Op.getDebugLoc();
2616
2617  EVT PTy = getPointerTy();
2618  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2619  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2620  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2621  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2622  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2623  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2624  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2625  if (Subtarget->isThumb2()) {
2626    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2627    // which does another jump to the destination. This also makes it easier
2628    // to translate it to TBB / TBH later.
2629    // FIXME: This might not work if the function is extremely large.
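    // (Illustrative note: TBB/TBH read a byte/halfword entry from a table that
    // follows the instruction and branch to PC plus twice that entry, which is
    // why the table holds small forward offsets rather than absolute
    // destination addresses.)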
2630    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2631                       Addr, Op.getOperand(2), JTI, UId);
2632  }
2633  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2634    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2635                       MachinePointerInfo::getJumpTable(),
2636                       false, false, 0);
2637    Chain = Addr.getValue(1);
2638    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2639    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2640  } else {
2641    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2642                       MachinePointerInfo::getJumpTable(), false, false, 0);
2643    Chain = Addr.getValue(1);
2644    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2645  }
2646}
2647
2648static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2649  DebugLoc dl = Op.getDebugLoc();
2650  unsigned Opc;
2651
2652  switch (Op.getOpcode()) {
2653  default:
2654    assert(0 && "Invalid opcode!");
2655  case ISD::FP_TO_SINT:
2656    Opc = ARMISD::FTOSI;
2657    break;
2658  case ISD::FP_TO_UINT:
2659    Opc = ARMISD::FTOUI;
2660    break;
2661  }
2662  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2663  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
2664}
2665
2666static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2667  EVT VT = Op.getValueType();
2668  DebugLoc dl = Op.getDebugLoc();
2669  unsigned Opc;
2670
2671  switch (Op.getOpcode()) {
2672  default:
2673    assert(0 && "Invalid opcode!");
2674  case ISD::SINT_TO_FP:
2675    Opc = ARMISD::SITOF;
2676    break;
2677  case ISD::UINT_TO_FP:
2678    Opc = ARMISD::UITOF;
2679    break;
2680  }
2681
2682  Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
2683  return DAG.getNode(Opc, dl, VT, Op);
2684}
2685
2686SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
2687  // Implement fcopysign with a fabs and a conditional fneg.
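  // In other words: FABS gives |a|, the VFP compare below tests whether the
  // sign-carrying operand is less than 0.0, and CNEG then selects either |a|
  // or -|a| based on that condition (illustrative summary of the sequence).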
2688  SDValue Tmp0 = Op.getOperand(0);
2689  SDValue Tmp1 = Op.getOperand(1);
2690  DebugLoc dl = Op.getDebugLoc();
2691  EVT VT = Op.getValueType();
2692  EVT SrcVT = Tmp1.getValueType();
2693  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2694  SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
2695  SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
2696  SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
2697  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2698  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
2699}
2700
2701SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
2702  MachineFunction &MF = DAG.getMachineFunction();
2703  MachineFrameInfo *MFI = MF.getFrameInfo();
2704  MFI->setReturnAddressIsTaken(true);
2705
2706  EVT VT = Op.getValueType();
2707  DebugLoc dl = Op.getDebugLoc();
2708  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2709  if (Depth) {
2710    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2711    SDValue Offset = DAG.getConstant(4, MVT::i32);
2712    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2713                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2714                       MachinePointerInfo(), false, false, 0);
2715  }
2716
2717  // Return LR, which contains the return address. Mark it an implicit live-in.
2718  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
2719  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
2720}
2721
2722SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2723  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2724  MFI->setFrameAddressIsTaken(true);
2725
2726  EVT VT = Op.getValueType();
2727  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
2728  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2729  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2730    ? ARM::R7 : ARM::R11;
2731  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2732  while (Depth--)
2733    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
2734                            MachinePointerInfo(),
2735                            false, false, 0);
2736  return FrameAddr;
2737}
2738
2739/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
2740/// expand a bit convert where either the source or destination type is i64 to
2741/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
2742/// operand type is illegal (e.g., v2f32 for a target that doesn't support
2743/// vectors), since the legalizer won't know what to do with that.
2744static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
2745  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2746  DebugLoc dl = N->getDebugLoc();
2747  SDValue Op = N->getOperand(0);
2748
2749  // This function is only supposed to be called for i64 types, either as the
2750  // source or destination of the bit convert.
2751  EVT SrcVT = Op.getValueType();
2752  EVT DstVT = N->getValueType(0);
2753  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2754         "ExpandBIT_CONVERT called for non-i64 type");
2755
2756  // Turn i64->f64 into VMOVDRR.
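  // (Illustrative note: VMOVDRR moves a pair of core registers into one
  //  double-precision VFP register; VMOVRRD below is the inverse.)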
2757  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2758    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2759                             DAG.getConstant(0, MVT::i32));
2760    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2761                             DAG.getConstant(1, MVT::i32));
2762    return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
2763                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
2764  }
2765
2766  // Turn f64->i64 into VMOVRRD.
2767  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2768    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2769                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2770    // Merge the pieces into a single i64 value.
2771    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2772  }
2773
2774  return SDValue();
2775}
2776
2777/// getZeroVector - Returns a vector of specified type with all zero elements.
2778/// Zero vectors are used to represent vector negation and in those cases
2779/// will be implemented with the NEON VNEG instruction.  However, VNEG does
2780/// not support i64 elements, so sometimes the zero vectors will need to be
2781/// explicitly constructed.  Regardless, use a canonical VMOV to create the
2782/// zero vector.
2783static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2784  assert(VT.isVector() && "Expected a vector type");
2785  // The canonical modified immediate encoding of a zero vector is....0!
2786  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
2787  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
2788  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
2789  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
2790}
2791
2792/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
2793/// i32 values and take a 2 x i32 value to shift plus a shift amount.
2794SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2795                                                SelectionDAG &DAG) const {
2796  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2797  EVT VT = Op.getValueType();
2798  unsigned VTBits = VT.getSizeInBits();
2799  DebugLoc dl = Op.getDebugLoc();
2800  SDValue ShOpLo = Op.getOperand(0);
2801  SDValue ShOpHi = Op.getOperand(1);
2802  SDValue ShAmt  = Op.getOperand(2);
2803  SDValue ARMcc;
2804  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2805
2806  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2807
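  // Sketch of the logic below (illustrative): for ShAmt < VTBits the low word
  // is (ShOpLo >> ShAmt) | (ShOpHi << (VTBits - ShAmt)); once ShAmt >= VTBits
  // the CMOV instead picks ShOpHi shifted by (ShAmt - VTBits).  For example,
  // a 64-bit logical shift right by 4 of hi=0x00000001, lo=0x00000000 yields
  // lo=0x10000000, hi=0x00000000.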
2808  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2809                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2810  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2811  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2812                                   DAG.getConstant(VTBits, MVT::i32));
2813  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2814  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2815  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2816
2817  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2818  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2819                          ARMcc, DAG, dl);
2820  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2821  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
2822                           CCR, Cmp);
2823
2824  SDValue Ops[2] = { Lo, Hi };
2825  return DAG.getMergeValues(Ops, 2, dl);
2826}
2827
2828/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2829/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
2830SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2831                                               SelectionDAG &DAG) const {
2832  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2833  EVT VT = Op.getValueType();
2834  unsigned VTBits = VT.getSizeInBits();
2835  DebugLoc dl = Op.getDebugLoc();
2836  SDValue ShOpLo = Op.getOperand(0);
2837  SDValue ShOpHi = Op.getOperand(1);
2838  SDValue ShAmt  = Op.getOperand(2);
2839  SDValue ARMcc;
2840
2841  assert(Op.getOpcode() == ISD::SHL_PARTS);
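  // Sketch (illustrative): for ShAmt < VTBits the high word becomes
  // (ShOpHi << ShAmt) | (ShOpLo >> (VTBits - ShAmt)); once ShAmt >= VTBits the
  // CMOV picks ShOpLo << (ShAmt - VTBits) instead, and the low word is simply
  // ShOpLo << ShAmt.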
2842  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2843                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2844  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2845  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2846                                   DAG.getConstant(VTBits, MVT::i32));
2847  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2848  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2849
2850  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2851  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2852  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2853                          ARMcc, DAG, dl);
2854  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2855  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
2856                           CCR, Cmp);
2857
2858  SDValue Ops[2] = { Lo, Hi };
2859  return DAG.getMergeValues(Ops, 2, dl);
2860}
2861
2862SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2863                                            SelectionDAG &DAG) const {
2864  // The rounding mode is in bits 23:22 of the FPSCR.
2865  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2866  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3
2867  // so that the shift + and get folded into a bitfield extract.
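  // Worked example (illustrative): if FPSCR holds rounding mode 3 (round
  // toward zero, bits 23:22 = 0b11), adding 1 << 22 carries into bit 24 and
  // leaves bits 23:22 = 0b00, so the extract yields 0 -- the FLT_ROUNDS value
  // for round-toward-zero.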
2868  DebugLoc dl = Op.getDebugLoc();
2869  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
2870                              DAG.getConstant(Intrinsic::arm_get_fpscr,
2871                                              MVT::i32));
2872  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
2873                                  DAG.getConstant(1U << 22, MVT::i32));
2874  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2875                              DAG.getConstant(22, MVT::i32));
2876  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2877                     DAG.getConstant(3, MVT::i32));
2878}
2879
2880static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2881                         const ARMSubtarget *ST) {
2882  EVT VT = N->getValueType(0);
2883  DebugLoc dl = N->getDebugLoc();
2884
2885  if (!ST->hasV6T2Ops())
2886    return SDValue();
2887
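  // cttz(x) is computed as ctlz(rbit(x)): bit-reversing the value turns
  // trailing zeros into leading zeros, and RBIT/CLZ are both single
  // instructions once the v6t2 check above passes (illustrative note).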
2888  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2889  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2890}
2891
2892static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2893                          const ARMSubtarget *ST) {
2894  EVT VT = N->getValueType(0);
2895  DebugLoc dl = N->getDebugLoc();
2896
2897  // Lower vector shifts on NEON to use VSHL.
2898  if (VT.isVector()) {
2899    assert(ST->hasNEON() && "unexpected vector shift");
2900
2901    // Left shifts translate directly to the vshiftu intrinsic.
2902    if (N->getOpcode() == ISD::SHL)
2903      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2904                         DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2905                         N->getOperand(0), N->getOperand(1));
2906
2907    assert((N->getOpcode() == ISD::SRA ||
2908            N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2909
2910    // NEON uses the same intrinsics for both left and right shifts.  For
2911    // right shifts, the shift amounts are negative, so negate the vector of
2912    // shift amounts.
2913    EVT ShiftVT = N->getOperand(1).getValueType();
2914    SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2915                                       getZeroVector(ShiftVT, DAG, dl),
2916                                       N->getOperand(1));
2917    Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2918                               Intrinsic::arm_neon_vshifts :
2919                               Intrinsic::arm_neon_vshiftu);
2920    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2921                       DAG.getConstant(vshiftInt, MVT::i32),
2922                       N->getOperand(0), NegatedCount);
2923  }
2924
2925  // We can get here for a node like i32 = ISD::SHL i32, i64
2926  if (VT != MVT::i64)
2927    return SDValue();
2928
2929  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2930         "Unknown shift to lower!");
2931
2932  // We only lower SRA, SRL of 1 here, all others use generic lowering.
2933  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2934      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2935    return SDValue();
2936
2937  // If we are in thumb mode, we don't have RRX.
2938  if (ST->isThumb1Only()) return SDValue();
2939
2940  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
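  // For example (illustrative): an lsr-by-1 of hi=0x00000001, lo=0x00000000
  // shifts hi to 0 with the dropped bit landing in the carry flag, and RRX
  // then rotates that carry into bit 31 of the low word, giving
  // lo=0x80000000.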
2941  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2942                           DAG.getConstant(0, MVT::i32));
2943  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2944                           DAG.getConstant(1, MVT::i32));
2945
2946  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2947  // captures the bit shifted out in the carry flag.
2948  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2949  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2950
2951  // The low part is an ARMISD::RRX operand, which shifts the carry in.
2952  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2953
2954  // Merge the pieces into a single i64 value.
2955  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2956}
2957
2958static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2959  SDValue TmpOp0, TmpOp1;
2960  bool Invert = false;
2961  bool Swap = false;
2962  unsigned Opc = 0;
2963
2964  SDValue Op0 = Op.getOperand(0);
2965  SDValue Op1 = Op.getOperand(1);
2966  SDValue CC = Op.getOperand(2);
2967  EVT VT = Op.getValueType();
2968  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2969  DebugLoc dl = Op.getDebugLoc();
2970
2971  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
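    // NEON's VCEQ/VCGT/VCGE produce all-zero lanes when either input is NaN,
    // so the unordered predicates below are formed by inverting the
    // complementary ordered comparison (illustrative note on the mapping).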
2972    switch (SetCCOpcode) {
2973    default: llvm_unreachable("Illegal FP comparison"); break;
2974    case ISD::SETUNE:
2975    case ISD::SETNE:  Invert = true; // Fallthrough
2976    case ISD::SETOEQ:
2977    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2978    case ISD::SETOLT:
2979    case ISD::SETLT: Swap = true; // Fallthrough
2980    case ISD::SETOGT:
2981    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2982    case ISD::SETOLE:
2983    case ISD::SETLE:  Swap = true; // Fallthrough
2984    case ISD::SETOGE:
2985    case ISD::SETGE: Opc = ARMISD::VCGE; break;
2986    case ISD::SETUGE: Swap = true; // Fallthrough
2987    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2988    case ISD::SETUGT: Swap = true; // Fallthrough
2989    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2990    case ISD::SETUEQ: Invert = true; // Fallthrough
2991    case ISD::SETONE:
2992      // Expand this to (OLT | OGT).
2993      TmpOp0 = Op0;
2994      TmpOp1 = Op1;
2995      Opc = ISD::OR;
2996      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2997      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2998      break;
2999    case ISD::SETUO: Invert = true; // Fallthrough
3000    case ISD::SETO:
3001      // Expand this to (OLT | OGE).
3002      TmpOp0 = Op0;
3003      TmpOp1 = Op1;
3004      Opc = ISD::OR;
3005      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3006      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
3007      break;
3008    }
3009  } else {
3010    // Integer comparisons.
3011    switch (SetCCOpcode) {
3012    default: llvm_unreachable("Illegal integer comparison"); break;
3013    case ISD::SETNE:  Invert = true;
3014    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3015    case ISD::SETLT:  Swap = true;
3016    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3017    case ISD::SETLE:  Swap = true;
3018    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
3019    case ISD::SETULT: Swap = true;
3020    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
3021    case ISD::SETULE: Swap = true;
3022    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
3023    }
3024
3025    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
3026    if (Opc == ARMISD::VCEQ) {
3027
3028      SDValue AndOp;
3029      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3030        AndOp = Op0;
3031      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
3032        AndOp = Op1;
3033
3034      // Ignore bitconvert.
3035      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
3036        AndOp = AndOp.getOperand(0);
3037
3038      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
3039        Opc = ARMISD::VTST;
3040        Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
3041        Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
3042        Invert = !Invert;
3043      }
3044    }
3045  }
3046
3047  if (Swap)
3048    std::swap(Op0, Op1);
3049
3050  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3051
3052  if (Invert)
3053    Result = DAG.getNOT(dl, Result, VT);
3054
3055  return Result;
3056}
3057
3058/// isNEONModifiedImm - Check if the specified splat value corresponds to a
3059/// valid vector constant for a NEON instruction with a "modified immediate"
3060/// operand (e.g., VMOV).  If so, return the encoded value.
3061static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3062                                 unsigned SplatBitSize, SelectionDAG &DAG,
3063                                 EVT &VT, bool is128Bits, bool isVMOV) {
3064  unsigned OpCmode, Imm;
3065
3066  // SplatBitSize is set to the smallest size that splats the vector, so a
3067  // zero vector will always have SplatBitSize == 8.  However, NEON modified
3068  // immediate instructions other than VMOV do not support the 8-bit encoding
3069  // of a zero vector, and the default encoding of zero is supposed to be the
3070  // 32-bit version.
3071  if (SplatBits == 0)
3072    SplatBitSize = 32;
3073
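  // Worked example (illustrative): a v4i32 splat of 0x0000ab00 falls into the
  // 32-bit case below with (SplatBits & ~0xff00) == 0, giving OpCmode = 0x2
  // and Imm = 0xab, which createNEONModImm then packs into the final
  // "modified immediate" operand.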
3074  switch (SplatBitSize) {
3075  case 8:
3076    if (!isVMOV)
3077      return SDValue();
3078    // Any 1-byte value is OK.  Op=0, Cmode=1110.
3079    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3080    OpCmode = 0xe;
3081    Imm = SplatBits;
3082    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3083    break;
3084
3085  case 16:
3086    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3087    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3088    if ((SplatBits & ~0xff) == 0) {
3089      // Value = 0x00nn: Op=x, Cmode=100x.
3090      OpCmode = 0x8;
3091      Imm = SplatBits;
3092      break;
3093    }
3094    if ((SplatBits & ~0xff00) == 0) {
3095      // Value = 0xnn00: Op=x, Cmode=101x.
3096      OpCmode = 0xa;
3097      Imm = SplatBits >> 8;
3098      break;
3099    }
3100    return SDValue();
3101
3102  case 32:
3103    // NEON's 32-bit VMOV supports splat values where:
3104    // * only one byte is nonzero, or
3105    // * the least significant byte is 0xff and the second byte is nonzero, or
3106    // * the least significant 2 bytes are 0xff and the third is nonzero.
3107    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3108    if ((SplatBits & ~0xff) == 0) {
3109      // Value = 0x000000nn: Op=x, Cmode=000x.
3110      OpCmode = 0;
3111      Imm = SplatBits;
3112      break;
3113    }
3114    if ((SplatBits & ~0xff00) == 0) {
3115      // Value = 0x0000nn00: Op=x, Cmode=001x.
3116      OpCmode = 0x2;
3117      Imm = SplatBits >> 8;
3118      break;
3119    }
3120    if ((SplatBits & ~0xff0000) == 0) {
3121      // Value = 0x00nn0000: Op=x, Cmode=010x.
3122      OpCmode = 0x4;
3123      Imm = SplatBits >> 16;
3124      break;
3125    }
3126    if ((SplatBits & ~0xff000000) == 0) {
3127      // Value = 0xnn000000: Op=x, Cmode=011x.
3128      OpCmode = 0x6;
3129      Imm = SplatBits >> 24;
3130      break;
3131    }
3132
3133    if ((SplatBits & ~0xffff) == 0 &&
3134        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3135      // Value = 0x0000nnff: Op=x, Cmode=1100.
3136      OpCmode = 0xc;
3137      Imm = SplatBits >> 8;
3138      SplatBits |= 0xff;
3139      break;
3140    }
3141
3142    if ((SplatBits & ~0xffffff) == 0 &&
3143        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3144      // Value = 0x00nnffff: Op=x, Cmode=1101.
3145      OpCmode = 0xd;
3146      Imm = SplatBits >> 16;
3147      SplatBits |= 0xffff;
3148      break;
3149    }
3150
3151    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3152    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3153    // VMOV.I32.  A (very) minor optimization would be to replicate the value
3154    // and fall through here to test for a valid 64-bit splat.  But, then the
3155    // caller would also need to check and handle the change in size.
3156    return SDValue();
3157
3158  case 64: {
3159    if (!isVMOV)
3160      return SDValue();
3161    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
3162    uint64_t BitMask = 0xff;
3163    uint64_t Val = 0;
3164    unsigned ImmMask = 1;
3165    Imm = 0;
3166    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
3167      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
3168        Val |= BitMask;
3169        Imm |= ImmMask;
3170      } else if ((SplatBits & BitMask) != 0) {
3171        return SDValue();
3172      }
3173      BitMask <<= 8;
3174      ImmMask <<= 1;
3175    }
3176    // Op=1, Cmode=1110.
3177    OpCmode = 0x1e;
3178    SplatBits = Val;
3179    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3180    break;
3181  }
3182
3183  default:
3184    llvm_unreachable("unexpected size for isNEONModifiedImm");
3185    return SDValue();
3186  }
3187
3188  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
3189  return DAG.getTargetConstant(EncodedVal, MVT::i32);
3190}
3191
3192static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
3193                       bool &ReverseVEXT, unsigned &Imm) {
3194  unsigned NumElts = VT.getVectorNumElements();
3195  ReverseVEXT = false;
3196
3197  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
3198  if (M[0] < 0)
3199    return false;
3200
3201  Imm = M[0];
3202
3203  // If this is a VEXT shuffle, the immediate value is the index of the first
3204  // element.  The other shuffle indices must be the successive elements after
3205  // the first one.
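  // For example (illustrative): on a 4-element vector, the mask <1, 2, 3, 4>
  // matches with Imm = 1 (three elements from the first source, one from the
  // second), while <7, 0, 1, 2> wraps around, sets ReverseVEXT, and ends up
  // with Imm = 3 after the adjustment below.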
3206  unsigned ExpectedElt = Imm;
3207  for (unsigned i = 1; i < NumElts; ++i) {
3208    // Increment the expected index.  If it wraps around, it may still be
3209    // a VEXT but the source vectors must be swapped.
3210    ExpectedElt += 1;
3211    if (ExpectedElt == NumElts * 2) {
3212      ExpectedElt = 0;
3213      ReverseVEXT = true;
3214    }
3215
3216    if (M[i] < 0) continue; // ignore UNDEF indices
3217    if (ExpectedElt != static_cast<unsigned>(M[i]))
3218      return false;
3219  }
3220
3221  // Adjust the index value if the source operands will be swapped.
3222  if (ReverseVEXT)
3223    Imm -= NumElts;
3224
3225  return true;
3226}
3227
3228/// isVREVMask - Check if a vector shuffle corresponds to a VREV
3229/// instruction with the specified blocksize.  (The order of the elements
3230/// within each block of the vector is reversed.)
3231static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
3232                       unsigned BlockSize) {
3233  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3234         "Only possible block sizes for VREV are: 16, 32, 64");
3235
3236  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3237  if (EltSz == 64)
3238    return false;
3239
3240  unsigned NumElts = VT.getVectorNumElements();
3241  unsigned BlockElts = M[0] + 1;
3242  // If the first shuffle index is UNDEF, be optimistic.
3243  if (M[0] < 0)
3244    BlockElts = BlockSize / EltSz;
3245
3246  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3247    return false;
3248
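  // For example (illustrative): VREV64 on v4i16 reverses the four 16-bit
  // elements within the single 64-bit block, so the expected mask is
  // <3, 2, 1, 0>; for v8i16 it would be <3, 2, 1, 0, 7, 6, 5, 4>.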
3249  for (unsigned i = 0; i < NumElts; ++i) {
3250    if (M[i] < 0) continue; // ignore UNDEF indices
3251    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3252      return false;
3253  }
3254
3255  return true;
3256}
3257
3258static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
3259                       unsigned &WhichResult) {
3260  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3261  if (EltSz == 64)
3262    return false;
3263
3264  unsigned NumElts = VT.getVectorNumElements();
3265  WhichResult = (M[0] == 0 ? 0 : 1);
3266  for (unsigned i = 0; i < NumElts; i += 2) {
3267    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3268        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
3269      return false;
3270  }
3271  return true;
3272}
3273
3274/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3275/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3276/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3277static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3278                                unsigned &WhichResult) {
3279  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3280  if (EltSz == 64)
3281    return false;
3282
3283  unsigned NumElts = VT.getVectorNumElements();
3284  WhichResult = (M[0] == 0 ? 0 : 1);
3285  for (unsigned i = 0; i < NumElts; i += 2) {
3286    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3287        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
3288      return false;
3289  }
3290  return true;
3291}
3292
3293static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
3294                       unsigned &WhichResult) {
3295  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3296  if (EltSz == 64)
3297    return false;
3298
3299  unsigned NumElts = VT.getVectorNumElements();
3300  WhichResult = (M[0] == 0 ? 0 : 1);
3301  for (unsigned i = 0; i != NumElts; ++i) {
3302    if (M[i] < 0) continue; // ignore UNDEF indices
3303    if ((unsigned) M[i] != 2 * i + WhichResult)
3304      return false;
3305  }
3306
3307  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3308  if (VT.is64BitVector() && EltSz == 32)
3309    return false;
3310
3311  return true;
3312}
3313
3314/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
3315/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3316/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
3317static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3318                                unsigned &WhichResult) {
3319  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3320  if (EltSz == 64)
3321    return false;
3322
3323  unsigned Half = VT.getVectorNumElements() / 2;
3324  WhichResult = (M[0] == 0 ? 0 : 1);
3325  for (unsigned j = 0; j != 2; ++j) {
3326    unsigned Idx = WhichResult;
3327    for (unsigned i = 0; i != Half; ++i) {
3328      int MIdx = M[i + j * Half];
3329      if (MIdx >= 0 && (unsigned) MIdx != Idx)
3330        return false;
3331      Idx += 2;
3332    }
3333  }
3334
3335  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3336  if (VT.is64BitVector() && EltSz == 32)
3337    return false;
3338
3339  return true;
3340}
3341
3342static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
3343                       unsigned &WhichResult) {
3344  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3345  if (EltSz == 64)
3346    return false;
3347
3348  unsigned NumElts = VT.getVectorNumElements();
3349  WhichResult = (M[0] == 0 ? 0 : 1);
3350  unsigned Idx = WhichResult * NumElts / 2;
3351  for (unsigned i = 0; i != NumElts; i += 2) {
3352    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3353        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
3354      return false;
3355    Idx += 1;
3356  }
3357
3358  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3359  if (VT.is64BitVector() && EltSz == 32)
3360    return false;
3361
3362  return true;
3363}
3364
3365/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
3366/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3367/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
3368static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3369                                unsigned &WhichResult) {
3370  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3371  if (EltSz == 64)
3372    return false;
3373
3374  unsigned NumElts = VT.getVectorNumElements();
3375  WhichResult = (M[0] == 0 ? 0 : 1);
3376  unsigned Idx = WhichResult * NumElts / 2;
3377  for (unsigned i = 0; i != NumElts; i += 2) {
3378    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3379        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
3380      return false;
3381    Idx += 1;
3382  }
3383
3384  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3385  if (VT.is64BitVector() && EltSz == 32)
3386    return false;
3387
3388  return true;
3389}
3390
3391// If N is an integer constant that can be moved into a register in one
3392// instruction, return an SDValue of such a constant (will become a MOV
3393// instruction).  Otherwise return null.
3394static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
3395                                     const ARMSubtarget *ST, DebugLoc dl) {
3396  uint64_t Val;
3397  if (!isa<ConstantSDNode>(N))
3398    return SDValue();
3399  Val = cast<ConstantSDNode>(N)->getZExtValue();
3400
3401  if (ST->isThumb1Only()) {
3402    if (Val <= 255 || ~Val <= 255)
3403      return DAG.getConstant(Val, MVT::i32);
3404  } else {
3405    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
3406      return DAG.getConstant(Val, MVT::i32);
3407  }
3408  return SDValue();
3409}
3410
3411// If this is a case we can't handle, return null and let the default
3412// expansion code take care of it.
3413static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3414                                 const ARMSubtarget *ST) {
3415  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3416  DebugLoc dl = Op.getDebugLoc();
3417  EVT VT = Op.getValueType();
3418
3419  APInt SplatBits, SplatUndef;
3420  unsigned SplatBitSize;
3421  bool HasAnyUndefs;
3422  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3423    if (SplatBitSize <= 64) {
3424      // Check if an immediate VMOV works.
3425      EVT VmovVT;
3426      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
3427                                      SplatUndef.getZExtValue(), SplatBitSize,
3428                                      DAG, VmovVT, VT.is128BitVector(), true);
3429      if (Val.getNode()) {
3430        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
3431        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
3432      }
3433
3434      // Try an immediate VMVN.
3435      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
3436                             ((1LL << SplatBitSize) - 1));
3437      Val = isNEONModifiedImm(NegatedImm,
3438                                      SplatUndef.getZExtValue(), SplatBitSize,
3439                                      DAG, VmovVT, VT.is128BitVector(), false);
3440      if (Val.getNode()) {
3441        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
3442        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
3443      }
3444    }
3445  }
3446
3447  // Scan through the operands to see if only one value is used.
3448  unsigned NumElts = VT.getVectorNumElements();
3449  bool isOnlyLowElement = true;
3450  bool usesOnlyOneValue = true;
3451  bool isConstant = true;
3452  SDValue Value;
3453  for (unsigned i = 0; i < NumElts; ++i) {
3454    SDValue V = Op.getOperand(i);
3455    if (V.getOpcode() == ISD::UNDEF)
3456      continue;
3457    if (i > 0)
3458      isOnlyLowElement = false;
3459    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3460      isConstant = false;
3461
3462    if (!Value.getNode())
3463      Value = V;
3464    else if (V != Value)
3465      usesOnlyOneValue = false;
3466  }
3467
3468  if (!Value.getNode())
3469    return DAG.getUNDEF(VT);
3470
3471  if (isOnlyLowElement)
3472    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
3473
3474  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3475
3476  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
3477  // i32 and try again.
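  // For example (illustrative): a v4f32 whose four lanes are the same
  // non-constant float becomes a single VDUP from that scalar, while a
  // constant f32 splat is rebuilt with i32 elements so the VMOV-immediate
  // path above can have another try at it.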
3478  if (usesOnlyOneValue && EltSize <= 32) {
3479    if (!isConstant)
3480      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3481    if (VT.getVectorElementType().isFloatingPoint()) {
3482      SmallVector<SDValue, 8> Ops;
3483      for (unsigned i = 0; i < NumElts; ++i)
3484        Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
3485                                  Op.getOperand(i)));
3486      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
3487                                NumElts);
3488      Val = LowerBUILD_VECTOR(Val, DAG, ST);
3489      if (Val.getNode())
3490        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3491    }
3492    SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
3493    if (Val.getNode())
3494      return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
3495  }
3496
3497  // If all elements are constants and the case above didn't get hit, fall back
3498  // to the default expansion, which will generate a load from the constant
3499  // pool.
3500  if (isConstant)
3501    return SDValue();
3502
3503  // Vectors with 32- or 64-bit elements can be built by directly assigning
3504  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
3505  // will be legalized.
3506  if (EltSize >= 32) {
3507    // Do the expansion with floating-point types, since that is what the VFP
3508    // registers are defined to use, and since i64 is not legal.
3509    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3510    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3511    SmallVector<SDValue, 8> Ops;
3512    for (unsigned i = 0; i < NumElts; ++i)
3513      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
3514    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3515    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3516  }
3517
3518  return SDValue();
3519}
3520
3521/// isShuffleMaskLegal - Targets can use this to indicate that they only
3522/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3523/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3524/// are assumed to be legal.
3525bool
3526ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
3527                                      EVT VT) const {
3528  if (VT.getVectorNumElements() == 4 &&
3529      (VT.is128BitVector() || VT.is64BitVector())) {
3530    unsigned PFIndexes[4];
3531    for (unsigned i = 0; i != 4; ++i) {
3532      if (M[i] < 0)
3533        PFIndexes[i] = 8;
3534      else
3535        PFIndexes[i] = M[i];
3536    }
3537
3538    // Compute the index in the perfect shuffle table.
3539    unsigned PFTableIndex =
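    // (Each mask index is treated as a base-9 digit, with 8 standing for an
    //  undef lane, and the top two bits of the resulting table entry give the
    //  cost of the best known expansion. Illustrative note.)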
3540      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3541    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3542    unsigned Cost = (PFEntry >> 30);
3543
3544    if (Cost <= 4)
3545      return true;
3546  }
3547
3548  bool ReverseVEXT;
3549  unsigned Imm, WhichResult;
3550
3551  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3552  return (EltSize >= 32 ||
3553          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
3554          isVREVMask(M, VT, 64) ||
3555          isVREVMask(M, VT, 32) ||
3556          isVREVMask(M, VT, 16) ||
3557          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
3558          isVTRNMask(M, VT, WhichResult) ||
3559          isVUZPMask(M, VT, WhichResult) ||
3560          isVZIPMask(M, VT, WhichResult) ||
3561          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
3562          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
3563          isVZIP_v_undef_Mask(M, VT, WhichResult));
3564}
3565
3566/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3567/// the specified operations to build the shuffle.
3568static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3569                                      SDValue RHS, SelectionDAG &DAG,
3570                                      DebugLoc dl) {
3571  unsigned OpNum = (PFEntry >> 26) & 0x0F;
3572  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3573  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
3574
3575  enum {
3576    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3577    OP_VREV,
3578    OP_VDUP0,
3579    OP_VDUP1,
3580    OP_VDUP2,
3581    OP_VDUP3,
3582    OP_VEXT1,
3583    OP_VEXT2,
3584    OP_VEXT3,
3585    OP_VUZPL, // VUZP, left result
3586    OP_VUZPR, // VUZP, right result
3587    OP_VZIPL, // VZIP, left result
3588    OP_VZIPR, // VZIP, right result
3589    OP_VTRNL, // VTRN, left result
3590    OP_VTRNR  // VTRN, right result
3591  };
3592
3593  if (OpNum == OP_COPY) {
3594    if (LHSID == (1*9+2)*9+3) return LHS;
3595    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3596    return RHS;
3597  }
3598
3599  SDValue OpLHS, OpRHS;
3600  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3601  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3602  EVT VT = OpLHS.getValueType();
3603
3604  switch (OpNum) {
3605  default: llvm_unreachable("Unknown shuffle opcode!");
3606  case OP_VREV:
3607    return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
3608  case OP_VDUP0:
3609  case OP_VDUP1:
3610  case OP_VDUP2:
3611  case OP_VDUP3:
3612    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
3613                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
3614  case OP_VEXT1:
3615  case OP_VEXT2:
3616  case OP_VEXT3:
3617    return DAG.getNode(ARMISD::VEXT, dl, VT,
3618                       OpLHS, OpRHS,
3619                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3620  case OP_VUZPL:
3621  case OP_VUZPR:
3622    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3623                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3624  case OP_VZIPL:
3625  case OP_VZIPR:
3626    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3627                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3628  case OP_VTRNL:
3629  case OP_VTRNR:
3630    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3631                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3632  }
3633}
3634
3635static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3636  SDValue V1 = Op.getOperand(0);
3637  SDValue V2 = Op.getOperand(1);
3638  DebugLoc dl = Op.getDebugLoc();
3639  EVT VT = Op.getValueType();
3640  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3641  SmallVector<int, 8> ShuffleMask;
3642
3643  // Convert shuffles that are directly supported on NEON to target-specific
3644  // DAG nodes, instead of keeping them as shuffles and matching them again
3645  // during code selection.  This is more efficient and avoids the possibility
3646  // of inconsistencies between legalization and selection.
3647  // FIXME: floating-point vectors should be canonicalized to integer vectors
3648  // of the same size so that they get CSEd properly.
3649  SVN->getMask(ShuffleMask);
3650
3651  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3652  if (EltSize <= 32) {
3653    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3654      int Lane = SVN->getSplatIndex();
3655      // If this is an undef splat, generate it via "just" vdup, if possible.
3656      if (Lane == -1) Lane = 0;
3657
3658      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3659        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3660      }
3661      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3662                         DAG.getConstant(Lane, MVT::i32));
3663    }
3664
3665    bool ReverseVEXT;
3666    unsigned Imm;
3667    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3668      if (ReverseVEXT)
3669        std::swap(V1, V2);
3670      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3671                         DAG.getConstant(Imm, MVT::i32));
3672    }
3673
3674    if (isVREVMask(ShuffleMask, VT, 64))
3675      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3676    if (isVREVMask(ShuffleMask, VT, 32))
3677      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3678    if (isVREVMask(ShuffleMask, VT, 16))
3679      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3680
3681    // Check for Neon shuffles that modify both input vectors in place.
3682    // If both results are used, i.e., if there are two shuffles with the same
3683    // source operands and with masks corresponding to both results of one of
3684    // these operations, DAG memoization will ensure that a single node is
3685    // used for both shuffles.
3686    unsigned WhichResult;
3687    if (isVTRNMask(ShuffleMask, VT, WhichResult))
3688      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3689                         V1, V2).getValue(WhichResult);
3690    if (isVUZPMask(ShuffleMask, VT, WhichResult))
3691      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3692                         V1, V2).getValue(WhichResult);
3693    if (isVZIPMask(ShuffleMask, VT, WhichResult))
3694      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3695                         V1, V2).getValue(WhichResult);
3696
3697    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3698      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3699                         V1, V1).getValue(WhichResult);
3700    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3701      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3702                         V1, V1).getValue(WhichResult);
3703    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3704      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3705                         V1, V1).getValue(WhichResult);
3706  }
3707
3708  // If the shuffle is not directly supported and it has 4 elements, use
3709  // the PerfectShuffle-generated table to synthesize it from other shuffles.
3710  unsigned NumElts = VT.getVectorNumElements();
3711  if (NumElts == 4) {
3712    unsigned PFIndexes[4];
3713    for (unsigned i = 0; i != 4; ++i) {
3714      if (ShuffleMask[i] < 0)
3715        PFIndexes[i] = 8;
3716      else
3717        PFIndexes[i] = ShuffleMask[i];
3718    }
3719
3720    // Compute the index in the perfect shuffle table.
3721    unsigned PFTableIndex =
3722      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3723    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3724    unsigned Cost = (PFEntry >> 30);
3725
3726    if (Cost <= 4)
3727      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3728  }
3729
3730  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
3731  if (EltSize >= 32) {
3732    // Do the expansion with floating-point types, since that is what the VFP
3733    // registers are defined to use, and since i64 is not legal.
3734    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3735    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3736    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
3737    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
3738    SmallVector<SDValue, 8> Ops;
3739    for (unsigned i = 0; i < NumElts; ++i) {
3740      if (ShuffleMask[i] < 0)
3741        Ops.push_back(DAG.getUNDEF(EltVT));
3742      else
3743        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
3744                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
3745                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
3746                                                  MVT::i32)));
3747    }
3748    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3749    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3750  }
3751
3752  return SDValue();
3753}
3754
3755static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3756  EVT VT = Op.getValueType();
3757  DebugLoc dl = Op.getDebugLoc();
3758  SDValue Vec = Op.getOperand(0);
3759  SDValue Lane = Op.getOperand(1);
3760  assert(VT == MVT::i32 &&
3761         Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
3762         "unexpected type for custom-lowering vector extract");
3763  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3764}
3765
3766static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3767  // The only time a CONCAT_VECTORS operation can have legal types is when
3768  // two 64-bit vectors are concatenated to a 128-bit vector.
3769  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3770         "unexpected CONCAT_VECTORS");
3771  DebugLoc dl = Op.getDebugLoc();
3772  SDValue Val = DAG.getUNDEF(MVT::v2f64);
3773  SDValue Op0 = Op.getOperand(0);
3774  SDValue Op1 = Op.getOperand(1);
3775  if (Op0.getOpcode() != ISD::UNDEF)
3776    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3777                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
3778                      DAG.getIntPtrConstant(0));
3779  if (Op1.getOpcode() != ISD::UNDEF)
3780    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3781                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
3782                      DAG.getIntPtrConstant(1));
3783  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
3784}
3785
3786/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
3787/// an extending load, return the unextended value.
3788static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
3789  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
3790    return N->getOperand(0);
3791  LoadSDNode *LD = cast<LoadSDNode>(N);
3792  return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
3793                     LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
3794                     LD->isNonTemporal(), LD->getAlignment());
3795}
3796
3797static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
3798  // Multiplications are only custom-lowered for 128-bit vectors so that
3799  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
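  // For example (illustrative): (mul (sext v4i16 to v4i32), (sext v4i16 to
  // v4i32)) is matched below as VMULLs, so the full v4i32 product is formed
  // directly from the narrow v4i16 operands instead of widening them first.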
3800  EVT VT = Op.getValueType();
3801  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
3802  SDNode *N0 = Op.getOperand(0).getNode();
3803  SDNode *N1 = Op.getOperand(1).getNode();
3804  unsigned NewOpc = 0;
3805  if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
3806      (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
3807    NewOpc = ARMISD::VMULLs;
3808  } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
3809             (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
3810    NewOpc = ARMISD::VMULLu;
3811  } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
3812    // Fall through to expand this.  It is not legal.
3813    return SDValue();
3814  } else {
3815    // Other vector multiplications are legal.
3816    return Op;
3817  }
3818
3819  // Legalize to a VMULL instruction.
3820  DebugLoc DL = Op.getDebugLoc();
3821  SDValue Op0 = SkipExtension(N0, DAG);
3822  SDValue Op1 = SkipExtension(N1, DAG);
3823
3824  assert(Op0.getValueType().is64BitVector() &&
3825         Op1.getValueType().is64BitVector() &&
3826         "unexpected types for extended operands to VMULL");
3827  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3828}
3829
3830SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3831  switch (Op.getOpcode()) {
3832  default: llvm_unreachable("Don't know how to custom lower this!");
3833  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
3834  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
3835  case ISD::GlobalAddress:
3836    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
3837      LowerGlobalAddressELF(Op, DAG);
3838  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
3839  case ISD::SELECT:        return LowerSELECT(Op, DAG);
3840  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
3841  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
3842  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
3843  case ISD::VASTART:       return LowerVASTART(Op, DAG);
3844  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
3845  case ISD::SINT_TO_FP:
3846  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
3847  case ISD::FP_TO_SINT:
3848  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
3849  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
3850  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
3851  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
3852  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3853  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
3854  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
3855  case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
3856  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
3857                                                               Subtarget);
3858  case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
3859  case ISD::SHL:
3860  case ISD::SRL:
3861  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
3862  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
3863  case ISD::SRL_PARTS:
3864  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
3865  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
3866  case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
3867  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
3868  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3869  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3870  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3871  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
3872  case ISD::MUL:           return LowerMUL(Op, DAG);
3873  }
3874  return SDValue();
3875}
3876
3877/// ReplaceNodeResults - Replace the results of a node with an illegal result
3878/// type with new values built out of custom code.
3879void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
3880                                           SmallVectorImpl<SDValue>&Results,
3881                                           SelectionDAG &DAG) const {
3882  SDValue Res;
3883  switch (N->getOpcode()) {
3884  default:
3885    llvm_unreachable("Don't know how to custom expand this!");
3886    break;
3887  case ISD::BIT_CONVERT:
3888    Res = ExpandBIT_CONVERT(N, DAG);
3889    break;
3890  case ISD::SRL:
3891  case ISD::SRA:
3892    Res = LowerShift(N, DAG, Subtarget);
3893    break;
3894  }
3895  if (Res.getNode())
3896    Results.push_back(Res);
3897}
3898
3899//===----------------------------------------------------------------------===//
3900//                           ARM Scheduler Hooks
3901//===----------------------------------------------------------------------===//
3902
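/// EmitAtomicCmpSwap - Expand an ATOMIC_CMP_SWAP pseudo instruction into an
/// LDREX / compare / STREX retry loop; the block comments below describe the
/// control flow that is emitted.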
3903MachineBasicBlock *
3904ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
3905                                     MachineBasicBlock *BB,
3906                                     unsigned Size) const {
3907  unsigned dest    = MI->getOperand(0).getReg();
3908  unsigned ptr     = MI->getOperand(1).getReg();
3909  unsigned oldval  = MI->getOperand(2).getReg();
3910  unsigned newval  = MI->getOperand(3).getReg();
3911  unsigned scratch = BB->getParent()->getRegInfo()
3912    .createVirtualRegister(ARM::GPRRegisterClass);
3913  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3914  DebugLoc dl = MI->getDebugLoc();
3915  bool isThumb2 = Subtarget->isThumb2();
3916
3917  unsigned ldrOpc, strOpc;
3918  switch (Size) {
3919  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3920  case 1:
3921    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3922    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3923    break;
3924  case 2:
3925    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3926    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3927    break;
3928  case 4:
3929    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3930    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3931    break;
3932  }
3933
3934  MachineFunction *MF = BB->getParent();
3935  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3936  MachineFunction::iterator It = BB;
3937  ++It; // insert the new blocks after the current block
3938
3939  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3940  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3941  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3942  MF->insert(It, loop1MBB);
3943  MF->insert(It, loop2MBB);
3944  MF->insert(It, exitMBB);
3945
3946  // Transfer the remainder of BB and its successor edges to exitMBB.
3947  exitMBB->splice(exitMBB->begin(), BB,
3948                  llvm::next(MachineBasicBlock::iterator(MI)),
3949                  BB->end());
3950  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
3951
3952  //  thisMBB:
3953  //   ...
3954  //   fallthrough --> loop1MBB
3955  BB->addSuccessor(loop1MBB);
3956
3957  // loop1MBB:
3958  //   ldrex dest, [ptr]
3959  //   cmp dest, oldval
3960  //   bne exitMBB
3961  BB = loop1MBB;
3962  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3963  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3964                 .addReg(dest).addReg(oldval));
3965  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3966    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3967  BB->addSuccessor(loop2MBB);
3968  BB->addSuccessor(exitMBB);
3969
3970  // loop2MBB:
3971  //   strex scratch, newval, [ptr]
3972  //   cmp scratch, #0
3973  //   bne loop1MBB
3974  BB = loop2MBB;
3975  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3976                 .addReg(ptr));
3977  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3978                 .addReg(scratch).addImm(0));
3979  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3980    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3981  BB->addSuccessor(loop1MBB);
3982  BB->addSuccessor(exitMBB);
3983
3984  //  exitMBB:
3985  //   ...
3986  BB = exitMBB;
3987
3988  MI->eraseFromParent();   // The instruction is gone now.
3989
3990  return BB;
3991}
3992
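/// EmitAtomicBinary - Expand an atomic read-modify-write pseudo instruction
/// (or ATOMIC_SWAP, when BinOpcode is 0) into an LDREX / <binop> / STREX
/// retry loop.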
3993MachineBasicBlock *
3994ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3995                                    unsigned Size, unsigned BinOpcode) const {
3996  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3997  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3998
3999  const BasicBlock *LLVM_BB = BB->getBasicBlock();
4000  MachineFunction *MF = BB->getParent();
4001  MachineFunction::iterator It = BB;
4002  ++It;
4003
4004  unsigned dest = MI->getOperand(0).getReg();
4005  unsigned ptr = MI->getOperand(1).getReg();
4006  unsigned incr = MI->getOperand(2).getReg();
4007  DebugLoc dl = MI->getDebugLoc();
4008
4009  bool isThumb2 = Subtarget->isThumb2();
4010  unsigned ldrOpc, strOpc;
4011  switch (Size) {
4012  default: llvm_unreachable("unsupported size for AtomicBinary!");
4013  case 1:
4014    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
4015    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
4016    break;
4017  case 2:
4018    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
4019    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
4020    break;
4021  case 4:
4022    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
4023    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
4024    break;
4025  }
4026
4027  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4028  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4029  MF->insert(It, loopMBB);
4030  MF->insert(It, exitMBB);
4031
4032  // Transfer the remainder of BB and its successor edges to exitMBB.
4033  exitMBB->splice(exitMBB->begin(), BB,
4034                  llvm::next(MachineBasicBlock::iterator(MI)),
4035                  BB->end());
4036  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
4037
4038  MachineRegisterInfo &RegInfo = MF->getRegInfo();
4039  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4040  unsigned scratch2 = (!BinOpcode) ? incr :
4041    RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4042
4043  //  thisMBB:
4044  //   ...
4045  //   fallthrough --> loopMBB
4046  BB->addSuccessor(loopMBB);
4047
4048  //  loopMBB:
4049  //   ldrex dest, [ptr]
4050  //   <binop> scratch2, dest, incr
4051  //   strex scratch, scratch2, [ptr]
4052  //   cmp scratch, #0
4053  //   bne- loopMBB
4054  //   fallthrough --> exitMBB
4055  BB = loopMBB;
4056  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
4057  if (BinOpcode) {
4058    // operand order needs to go the other way for NAND
4059    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
4060      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4061                     addReg(incr).addReg(dest)).addReg(0);
4062    else
4063      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4064                     addReg(dest).addReg(incr)).addReg(0);
4065  }
4066
4067  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
4068                 .addReg(ptr));
4069  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4070                 .addReg(scratch).addImm(0));
4071  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4072    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
4073
4074  BB->addSuccessor(loopMBB);
4075  BB->addSuccessor(exitMBB);
4076
4077  //  exitMBB:
4078  //   ...
4079  BB = exitMBB;
4080
4081  MI->eraseFromParent();   // The instruction is gone now.
4082
4083  return BB;
4084}
4085
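/// OtherSucc - Return the successor of MBB that is not Succ; MBB is expected
/// to have exactly two successors.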
4086static
4087MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
4088  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
4089       E = MBB->succ_end(); I != E; ++I)
4090    if (*I != Succ)
4091      return *I;
4092  llvm_unreachable("Expecting a BB with two successors!");
4093}
4094
4095MachineBasicBlock *
4096ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
4097                                               MachineBasicBlock *BB) const {
4098  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4099  DebugLoc dl = MI->getDebugLoc();
4100  bool isThumb2 = Subtarget->isThumb2();
4101  switch (MI->getOpcode()) {
4102  default:
4103    MI->dump();
4104    llvm_unreachable("Unexpected instr type to insert");
4105
4106  case ARM::ATOMIC_LOAD_ADD_I8:
4107     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4108  case ARM::ATOMIC_LOAD_ADD_I16:
4109     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4110  case ARM::ATOMIC_LOAD_ADD_I32:
4111     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4112
4113  case ARM::ATOMIC_LOAD_AND_I8:
4114     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4115  case ARM::ATOMIC_LOAD_AND_I16:
4116     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4117  case ARM::ATOMIC_LOAD_AND_I32:
4118     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4119
4120  case ARM::ATOMIC_LOAD_OR_I8:
4121     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4122  case ARM::ATOMIC_LOAD_OR_I16:
4123     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4124  case ARM::ATOMIC_LOAD_OR_I32:
4125     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4126
4127  case ARM::ATOMIC_LOAD_XOR_I8:
4128     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4129  case ARM::ATOMIC_LOAD_XOR_I16:
4130     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4131  case ARM::ATOMIC_LOAD_XOR_I32:
4132     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4133
4134  case ARM::ATOMIC_LOAD_NAND_I8:
4135     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4136  case ARM::ATOMIC_LOAD_NAND_I16:
4137     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4138  case ARM::ATOMIC_LOAD_NAND_I32:
4139     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4140
4141  case ARM::ATOMIC_LOAD_SUB_I8:
4142     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4143  case ARM::ATOMIC_LOAD_SUB_I16:
4144     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4145  case ARM::ATOMIC_LOAD_SUB_I32:
4146     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4147
4148  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
4149  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
4150  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
4151
4152  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
4153  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
4154  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
4155
4156  case ARM::tMOVCCr_pseudo: {
4157    // To "insert" a SELECT_CC instruction, we actually have to insert the
4158    // diamond control-flow pattern.  The incoming instruction knows the
4159    // destination vreg to set, the condition code register to branch on, the
4160    // true/false values to select between, and a branch opcode to use.
4161    const BasicBlock *LLVM_BB = BB->getBasicBlock();
4162    MachineFunction::iterator It = BB;
4163    ++It;
4164
4165    //  thisMBB:
4166    //  ...
4167    //   TrueVal = ...
4168    //   cmpTY ccX, r1, r2
4169    //   bCC copy1MBB
4170    //   fallthrough --> copy0MBB
4171    MachineBasicBlock *thisMBB  = BB;
4172    MachineFunction *F = BB->getParent();
4173    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
4174    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
4175    F->insert(It, copy0MBB);
4176    F->insert(It, sinkMBB);
4177
4178    // Transfer the remainder of BB and its successor edges to sinkMBB.
4179    sinkMBB->splice(sinkMBB->begin(), BB,
4180                    llvm::next(MachineBasicBlock::iterator(MI)),
4181                    BB->end());
4182    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
4183
4184    BB->addSuccessor(copy0MBB);
4185    BB->addSuccessor(sinkMBB);
4186
4187    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
4188      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
4189
4190    //  copy0MBB:
4191    //   %FalseValue = ...
4192    //   # fallthrough to sinkMBB
4193    BB = copy0MBB;
4194
4195    // Update machine-CFG edges
4196    BB->addSuccessor(sinkMBB);
4197
4198    //  sinkMBB:
4199    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4200    //  ...
4201    BB = sinkMBB;
4202    BuildMI(*BB, BB->begin(), dl,
4203            TII->get(ARM::PHI), MI->getOperand(0).getReg())
4204      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4205      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4206
4207    MI->eraseFromParent();   // The pseudo instruction is gone now.
4208    return BB;
4209  }
4210
4211  case ARM::BCCi64:
4212  case ARM::BCCZi64: {
4213    // Compare both parts that make up the double comparison separately for
4214    // equality.
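    // The second compare below is predicated on EQ, so CPSR reports "equal"
    // only when both halves of the 64-bit values compare equal.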
4215    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
4216
4217    unsigned LHS1 = MI->getOperand(1).getReg();
4218    unsigned LHS2 = MI->getOperand(2).getReg();
4219    if (RHSisZero) {
4220      AddDefaultPred(BuildMI(BB, dl,
4221                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4222                     .addReg(LHS1).addImm(0));
4223      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4224        .addReg(LHS2).addImm(0)
4225        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4226    } else {
4227      unsigned RHS1 = MI->getOperand(3).getReg();
4228      unsigned RHS2 = MI->getOperand(4).getReg();
4229      AddDefaultPred(BuildMI(BB, dl,
4230                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4231                     .addReg(LHS1).addReg(RHS1));
4232      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4233        .addReg(LHS2).addReg(RHS2)
4234        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4235    }
4236
4237    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
4238    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
4239    if (MI->getOperand(0).getImm() == ARMCC::NE)
4240      std::swap(destMBB, exitMBB);
4241
4242    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4243      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
4244    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
4245      .addMBB(exitMBB);
4246
4247    MI->eraseFromParent();   // The pseudo instruction is gone now.
4248    return BB;
4249  }
4250  }
4251}
4252
4253//===----------------------------------------------------------------------===//
4254//                           ARM Optimization Hooks
4255//===----------------------------------------------------------------------===//
4256
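/// combineSelectAndUse - Given an ADD or SUB node N with operand Slct that is
/// a SELECT or SELECT_CC with one arm equal to zero, fold the arithmetic into
/// the select: (add (select cc, 0, c), x) -> (select cc, x, (add x, c)).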
4257static
4258SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
4259                            TargetLowering::DAGCombinerInfo &DCI) {
4260  SelectionDAG &DAG = DCI.DAG;
4261  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4262  EVT VT = N->getValueType(0);
4263  unsigned Opc = N->getOpcode();
4264  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
4265  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
4266  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
4267  ISD::CondCode CC = ISD::SETCC_INVALID;
4268
4269  if (isSlctCC) {
4270    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
4271  } else {
4272    SDValue CCOp = Slct.getOperand(0);
4273    if (CCOp.getOpcode() == ISD::SETCC)
4274      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
4275  }
4276
4277  bool DoXform = false;
4278  bool InvCC = false;
4279  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
4280          "Bad input!");
4281
4282  if (LHS.getOpcode() == ISD::Constant &&
4283      cast<ConstantSDNode>(LHS)->isNullValue()) {
4284    DoXform = true;
4285  } else if (CC != ISD::SETCC_INVALID &&
4286             RHS.getOpcode() == ISD::Constant &&
4287             cast<ConstantSDNode>(RHS)->isNullValue()) {
4288    std::swap(LHS, RHS);
4289    SDValue Op0 = Slct.getOperand(0);
4290    EVT OpVT = isSlctCC ? Op0.getValueType() :
4291                          Op0.getOperand(0).getValueType();
4292    bool isInt = OpVT.isInteger();
4293    CC = ISD::getSetCCInverse(CC, isInt);
4294
4295    if (!TLI.isCondCodeLegal(CC, OpVT))
4296      return SDValue();         // Inverse operator isn't legal.
4297
4298    DoXform = true;
4299    InvCC = true;
4300  }
4301
4302  if (DoXform) {
4303    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
4304    if (isSlctCC)
4305      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
4306                             Slct.getOperand(0), Slct.getOperand(1), CC);
4307    SDValue CCOp = Slct.getOperand(0);
4308    if (InvCC)
4309      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
4310                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
4311    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
4312                       CCOp, OtherOp, Result);
4313  }
4314  return SDValue();
4315}
4316
4317/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
4318/// operands N0 and N1.  This is a helper for PerformADDCombine that is
4319/// called with the default operands, and if that fails, with commuted
4320/// operands.
4321static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
4322                                         TargetLowering::DAGCombinerInfo &DCI) {
4323  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
4324  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
4325    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
4326    if (Result.getNode()) return Result;
4327  }
4328  return SDValue();
4329}
4330
4331/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
4332///
4333static SDValue PerformADDCombine(SDNode *N,
4334                                 TargetLowering::DAGCombinerInfo &DCI) {
4335  SDValue N0 = N->getOperand(0);
4336  SDValue N1 = N->getOperand(1);
4337
4338  // First try with the default operand order.
4339  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
4340  if (Result.getNode())
4341    return Result;
4342
4343  // If that didn't work, try again with the operands commuted.
4344  return PerformADDCombineWithOperands(N, N1, N0, DCI);
4345}
4346
4347/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
4348///
4349static SDValue PerformSUBCombine(SDNode *N,
4350                                 TargetLowering::DAGCombinerInfo &DCI) {
4351  SDValue N0 = N->getOperand(0);
4352  SDValue N1 = N->getOperand(1);
4353
4354  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
4355  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4356    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4357    if (Result.getNode()) return Result;
4358  }
4359
4360  return SDValue();
4361}
4362
4363static SDValue PerformMULCombine(SDNode *N,
4364                                 TargetLowering::DAGCombinerInfo &DCI,
4365                                 const ARMSubtarget *Subtarget) {
4366  SelectionDAG &DAG = DCI.DAG;
4367
4368  if (Subtarget->isThumb1Only())
4369    return SDValue();
4370
4371  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
4372    return SDValue();
4373
4374  EVT VT = N->getValueType(0);
4375  if (VT != MVT::i32)
4376    return SDValue();
4377
4378  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
4379  if (!C)
4380    return SDValue();
4381
4382  uint64_t MulAmt = C->getZExtValue();
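  // Peel off any power-of-two factor of the multiplier; it is reapplied as a
  // single left shift after the add/sub rewrite below.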
4383  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
4384  ShiftAmt = ShiftAmt & (32 - 1);
4385  SDValue V = N->getOperand(0);
4386  DebugLoc DL = N->getDebugLoc();
4387
4388  SDValue Res;
4389  MulAmt >>= ShiftAmt;
4390  if (isPowerOf2_32(MulAmt - 1)) {
4391    // (mul x, 2^N + 1) => (add (shl x, N), x)
4392    Res = DAG.getNode(ISD::ADD, DL, VT,
4393                      V, DAG.getNode(ISD::SHL, DL, VT,
4394                                     V, DAG.getConstant(Log2_32(MulAmt-1),
4395                                                        MVT::i32)));
4396  } else if (isPowerOf2_32(MulAmt + 1)) {
4397    // (mul x, 2^N - 1) => (sub (shl x, N), x)
4398    Res = DAG.getNode(ISD::SUB, DL, VT,
4399                      DAG.getNode(ISD::SHL, DL, VT,
4400                                  V, DAG.getConstant(Log2_32(MulAmt+1),
4401                                                     MVT::i32)),
4402                      V);
4403  } else
4404    return SDValue();
4405
4406  if (ShiftAmt != 0)
4407    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
4408                      DAG.getConstant(ShiftAmt, MVT::i32));
4409
4410  // Do not add new nodes to DAG combiner worklist.
4411  DCI.CombineTo(N, Res, false);
4412  return SDValue();
4413}
4414
4415/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
4416static SDValue PerformORCombine(SDNode *N,
4417                                TargetLowering::DAGCombinerInfo &DCI,
4418                                const ARMSubtarget *Subtarget) {
4419  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
4420  // reasonable.
4421
4422  // BFI is only available on V6T2+
4423  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
4424    return SDValue();
4425
4426  SelectionDAG &DAG = DCI.DAG;
4427  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4428  DebugLoc DL = N->getDebugLoc();
4429  // 1) or (and A, mask), val => ARMbfi A, val, mask
4430  //      iff (val & ~mask) == val
4431  //
4432  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4433  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
4434  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
4435  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
4436  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
4437  //  (i.e., copy a bitfield value into another bitfield of the same width)
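  //  For example (case 1), (or (and A, 0xffffff00), 0x5a) is rewritten to
  //  (ARMbfi A, 0x5a, 0xffffff00), i.e. A with its low byte replaced by 0x5a.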
4438  if (N0.getOpcode() != ISD::AND)
4439    return SDValue();
4440
4441  EVT VT = N->getValueType(0);
4442  if (VT != MVT::i32)
4443    return SDValue();
4444
4445
4446  // The value and the mask need to be constants so we can verify this is
4447  // actually a bitfield set. If the mask is 0xffff, we can do better
4448  // via a movt instruction, so don't use BFI in that case.
4449  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4450  if (!C)
4451    return SDValue();
4452  unsigned Mask = C->getZExtValue();
4453  if (Mask == 0xffff)
4454    return SDValue();
4455  SDValue Res;
4456  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
4457  if ((C = dyn_cast<ConstantSDNode>(N1))) {
4458    unsigned Val = C->getZExtValue();
4459    if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
4460      return SDValue();
4461    Val >>= CountTrailingZeros_32(~Mask);
4462
4463    Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
4464                      DAG.getConstant(Val, MVT::i32),
4465                      DAG.getConstant(Mask, MVT::i32));
4466
4467    // Do not add new nodes to DAG combiner worklist.
4468    DCI.CombineTo(N, Res, false);
4469  } else if (N1.getOpcode() == ISD::AND) {
4470    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4471    C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4472    if (!C)
4473      return SDValue();
4474    unsigned Mask2 = C->getZExtValue();
4475
4476    if (ARM::isBitFieldInvertedMask(Mask) &&
4477        ARM::isBitFieldInvertedMask(~Mask2) &&
4478        (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
4479      // The pack halfword instruction works better for masks that fit it,
4480      // so use that when it's available.
4481      if (Subtarget->hasT2ExtractPack() &&
4482          (Mask == 0xffff || Mask == 0xffff0000))
4483        return SDValue();
4484      // 2a
4485      unsigned lsb = CountTrailingZeros_32(Mask2);
4486      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
4487                        DAG.getConstant(lsb, MVT::i32));
4488      Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
4489                        DAG.getConstant(Mask, MVT::i32));
4490      // Do not add new nodes to DAG combiner worklist.
4491      DCI.CombineTo(N, Res, false);
4492    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
4493               ARM::isBitFieldInvertedMask(Mask2) &&
4494               (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
4495      // The pack halfword instruction works better for masks that fit it,
4496      // so use that when it's available.
4497      if (Subtarget->hasT2ExtractPack() &&
4498          (Mask2 == 0xffff || Mask2 == 0xffff0000))
4499        return SDValue();
4500      // 2b
4501      unsigned lsb = CountTrailingZeros_32(Mask);
4502      Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4503                        DAG.getConstant(lsb, MVT::i32));
4504      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
4505                        DAG.getConstant(Mask2, MVT::i32));
4506      // Do not add new nodes to DAG combiner worklist.
4507      DCI.CombineTo(N, Res, false);
4508    }
4509  }
4510
4511  return SDValue();
4512}
4513
4514/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
4515/// ARMISD::VMOVRRD.
4516static SDValue PerformVMOVRRDCombine(SDNode *N,
4517                                     TargetLowering::DAGCombinerInfo &DCI) {
4518  // vmovrrd(vmovdrr x, y) -> x,y
4519  SDValue InDouble = N->getOperand(0);
4520  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
4521    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
4522  return SDValue();
4523}
4524
4525/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
4526/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
4527static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
4528  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
4529  SDValue Op0 = N->getOperand(0);
4530  SDValue Op1 = N->getOperand(1);
4531  if (Op0.getOpcode() == ISD::BIT_CONVERT)
4532    Op0 = Op0.getOperand(0);
4533  if (Op1.getOpcode() == ISD::BIT_CONVERT)
4534    Op1 = Op1.getOperand(0);
4535  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
4536      Op0.getNode() == Op1.getNode() &&
4537      Op0.getResNo() == 0 && Op1.getResNo() == 1)
4538    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
4539                       N->getValueType(0), Op0.getOperand(0));
4540  return SDValue();
4541}
4542
4543/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
4544/// ISD::BUILD_VECTOR.
4545static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) {
4546  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
4547  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
4548  // into a pair of GPRs, which is fine when the value is used as a scalar,
4549  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
4550  if (N->getNumOperands() == 2)
4551    return PerformVMOVDRRCombine(N, DAG);
4552
4553  return SDValue();
4554}
4555
4556/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
4557/// ISD::VECTOR_SHUFFLE.
4558static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
4559  // The LLVM shufflevector instruction does not require the shuffle mask
4560  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
4561  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
4562  // operands do not match the mask length, they are extended by concatenating
4563  // them with undef vectors.  That is probably the right thing for other
4564  // targets, but for NEON it is better to concatenate two double-register
4565  // size vector operands into a single quad-register size vector.  Do that
4566  // transformation here:
4567  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
4568  //   shuffle(concat(v1, v2), undef)
4569  SDValue Op0 = N->getOperand(0);
4570  SDValue Op1 = N->getOperand(1);
4571  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
4572      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
4573      Op0.getNumOperands() != 2 ||
4574      Op1.getNumOperands() != 2)
4575    return SDValue();
4576  SDValue Concat0Op1 = Op0.getOperand(1);
4577  SDValue Concat1Op1 = Op1.getOperand(1);
4578  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
4579      Concat1Op1.getOpcode() != ISD::UNDEF)
4580    return SDValue();
4581  // Skip the transformation if any of the types are illegal.
4582  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4583  EVT VT = N->getValueType(0);
4584  if (!TLI.isTypeLegal(VT) ||
4585      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
4586      !TLI.isTypeLegal(Concat1Op1.getValueType()))
4587    return SDValue();
4588
4589  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
4590                                  Op0.getOperand(0), Op1.getOperand(0));
4591  // Translate the shuffle mask.
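  // Elements taken from the low half of the old second operand now live in
  // the high half of NewConcat, so remap them; everything else (references to
  // the undef halves) becomes -1, i.e. undef.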
4592  SmallVector<int, 16> NewMask;
4593  unsigned NumElts = VT.getVectorNumElements();
4594  unsigned HalfElts = NumElts/2;
4595  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4596  for (unsigned n = 0; n < NumElts; ++n) {
4597    int MaskElt = SVN->getMaskElt(n);
4598    int NewElt = -1;
4599    if (MaskElt < (int)HalfElts)
4600      NewElt = MaskElt;
4601    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
4602      NewElt = HalfElts + MaskElt - NumElts;
4603    NewMask.push_back(NewElt);
4604  }
4605  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
4606                              DAG.getUNDEF(VT), NewMask.data());
4607}
4608
4609/// PerformVDUPLANECombine - Target-specific dag combine xforms for
4610/// ARMISD::VDUPLANE.
4611static SDValue PerformVDUPLANECombine(SDNode *N, SelectionDAG &DAG) {
4612  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
4613  // redundant.
4614  SDValue Op = N->getOperand(0);
4615  EVT VT = N->getValueType(0);
4616
4617  // Ignore bit_converts.
4618  while (Op.getOpcode() == ISD::BIT_CONVERT)
4619    Op = Op.getOperand(0);
4620  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
4621    return SDValue();
4622
4623  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
4624  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
4625  // The canonical VMOV for a zero vector uses a 32-bit element size.
4626  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4627  unsigned EltBits;
4628  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
4629    EltSize = 8;
4630  if (EltSize > VT.getVectorElementType().getSizeInBits())
4631    return SDValue();
4632
4633  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
4634}
4635
4636/// getVShiftImm - Check if this is a valid build_vector for the immediate
4637/// operand of a vector shift operation, where all the elements of the
4638/// build_vector must have the same constant integer value.
4639static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
4640  // Ignore bit_converts.
4641  while (Op.getOpcode() == ISD::BIT_CONVERT)
4642    Op = Op.getOperand(0);
4643  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
4644  APInt SplatBits, SplatUndef;
4645  unsigned SplatBitSize;
4646  bool HasAnyUndefs;
4647  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
4648                                      HasAnyUndefs, ElementBits) ||
4649      SplatBitSize > ElementBits)
4650    return false;
4651  Cnt = SplatBits.getSExtValue();
4652  return true;
4653}
4654
4655/// isVShiftLImm - Check if this is a valid build_vector for the immediate
4656/// operand of a vector shift left operation.  That value must be in the range:
4657///   0 <= Value < ElementBits for a left shift; or
4658///   0 <= Value <= ElementBits for a long left shift.
4659static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
4660  assert(VT.isVector() && "vector shift count is not a vector type");
4661  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4662  if (! getVShiftImm(Op, ElementBits, Cnt))
4663    return false;
4664  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
4665}
4666
4667/// isVShiftRImm - Check if this is a valid build_vector for the immediate
4668/// operand of a vector shift right operation.  For a shift opcode, the value
4669/// is positive, but for an intrinsic the value must be negative. The
4670/// absolute value must be in the range:
4671///   1 <= |Value| <= ElementBits for a right shift; or
4672///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
4673static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
4674                         int64_t &Cnt) {
4675  assert(VT.isVector() && "vector shift count is not a vector type");
4676  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4677  if (! getVShiftImm(Op, ElementBits, Cnt))
4678    return false;
4679  if (isIntrinsic)
4680    Cnt = -Cnt;
4681  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
4682}
4683
4684/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
4685static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
4686  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4687  switch (IntNo) {
4688  default:
4689    // Don't do anything for most intrinsics.
4690    break;
4691
4692  // Vector shifts: check for immediate versions and lower them.
4693  // Note: This is done during DAG combining instead of DAG legalizing because
4694  // the build_vectors for 64-bit vector element shift counts are generally
4695  // not legal, and it is hard to see their values after they get legalized to
4696  // loads from a constant pool.
4697  case Intrinsic::arm_neon_vshifts:
4698  case Intrinsic::arm_neon_vshiftu:
4699  case Intrinsic::arm_neon_vshiftls:
4700  case Intrinsic::arm_neon_vshiftlu:
4701  case Intrinsic::arm_neon_vshiftn:
4702  case Intrinsic::arm_neon_vrshifts:
4703  case Intrinsic::arm_neon_vrshiftu:
4704  case Intrinsic::arm_neon_vrshiftn:
4705  case Intrinsic::arm_neon_vqshifts:
4706  case Intrinsic::arm_neon_vqshiftu:
4707  case Intrinsic::arm_neon_vqshiftsu:
4708  case Intrinsic::arm_neon_vqshiftns:
4709  case Intrinsic::arm_neon_vqshiftnu:
4710  case Intrinsic::arm_neon_vqshiftnsu:
4711  case Intrinsic::arm_neon_vqrshiftns:
4712  case Intrinsic::arm_neon_vqrshiftnu:
4713  case Intrinsic::arm_neon_vqrshiftnsu: {
4714    EVT VT = N->getOperand(1).getValueType();
4715    int64_t Cnt;
4716    unsigned VShiftOpc = 0;
4717
4718    switch (IntNo) {
4719    case Intrinsic::arm_neon_vshifts:
4720    case Intrinsic::arm_neon_vshiftu:
4721      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
4722        VShiftOpc = ARMISD::VSHL;
4723        break;
4724      }
4725      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
4726        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
4727                     ARMISD::VSHRs : ARMISD::VSHRu);
4728        break;
4729      }
4730      return SDValue();
4731
4732    case Intrinsic::arm_neon_vshiftls:
4733    case Intrinsic::arm_neon_vshiftlu:
4734      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
4735        break;
4736      llvm_unreachable("invalid shift count for vshll intrinsic");
4737
4738    case Intrinsic::arm_neon_vrshifts:
4739    case Intrinsic::arm_neon_vrshiftu:
4740      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
4741        break;
4742      return SDValue();
4743
4744    case Intrinsic::arm_neon_vqshifts:
4745    case Intrinsic::arm_neon_vqshiftu:
4746      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4747        break;
4748      return SDValue();
4749
4750    case Intrinsic::arm_neon_vqshiftsu:
4751      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4752        break;
4753      llvm_unreachable("invalid shift count for vqshlu intrinsic");
4754
4755    case Intrinsic::arm_neon_vshiftn:
4756    case Intrinsic::arm_neon_vrshiftn:
4757    case Intrinsic::arm_neon_vqshiftns:
4758    case Intrinsic::arm_neon_vqshiftnu:
4759    case Intrinsic::arm_neon_vqshiftnsu:
4760    case Intrinsic::arm_neon_vqrshiftns:
4761    case Intrinsic::arm_neon_vqrshiftnu:
4762    case Intrinsic::arm_neon_vqrshiftnsu:
4763      // Narrowing shifts require an immediate right shift.
4764      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
4765        break;
4766      llvm_unreachable("invalid shift count for narrowing vector shift "
4767                       "intrinsic");
4768
4769    default:
4770      llvm_unreachable("unhandled vector shift");
4771    }
4772
4773    switch (IntNo) {
4774    case Intrinsic::arm_neon_vshifts:
4775    case Intrinsic::arm_neon_vshiftu:
4776      // Opcode already set above.
4777      break;
4778    case Intrinsic::arm_neon_vshiftls:
4779    case Intrinsic::arm_neon_vshiftlu:
4780      if (Cnt == VT.getVectorElementType().getSizeInBits())
4781        VShiftOpc = ARMISD::VSHLLi;
4782      else
4783        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
4784                     ARMISD::VSHLLs : ARMISD::VSHLLu);
4785      break;
4786    case Intrinsic::arm_neon_vshiftn:
4787      VShiftOpc = ARMISD::VSHRN; break;
4788    case Intrinsic::arm_neon_vrshifts:
4789      VShiftOpc = ARMISD::VRSHRs; break;
4790    case Intrinsic::arm_neon_vrshiftu:
4791      VShiftOpc = ARMISD::VRSHRu; break;
4792    case Intrinsic::arm_neon_vrshiftn:
4793      VShiftOpc = ARMISD::VRSHRN; break;
4794    case Intrinsic::arm_neon_vqshifts:
4795      VShiftOpc = ARMISD::VQSHLs; break;
4796    case Intrinsic::arm_neon_vqshiftu:
4797      VShiftOpc = ARMISD::VQSHLu; break;
4798    case Intrinsic::arm_neon_vqshiftsu:
4799      VShiftOpc = ARMISD::VQSHLsu; break;
4800    case Intrinsic::arm_neon_vqshiftns:
4801      VShiftOpc = ARMISD::VQSHRNs; break;
4802    case Intrinsic::arm_neon_vqshiftnu:
4803      VShiftOpc = ARMISD::VQSHRNu; break;
4804    case Intrinsic::arm_neon_vqshiftnsu:
4805      VShiftOpc = ARMISD::VQSHRNsu; break;
4806    case Intrinsic::arm_neon_vqrshiftns:
4807      VShiftOpc = ARMISD::VQRSHRNs; break;
4808    case Intrinsic::arm_neon_vqrshiftnu:
4809      VShiftOpc = ARMISD::VQRSHRNu; break;
4810    case Intrinsic::arm_neon_vqrshiftnsu:
4811      VShiftOpc = ARMISD::VQRSHRNsu; break;
4812    }
4813
4814    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4815                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
4816  }
4817
4818  case Intrinsic::arm_neon_vshiftins: {
4819    EVT VT = N->getOperand(1).getValueType();
4820    int64_t Cnt;
4821    unsigned VShiftOpc = 0;
4822
4823    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
4824      VShiftOpc = ARMISD::VSLI;
4825    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
4826      VShiftOpc = ARMISD::VSRI;
4827    else {
4828      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
4829    }
4830
4831    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4832                       N->getOperand(1), N->getOperand(2),
4833                       DAG.getConstant(Cnt, MVT::i32));
4834  }
4835
4836  case Intrinsic::arm_neon_vqrshifts:
4837  case Intrinsic::arm_neon_vqrshiftu:
4838    // No immediate versions of these to check for.
4839    break;
4840  }
4841
4842  return SDValue();
4843}
4844
4845/// PerformShiftCombine - Checks for immediate versions of vector shifts and
4846/// lowers them.  As with the vector shift intrinsics, this is done during DAG
4847/// combining instead of DAG legalizing because the build_vectors for 64-bit
4848/// vector element shift counts are generally not legal, and it is hard to see
4849/// their values after they get legalized to loads from a constant pool.
4850static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
4851                                   const ARMSubtarget *ST) {
4852  EVT VT = N->getValueType(0);
4853
4854  // Nothing to be done for scalar shifts.
4855  if (! VT.isVector())
4856    return SDValue();
4857
4858  assert(ST->hasNEON() && "unexpected vector shift");
4859  int64_t Cnt;
4860
4861  switch (N->getOpcode()) {
4862  default: llvm_unreachable("unexpected shift opcode");
4863
4864  case ISD::SHL:
4865    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
4866      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
4867                         DAG.getConstant(Cnt, MVT::i32));
4868    break;
4869
4870  case ISD::SRA:
4871  case ISD::SRL:
4872    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
4873      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
4874                            ARMISD::VSHRs : ARMISD::VSHRu);
4875      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
4876                         DAG.getConstant(Cnt, MVT::i32));
4877    }
4878  }
4879  return SDValue();
4880}
4881
4882/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
4883/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
4884static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
4885                                    const ARMSubtarget *ST) {
4886  SDValue N0 = N->getOperand(0);
4887
4888  // Check for sign- and zero-extensions of vector extract operations of 8-
4889  // and 16-bit vector elements.  NEON supports these directly.  They are
4890  // handled during DAG combining because type legalization will promote them
4891  // to 32-bit types and it is messy to recognize the operations after that.
4892  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4893    SDValue Vec = N0.getOperand(0);
4894    SDValue Lane = N0.getOperand(1);
4895    EVT VT = N->getValueType(0);
4896    EVT EltVT = N0.getValueType();
4897    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4898
4899    if (VT == MVT::i32 &&
4900        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
4901        TLI.isTypeLegal(Vec.getValueType())) {
4902
4903      unsigned Opc = 0;
4904      switch (N->getOpcode()) {
4905      default: llvm_unreachable("unexpected opcode");
4906      case ISD::SIGN_EXTEND:
4907        Opc = ARMISD::VGETLANEs;
4908        break;
4909      case ISD::ZERO_EXTEND:
4910      case ISD::ANY_EXTEND:
4911        Opc = ARMISD::VGETLANEu;
4912        break;
4913      }
4914      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
4915    }
4916  }
4917
4918  return SDValue();
4919}
4920
4921/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
4922/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
4923static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
4924                                       const ARMSubtarget *ST) {
4925  // If the target supports NEON, try to use vmax/vmin instructions for f32
4926  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
4927  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
4928  // a NaN; only do the transformation when it matches that behavior.
4929
4930  // For now only do this when using NEON for FP operations; if using VFP, it
4931  // is not obvious that the benefit outweighs the cost of switching to the
4932  // NEON pipeline.
4933  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
4934      N->getValueType(0) != MVT::f32)
4935    return SDValue();
4936
4937  SDValue CondLHS = N->getOperand(0);
4938  SDValue CondRHS = N->getOperand(1);
4939  SDValue LHS = N->getOperand(2);
4940  SDValue RHS = N->getOperand(3);
4941  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4942
4943  unsigned Opcode = 0;
4944  bool IsReversed;
4945  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
4946    IsReversed = false; // x CC y ? x : y
4947  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
4948    IsReversed = true ; // x CC y ? y : x
4949  } else {
4950    return SDValue();
4951  }
4952
4953  bool IsUnordered;
4954  switch (CC) {
4955  default: break;
4956  case ISD::SETOLT:
4957  case ISD::SETOLE:
4958  case ISD::SETLT:
4959  case ISD::SETLE:
4960  case ISD::SETULT:
4961  case ISD::SETULE:
4962    // If LHS is NaN, an ordered comparison will be false and the result will
4963    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
4964    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4965    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
4966    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4967      break;
4968    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
4969    // will return -0, so vmin can only be used for unsafe math or if one of
4970    // the operands is known to be nonzero.
4971    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
4972        !UnsafeFPMath &&
4973        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4974      break;
4975    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
4976    break;
4977
4978  case ISD::SETOGT:
4979  case ISD::SETOGE:
4980  case ISD::SETGT:
4981  case ISD::SETGE:
4982  case ISD::SETUGT:
4983  case ISD::SETUGE:
4984    // If LHS is NaN, an ordered comparison will be false and the result will
4985    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
4986    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4987    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
4988    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4989      break;
4990    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
4991    // will return +0, so vmax can only be used for unsafe math or if one of
4992    // the operands is known to be nonzero.
4993    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
4994        !UnsafeFPMath &&
4995        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4996      break;
4997    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
4998    break;
4999  }
5000
5001  if (!Opcode)
5002    return SDValue();
5003  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
5004}
5005
5006SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
5007                                             DAGCombinerInfo &DCI) const {
5008  switch (N->getOpcode()) {
5009  default: break;
5010  case ISD::ADD:        return PerformADDCombine(N, DCI);
5011  case ISD::SUB:        return PerformSUBCombine(N, DCI);
5012  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
5013  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
5014  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
5015  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
5016  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
5017  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
5018  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI.DAG);
5019  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
5020  case ISD::SHL:
5021  case ISD::SRA:
5022  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
5023  case ISD::SIGN_EXTEND:
5024  case ISD::ZERO_EXTEND:
5025  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
5026  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
5027  }
5028  return SDValue();
5029}
5030
5031bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
5032  if (!Subtarget->allowsUnalignedMem())
5033    return false;
5034
5035  switch (VT.getSimpleVT().SimpleTy) {
5036  default:
5037    return false;
5038  case MVT::i8:
5039  case MVT::i16:
5040  case MVT::i32:
5041    return true;
5042  // FIXME: VLD1 etc with standard alignment is legal.
5043  }
5044}
5045
5046static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
5047  if (V < 0)
5048    return false;
5049
5050  unsigned Scale = 1;
5051  switch (VT.getSimpleVT().SimpleTy) {
5052  default: return false;
5053  case MVT::i1:
5054  case MVT::i8:
5055    // Scale == 1;
5056    break;
5057  case MVT::i16:
5058    // Scale == 2;
5059    Scale = 2;
5060    break;
5061  case MVT::i32:
5062    // Scale == 4;
5063    Scale = 4;
5064    break;
5065  }
5066
5067  if ((V & (Scale - 1)) != 0)
5068    return false;
5069  V /= Scale;
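  // After scaling, the offset must fit the unsigned 5-bit immediate field of
  // the Thumb1 load/store instructions.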
5070  return V == (V & ((1LL << 5) - 1));
5071}
5072
5073static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
5074                                      const ARMSubtarget *Subtarget) {
5075  bool isNeg = false;
5076  if (V < 0) {
5077    isNeg = true;
5078    V = - V;
5079  }
5080
5081  switch (VT.getSimpleVT().SimpleTy) {
5082  default: return false;
5083  case MVT::i1:
5084  case MVT::i8:
5085  case MVT::i16:
5086  case MVT::i32:
5087    // + imm12 or - imm8
5088    if (isNeg)
5089      return V == (V & ((1LL << 8) - 1));
5090    return V == (V & ((1LL << 12) - 1));
5091  case MVT::f32:
5092  case MVT::f64:
5093    // Same as ARM mode. FIXME: NEON?
5094    if (!Subtarget->hasVFP2())
5095      return false;
5096    if ((V & 3) != 0)
5097      return false;
5098    V >>= 2;
5099    return V == (V & ((1LL << 8) - 1));
5100  }
5101}
5102
5103/// isLegalAddressImmediate - Return true if the integer value can be used
5104/// as the offset of the target addressing mode for load / store of the
5105/// given type.
5106static bool isLegalAddressImmediate(int64_t V, EVT VT,
5107                                    const ARMSubtarget *Subtarget) {
5108  if (V == 0)
5109    return true;
5110
5111  if (!VT.isSimple())
5112    return false;
5113
5114  if (Subtarget->isThumb1Only())
5115    return isLegalT1AddressImmediate(V, VT);
5116  else if (Subtarget->isThumb2())
5117    return isLegalT2AddressImmediate(V, VT, Subtarget);
5118
5119  // ARM mode.
5120  if (V < 0)
5121    V = - V;
5122  switch (VT.getSimpleVT().SimpleTy) {
5123  default: return false;
5124  case MVT::i1:
5125  case MVT::i8:
5126  case MVT::i32:
5127    // +- imm12
5128    return V == (V & ((1LL << 12) - 1));
5129  case MVT::i16:
5130    // +- imm8
5131    return V == (V & ((1LL << 8) - 1));
5132  case MVT::f32:
5133  case MVT::f64:
5134    if (!Subtarget->hasVFP2()) // FIXME: NEON?
5135      return false;
5136    if ((V & 3) != 0)
5137      return false;
5138    V >>= 2;
5139    return V == (V & ((1LL << 8) - 1));
5140  }
5141}
5142
5143bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
5144                                                      EVT VT) const {
5145  int Scale = AM.Scale;
5146  if (Scale < 0)
5147    return false;
5148
5149  switch (VT.getSimpleVT().SimpleTy) {
5150  default: return false;
5151  case MVT::i1:
5152  case MVT::i8:
5153  case MVT::i16:
5154  case MVT::i32:
5155    if (Scale == 1)
5156      return true;
5157    // r + r << imm
5158    Scale = Scale & ~1;
5159    return Scale == 2 || Scale == 4 || Scale == 8;
5160  case MVT::i64:
5161    // r + r
5162    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5163      return true;
5164    return false;
5165  case MVT::isVoid:
5166    // Note, we allow "void" uses (basically, uses that aren't loads or
5167    // stores), because ARM allows folding a scale into many arithmetic
5168    // operations.  This should be made more precise and revisited later.
5169
5170    // Allow r << imm, but the imm has to be a multiple of two.
5171    if (Scale & 1) return false;
5172    return isPowerOf2_32(Scale);
5173  }
5174}
5175
5176/// isLegalAddressingMode - Return true if the addressing mode represented
5177/// by AM is legal for this target, for a load/store of the specified type.
5178bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
5179                                              const Type *Ty) const {
5180  EVT VT = getValueType(Ty, true);
5181  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
5182    return false;
5183
5184  // Can never fold addr of global into load/store.
5185  if (AM.BaseGV)
5186    return false;
5187
5188  switch (AM.Scale) {
5189  case 0:  // no scale reg, must be "r+i" or "r", or "i".
5190    break;
5191  case 1:
5192    if (Subtarget->isThumb1Only())
5193      return false;
5194    // FALL THROUGH.
5195  default:
5196    // ARM doesn't support any R+R*scale+imm addr modes.
5197    if (AM.BaseOffs)
5198      return false;
5199
5200    if (!VT.isSimple())
5201      return false;
5202
5203    if (Subtarget->isThumb2())
5204      return isLegalT2ScaledAddressingMode(AM, VT);
5205
5206    int Scale = AM.Scale;
5207    switch (VT.getSimpleVT().SimpleTy) {
5208    default: return false;
5209    case MVT::i1:
5210    case MVT::i8:
5211    case MVT::i32:
5212      if (Scale < 0) Scale = -Scale;
5213      if (Scale == 1)
5214        return true;
5215      // r + r << imm
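      // As above, odd scales (3, 5, 9, ...) are accepted because r*(2^n+1)
      // can be formed as r + (r << n).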
5216      return isPowerOf2_32(Scale & ~1);
5217    case MVT::i16:
5218    case MVT::i64:
5219      // r + r
5220      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5221        return true;
5222      return false;
5223
5224    case MVT::isVoid:
5225      // Note, we allow "void" uses (basically, uses that aren't loads or
5226      // stores), because ARM allows folding a scale into many arithmetic
5227      // operations.  This should be made more precise and revisited later.
5228
5229      // Allow r << imm, but the imm has to be a multiple of two.
5230      if (Scale & 1) return false;
5231      return isPowerOf2_32(Scale);
5232    }
5233    break;
5234  }
5235  return true;
5236}
5237
5238/// isLegalICmpImmediate - Return true if the specified immediate is legal
5239/// icmp immediate, that is the target has icmp instructions which can compare
5240/// a register against the immediate without having to materialize the
5241/// immediate into a register.
5242bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
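  // ARM mode accepts an 8-bit value rotated right by an even amount;
  // Thumb2 uses a similar scheme that also allows replicated byte patterns;
  // Thumb1 CMP only takes an unsigned 8-bit immediate (0-255).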
5243  if (!Subtarget->isThumb())
5244    return ARM_AM::getSOImmVal(Imm) != -1;
5245  if (Subtarget->isThumb2())
5246    return ARM_AM::getT2SOImmVal(Imm) != -1;
5247  return Imm >= 0 && Imm <= 255;
5248}
5249
5250static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
5251                                      bool isSEXTLoad, SDValue &Base,
5252                                      SDValue &Offset, bool &isInc,
5253                                      SelectionDAG &DAG) {
5254  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5255    return false;
5256
5257  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
5258    // AddressingMode 3
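    // (halfword and signed-byte loads/stores: register or 8-bit immediate
    // offset)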
5259    Base = Ptr->getOperand(0);
5260    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5261      int RHSC = (int)RHS->getZExtValue();
5262      if (RHSC < 0 && RHSC > -256) {
5263        assert(Ptr->getOpcode() == ISD::ADD);
5264        isInc = false;
5265        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5266        return true;
5267      }
5268    }
5269    isInc = (Ptr->getOpcode() == ISD::ADD);
5270    Offset = Ptr->getOperand(1);
5271    return true;
5272  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
5273    // AddressingMode 2
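    // (word and unsigned-byte loads/stores: 12-bit immediate or shifted
    // register offset)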
5274    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5275      int RHSC = (int)RHS->getZExtValue();
5276      if (RHSC < 0 && RHSC > -0x1000) {
5277        assert(Ptr->getOpcode() == ISD::ADD);
5278        isInc = false;
5279        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5280        Base = Ptr->getOperand(0);
5281        return true;
5282      }
5283    }
5284
5285    if (Ptr->getOpcode() == ISD::ADD) {
5286      isInc = true;
5287      ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
5288      if (ShOpcVal != ARM_AM::no_shift) {
5289        Base = Ptr->getOperand(1);
5290        Offset = Ptr->getOperand(0);
5291      } else {
5292        Base = Ptr->getOperand(0);
5293        Offset = Ptr->getOperand(1);
5294      }
5295      return true;
5296    }
5297
5298    isInc = (Ptr->getOpcode() == ISD::ADD);
5299    Base = Ptr->getOperand(0);
5300    Offset = Ptr->getOperand(1);
5301    return true;
5302  }
5303
5304  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
5305  return false;
5306}
5307
5308static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
5309                                     bool isSEXTLoad, SDValue &Base,
5310                                     SDValue &Offset, bool &isInc,
5311                                     SelectionDAG &DAG) {
5312  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5313    return false;
5314
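  // Thumb2 pre/post-indexed loads and stores only take an immediate offset
  // in the range [-255, 255]; there is no register-offset writeback form.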
5315  Base = Ptr->getOperand(0);
5316  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5317    int RHSC = (int)RHS->getZExtValue();
5318    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
5319      assert(Ptr->getOpcode() == ISD::ADD);
5320      isInc = false;
5321      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5322      return true;
5323    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
5324      isInc = Ptr->getOpcode() == ISD::ADD;
5325      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
5326      return true;
5327    }
5328  }
5329
5330  return false;
5331}
5332
5333/// getPreIndexedAddressParts - Returns true if the node's address can be
5334/// legally represented as a pre-indexed load / store address, and if so
5335/// returns the base pointer, offset, and addressing mode by reference.
5336bool
5337ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
5338                                             SDValue &Offset,
5339                                             ISD::MemIndexedMode &AM,
5340                                             SelectionDAG &DAG) const {
5341  if (Subtarget->isThumb1Only())
5342    return false;
5343
5344  EVT VT;
5345  SDValue Ptr;
5346  bool isSEXTLoad = false;
5347  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5348    Ptr = LD->getBasePtr();
5349    VT  = LD->getMemoryVT();
5350    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5351  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5352    Ptr = ST->getBasePtr();
5353    VT  = ST->getMemoryVT();
5354  } else
5355    return false;
5356
5357  bool isInc;
5358  bool isLegal = false;
5359  if (Subtarget->isThumb2())
5360    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5361                                       Offset, isInc, DAG);
5362  else
5363    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5364                                        Offset, isInc, DAG);
5365  if (!isLegal)
5366    return false;
5367
5368  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
5369  return true;
5370}
5371
5372/// getPostIndexedAddressParts - Returns true if this node can be combined
5373/// with a load / store to form a post-indexed load / store, and if so
5374/// returns the base pointer, offset, and addressing mode by reference.
5375bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
5376                                                   SDValue &Base,
5377                                                   SDValue &Offset,
5378                                                   ISD::MemIndexedMode &AM,
5379                                                   SelectionDAG &DAG) const {
5380  if (Subtarget->isThumb1Only())
5381    return false;
5382
5383  EVT VT;
5384  SDValue Ptr;
5385  bool isSEXTLoad = false;
5386  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5387    VT  = LD->getMemoryVT();
5388    Ptr = LD->getBasePtr();
5389    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5390  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5391    VT  = ST->getMemoryVT();
5392    Ptr = ST->getBasePtr();
5393  } else
5394    return false;
5395
5396  bool isInc;
5397  bool isLegal = false;
5398  if (Subtarget->isThumb2())
5399    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5400                                       isInc, DAG);
5401  else
5402    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5403                                        isInc, DAG);
5404  if (!isLegal)
5405    return false;
5406
5407  if (Ptr != Base) {
5408    // Swap base ptr and offset to catch more post-index load / store when
5409    // it's legal. In Thumb2 mode, offset must be an immediate.
5410    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
5411        !Subtarget->isThumb2())
5412      std::swap(Base, Offset);
5413
5414    // Post-indexed load / store update the base pointer.
5415    if (Ptr != Base)
5416      return false;
5417  }
5418
5419  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
5420  return true;
5421}
5422
5423void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
5424                                                       const APInt &Mask,
5425                                                       APInt &KnownZero,
5426                                                       APInt &KnownOne,
5427                                                       const SelectionDAG &DAG,
5428                                                       unsigned Depth) const {
5429  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
5430  switch (Op.getOpcode()) {
5431  default: break;
5432  case ARMISD::CMOV: {
5433    // Bits are known zero/one if known on the LHS and RHS.
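    // (CMOV picks one of its first two operands at run time, so a bit is
    // known only if both operands agree on it.)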
5434    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
5435    if (KnownZero == 0 && KnownOne == 0) return;
5436
5437    APInt KnownZeroRHS, KnownOneRHS;
5438    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
5439                          KnownZeroRHS, KnownOneRHS, Depth+1);
5440    KnownZero &= KnownZeroRHS;
5441    KnownOne  &= KnownOneRHS;
5442    return;
5443  }
5444  }
5445}
5446
5447//===----------------------------------------------------------------------===//
5448//                           ARM Inline Assembly Support
5449//===----------------------------------------------------------------------===//
5450
5451/// getConstraintType - Given a constraint letter, return the type of
5452/// constraint it is for this target.
5453ARMTargetLowering::ConstraintType
5454ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
5455  if (Constraint.size() == 1) {
5456    switch (Constraint[0]) {
5457    default:  break;
5458    case 'l': return C_RegisterClass;
5459    case 'w': return C_RegisterClass;
5460    }
5461  }
5462  return TargetLowering::getConstraintType(Constraint);
5463}
5464
5465/// Examine constraint type and operand type and determine a weight value.
5466/// This object must already have been set up with the operand type
5467/// and the current alternative constraint selected.
5468TargetLowering::ConstraintWeight
5469ARMTargetLowering::getSingleConstraintMatchWeight(
5470    AsmOperandInfo &info, const char *constraint) const {
5471  ConstraintWeight weight = CW_Invalid;
5472  Value *CallOperandVal = info.CallOperandVal;
5473  // If we don't have a value, we can't do a match,
5474  // but allow it at the lowest weight.
5475  if (CallOperandVal == NULL)
5476    return CW_Default;
5477  const Type *type = CallOperandVal->getType();
5478  // Look at the constraint type.
5479  switch (*constraint) {
5480  default:
5481    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5482    break;
5483  case 'l':
5484    if (type->isIntegerTy()) {
5485      if (Subtarget->isThumb())
5486        weight = CW_SpecificReg;
5487      else
5488        weight = CW_Register;
5489    }
5490    break;
5491  case 'w':
5492    if (type->isFloatingPointTy())
5493      weight = CW_Register;
5494    break;
5495  }
5496  return weight;
5497}
5498
5499std::pair<unsigned, const TargetRegisterClass*>
5500ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
5501                                                EVT VT) const {
5502  if (Constraint.size() == 1) {
5503    // GCC ARM Constraint Letters
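    // 'l' selects the low registers r0-r7 in Thumb mode (any GPR in ARM
    // mode), 'r' any GPR, and 'w' a VFP/NEON register of the matching width.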
5504    switch (Constraint[0]) {
5505    case 'l':
5506      if (Subtarget->isThumb())
5507        return std::make_pair(0U, ARM::tGPRRegisterClass);
5508      else
5509        return std::make_pair(0U, ARM::GPRRegisterClass);
5510    case 'r':
5511      return std::make_pair(0U, ARM::GPRRegisterClass);
5512    case 'w':
5513      if (VT == MVT::f32)
5514        return std::make_pair(0U, ARM::SPRRegisterClass);
5515      if (VT.getSizeInBits() == 64)
5516        return std::make_pair(0U, ARM::DPRRegisterClass);
5517      if (VT.getSizeInBits() == 128)
5518        return std::make_pair(0U, ARM::QPRRegisterClass);
5519      break;
5520    }
5521  }
5522  if (StringRef("{cc}").equals_lower(Constraint))
5523    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
5524
5525  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
5526}
5527
5528std::vector<unsigned> ARMTargetLowering::
5529getRegClassForInlineAsmConstraint(const std::string &Constraint,
5530                                  EVT VT) const {
5531  if (Constraint.size() != 1)
5532    return std::vector<unsigned>();
5533
5534  switch (Constraint[0]) {      // GCC ARM Constraint Letters
5535  default: break;
5536  case 'l':
5537    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5538                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5539                                 0);
5540  case 'r':
5541    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5542                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5543                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
5544                                 ARM::R12, ARM::LR, 0);
5545  case 'w':
5546    if (VT == MVT::f32)
5547      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
5548                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
5549                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
5550                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
5551                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
5552                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
5553                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
5554                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
5555    if (VT.getSizeInBits() == 64)
5556      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
5557                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
5558                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
5559                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
5560    if (VT.getSizeInBits() == 128)
5561      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
5562                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
5563    break;
5564  }
5565
5566  return std::vector<unsigned>();
5567}
5568
5569/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5570/// vector.  If it is invalid, don't add anything to Ops.
5571void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5572                                                     char Constraint,
5573                                                     std::vector<SDValue>&Ops,
5574                                                     SelectionDAG &DAG) const {
5575  SDValue Result(0, 0);
5576
5577  switch (Constraint) {
5578  default: break;
5579  case 'I': case 'J': case 'K': case 'L':
5580  case 'M': case 'N': case 'O':
5581    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5582    if (!C)
5583      return;
5584
5585    int64_t CVal64 = C->getSExtValue();
5586    int CVal = (int) CVal64;
5587    // None of these constraints allow values larger than 32 bits.  Check
5588    // that the value fits in an int.
5589    if (CVal != CVal64)
5590      return;
5591
5592    switch (Constraint) {
5593      case 'I':
5594        if (Subtarget->isThumb1Only()) {
5595          // This must be a constant between 0 and 255, for ADD
5596          // immediates.
5597          if (CVal >= 0 && CVal <= 255)
5598            break;
5599        } else if (Subtarget->isThumb2()) {
5600          // A constant that can be used as an immediate value in a
5601          // data-processing instruction.
5602          if (ARM_AM::getT2SOImmVal(CVal) != -1)
5603            break;
5604        } else {
5605          // A constant that can be used as an immediate value in a
5606          // data-processing instruction.
5607          if (ARM_AM::getSOImmVal(CVal) != -1)
5608            break;
5609        }
5610        return;
5611
5612      case 'J':
5613        if (Subtarget->isThumb()) {  // FIXME thumb2
5614          // This must be a constant between -255 and -1, for negated ADD
5615          // immediates. This can be used in GCC with an "n" modifier that
5616          // prints the negated value, for use with SUB instructions. It is
5617          // not useful otherwise but is implemented for compatibility.
5618          if (CVal >= -255 && CVal <= -1)
5619            break;
5620        } else {
5621          // This must be a constant between -4095 and 4095. It is not clear
5622          // what this constraint is intended for. Implemented for
5623          // compatibility with GCC.
5624          if (CVal >= -4095 && CVal <= 4095)
5625            break;
5626        }
5627        return;
5628
5629      case 'K':
5630        if (Subtarget->isThumb1Only()) {
5631          // A 32-bit value where only one byte has a nonzero value. Exclude
5632          // zero to match GCC. This constraint is used by GCC internally for
5633          // constants that can be loaded with a move/shift combination.
5634          // It is not useful otherwise but is implemented for compatibility.
5635          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
5636            break;
5637        } else if (Subtarget->isThumb2()) {
5638          // A constant whose bitwise inverse can be used as an immediate
5639          // value in a data-processing instruction. This can be used in GCC
5640          // with a "B" modifier that prints the inverted value, for use with
5641          // BIC and MVN instructions. It is not useful otherwise but is
5642          // implemented for compatibility.
5643          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
5644            break;
5645        } else {
5646          // A constant whose bitwise inverse can be used as an immediate
5647          // value in a data-processing instruction. This can be used in GCC
5648          // with a "B" modifier that prints the inverted value, for use with
5649          // BIC and MVN instructions. It is not useful otherwise but is
5650          // implemented for compatibility.
5651          if (ARM_AM::getSOImmVal(~CVal) != -1)
5652            break;
5653        }
5654        return;
5655
5656      case 'L':
5657        if (Subtarget->isThumb1Only()) {
5658          // This must be a constant between -7 and 7,
5659          // for 3-operand ADD/SUB immediate instructions.
5660          if (CVal >= -7 && CVal <= 7)
5661            break;
5662        } else if (Subtarget->isThumb2()) {
5663          // A constant whose negation can be used as an immediate value in a
5664          // data-processing instruction. This can be used in GCC with an "n"
5665          // modifier that prints the negated value, for use with SUB
5666          // instructions. It is not useful otherwise but is implemented for
5667          // compatibility.
5668          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
5669            break;
5670        } else {
5671          // A constant whose negation can be used as an immediate value in a
5672          // data-processing instruction. This can be used in GCC with an "n"
5673          // modifier that prints the negated value, for use with SUB
5674          // instructions. It is not useful otherwise but is implemented for
5675          // compatibility.
5676          if (ARM_AM::getSOImmVal(-CVal) != -1)
5677            break;
5678        }
5679        return;
5680
5681      case 'M':
5682        if (Subtarget->isThumb()) { // FIXME thumb2
5683          // This must be a multiple of 4 between 0 and 1020, for
5684          // ADD sp + immediate.
5685          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
5686            break;
5687        } else {
5688          // A power of two or a constant between 0 and 32.  This is used in
5689          // GCC for the shift amount on shifted register operands, but it is
5690          // useful in general for any shift amounts.
5691          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
5692            break;
5693        }
5694        return;
5695
5696      case 'N':
5697        if (Subtarget->isThumb()) {  // FIXME thumb2
5698          // This must be a constant between 0 and 31, for shift amounts.
5699          if (CVal >= 0 && CVal <= 31)
5700            break;
5701        }
5702        return;
5703
5704      case 'O':
5705        if (Subtarget->isThumb()) {  // FIXME thumb2
5706          // This must be a multiple of 4 between -508 and 508, for
5707          // ADD/SUB sp = sp + immediate.
5708          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
5709            break;
5710        }
5711        return;
5712    }
5713    Result = DAG.getTargetConstant(CVal, Op.getValueType());
5714    break;
5715  }
5716
5717  if (Result.getNode()) {
5718    Ops.push_back(Result);
5719    return;
5720  }
5721  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5722}
5723
5724bool
5725ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
5726  // The ARM target isn't yet aware of offsets.
5727  return false;
5728}
5729
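/// getVFPf32Imm - Encode a 32-bit FP immediate in the 8-bit "abcdefgh" form
/// used by VMOV (immediate): value = (-1)^a * 2^(UInt(NOT(b):c:d)-3) *
/// (16 + UInt(e:f:g:h)) / 16.  Returns -1 if the value cannot be encoded.
/// For example, +1.0f (bits 0x3f800000) encodes as 0x70.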
5730int ARM::getVFPf32Imm(const APFloat &FPImm) {
5731  APInt Imm = FPImm.bitcastToAPInt();
5732  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
5733  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
5734  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
5735
5736  // We can handle 4 bits of mantissa.
5737  // mantissa = (16+UInt(e:f:g:h))/16.
5738  if (Mantissa & 0x7ffff)
5739    return -1;
5740  Mantissa >>= 19;
5741  if ((Mantissa & 0xf) != Mantissa)
5742    return -1;
5743
5744  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5745  if (Exp < -3 || Exp > 4)
5746    return -1;
5747  Exp = ((Exp+3) & 0x7) ^ 4;
5748
5749  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5750}
5751
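/// getVFPf64Imm - Same encoding as getVFPf32Imm, but extracted from a
/// 64-bit immediate (sign bit 63, 11-bit exponent, 52-bit fraction).
/// Returns -1 if the value cannot be encoded.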
5752int ARM::getVFPf64Imm(const APFloat &FPImm) {
5753  APInt Imm = FPImm.bitcastToAPInt();
5754  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
5755  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
5756  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
5757
5758  // We can handle 4 bits of mantissa.
5759  // mantissa = (16+UInt(e:f:g:h))/16.
5760  if (Mantissa & 0xffffffffffffLL)
5761    return -1;
5762  Mantissa >>= 48;
5763  if ((Mantissa & 0xf) != Mantissa)
5764    return -1;
5765
5766  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5767  if (Exp < -3 || Exp > 4)
5768    return -1;
5769  Exp = ((Exp+3) & 0x7) ^ 4;
5770
5771  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5772}
5773
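/// isBitFieldInvertedMask - Return true if the value is the complement of a
/// single contiguous bit field, i.e. an AND mask that a BFC/BFI could
/// produce.  For example, 0xf000000f qualifies (bits 4-27 clear) while
/// 0x0000ff00 does not.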
5774bool ARM::isBitFieldInvertedMask(unsigned v) {
5775  if (v == 0xffffffff)
5776    return false;
5777  // There can be 1's on either or both "outsides"; all the "inside"
5778  // bits must be 0's.
5779  unsigned int lsb = 0, msb = 31;
5780  while (v & (1u << msb)) --msb;
5781  while (v & (1u << lsb)) ++lsb;
5782  for (unsigned int i = lsb; i <= msb; ++i) {
5783    if (v & (1u << i))
5784      return false;
5785  }
5786  return true;
5787}
5788
5789/// isFPImmLegal - Returns true if the target can instruction select the
5790/// specified FP immediate natively. If false, the legalizer will
5791/// materialize the FP immediate as a load from a constant pool.
5792bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5793  if (!Subtarget->hasVFP3())
5794    return false;
5795  if (VT == MVT::f32)
5796    return ARM::getVFPf32Imm(Imm) != -1;
5797  if (VT == MVT::f64)
5798    return ARM::getVFPf64Imm(Imm) != -1;
5799  return false;
5800}
5801
5802/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
5803/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
5804/// specified in the intrinsic calls.
5805bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5806                                           const CallInst &I,
5807                                           unsigned Intrinsic) const {
5808  switch (Intrinsic) {
5809  case Intrinsic::arm_neon_vld1:
5810  case Intrinsic::arm_neon_vld2:
5811  case Intrinsic::arm_neon_vld3:
5812  case Intrinsic::arm_neon_vld4:
5813  case Intrinsic::arm_neon_vld2lane:
5814  case Intrinsic::arm_neon_vld3lane:
5815  case Intrinsic::arm_neon_vld4lane: {
5816    Info.opc = ISD::INTRINSIC_W_CHAIN;
5817    // Conservatively set memVT to the entire set of vectors loaded.
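    // The result may be a struct of several vectors; describe it as N x i64,
    // i.e. the total allocation size in double-words.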
5818    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
5819    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
5820    Info.ptrVal = I.getArgOperand(0);
5821    Info.offset = 0;
5822    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
5823    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
5824    Info.vol = false; // volatile loads with NEON intrinsics not supported
5825    Info.readMem = true;
5826    Info.writeMem = false;
5827    return true;
5828  }
5829  case Intrinsic::arm_neon_vst1:
5830  case Intrinsic::arm_neon_vst2:
5831  case Intrinsic::arm_neon_vst3:
5832  case Intrinsic::arm_neon_vst4:
5833  case Intrinsic::arm_neon_vst2lane:
5834  case Intrinsic::arm_neon_vst3lane:
5835  case Intrinsic::arm_neon_vst4lane: {
5836    Info.opc = ISD::INTRINSIC_VOID;
5837    // Conservatively set memVT to the entire set of vectors stored.
5838    unsigned NumElts = 0;
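    // Sum the sizes of the vector operands; the trailing operands (lane
    // number and/or alignment) are scalars, so stop at the first non-vector.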
5839    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
5840      const Type *ArgTy = I.getArgOperand(ArgI)->getType();
5841      if (!ArgTy->isVectorTy())
5842        break;
5843      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
5844    }
5845    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
5846    Info.ptrVal = I.getArgOperand(0);
5847    Info.offset = 0;
5848    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
5849    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
5850    Info.vol = false; // volatile stores with NEON intrinsics not supported
5851    Info.readMem = false;
5852    Info.writeMem = true;
5853    return true;
5854  }
5855  default:
5856    break;
5857  }
5858
5859  return false;
5860}
5861