ARMISelLowering.cpp revision 3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1
1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-isel"
16#include "ARM.h"
17#include "ARMAddressingModes.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMISelLowering.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetMachine.h"
25#include "ARMTargetObjectFile.h"
26#include "llvm/CallingConv.h"
27#include "llvm/Constants.h"
28#include "llvm/Function.h"
29#include "llvm/GlobalValue.h"
30#include "llvm/Instruction.h"
31#include "llvm/Intrinsics.h"
32#include "llvm/Type.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/PseudoSourceValue.h"
40#include "llvm/CodeGen/SelectionDAG.h"
41#include "llvm/MC/MCSectionMachO.h"
42#include "llvm/Target/TargetOptions.h"
43#include "llvm/ADT/VectorExtras.h"
44#include "llvm/ADT/Statistic.h"
45#include "llvm/Support/CommandLine.h"
46#include "llvm/Support/ErrorHandling.h"
47#include "llvm/Support/MathExtras.h"
48#include "llvm/Support/raw_ostream.h"
49#include <sstream>
50using namespace llvm;
51
52STATISTIC(NumTailCalls, "Number of tail calls");
53
54// This option should go away when tail calls fully work.
55static cl::opt<bool>
56EnableARMTailCalls("arm-tail-calls", cl::Hidden,
57  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
58  cl::init(false));
59
60// This option should go away when Machine LICM is smart enough to hoist a
61// reg-to-reg VDUP.
62static cl::opt<bool>
63EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
64  cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
65  cl::init(false));
66
67static cl::opt<bool>
68EnableARMLongCalls("arm-long-calls", cl::Hidden,
69  cl::desc("Generate calls via indirect call instructions"),
70  cl::init(false));
71
72static cl::opt<bool>
73ARMInterworking("arm-interworking", cl::Hidden,
74  cl::desc("Enable / disable ARM interworking (for debugging only)"),
75  cl::init(true));
76
77static cl::opt<bool>
78EnableARMCodePlacement("arm-code-placement", cl::Hidden,
79  cl::desc("Enable code placement pass for ARM"),
80  cl::init(false));
81
82static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
83                                   CCValAssign::LocInfo &LocInfo,
84                                   ISD::ArgFlagsTy &ArgFlags,
85                                   CCState &State);
86static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
87                                    CCValAssign::LocInfo &LocInfo,
88                                    ISD::ArgFlagsTy &ArgFlags,
89                                    CCState &State);
90static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
91                                      CCValAssign::LocInfo &LocInfo,
92                                      ISD::ArgFlagsTy &ArgFlags,
93                                      CCState &State);
94static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
95                                       CCValAssign::LocInfo &LocInfo,
96                                       ISD::ArgFlagsTy &ArgFlags,
97                                       CCState &State);
98
99void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
100                                       EVT PromotedBitwiseVT) {
101  if (VT != PromotedLdStVT) {
102    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
103    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
104                       PromotedLdStVT.getSimpleVT());
105
106    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
107    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
108                       PromotedLdStVT.getSimpleVT());
109  }
110
111  EVT ElemTy = VT.getVectorElementType();
112  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
113    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
114  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
115    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
116  if (ElemTy != MVT::i32) {
117    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
118    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
119    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
120    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
121  }
122  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
123  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
124  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
125  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
126  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
127  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
128  if (VT.isInteger()) {
129    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
130    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
131    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
132    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
133    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
134  }
135  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
136
137  // Promote all bit-wise operations.
138  if (VT.isInteger() && VT != PromotedBitwiseVT) {
139    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
140    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
141                       PromotedBitwiseVT.getSimpleVT());
142    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
143    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
144                       PromotedBitwiseVT.getSimpleVT());
145    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
146    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
147                       PromotedBitwiseVT.getSimpleVT());
148  }
149
150  // Neon does not support vector divide/remainder operations.
151  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
152  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
153  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
154  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
155  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
156  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
157}
158
159void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
160  addRegisterClass(VT, ARM::DPRRegisterClass);
161  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
162}
163
164void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
165  addRegisterClass(VT, ARM::QPRRegisterClass);
166  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
167}
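// For illustration: addDRTypeForNEON(MVT::v8i8) registers v8i8 in the DPR
// register class and, via addTypeForNEON, promotes its loads and stores to
// f64 (so a whole 64-bit vector can be moved as a single f64 value) and its
// AND/OR/XOR to v2i32; addQRTypeForNEON(MVT::v16i8) does the same with the
// QPR class, using v2f64 for loads/stores and v4i32 for the bitwise ops.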
168
169static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
170  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
171    return new TargetLoweringObjectFileMachO();
172
173  return new ARMElfTargetObjectFile();
174}
175
176ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
177    : TargetLowering(TM, createTLOF(TM)) {
178  Subtarget = &TM.getSubtarget<ARMSubtarget>();
179  RegInfo = TM.getRegisterInfo();
180  Itins = TM.getInstrItineraryData();
181
182  if (Subtarget->isTargetDarwin()) {
183    // Uses VFP for Thumb libfuncs if available.
184    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
185      // Single-precision floating-point arithmetic.
186      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
187      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
188      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
189      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
190
191      // Double-precision floating-point arithmetic.
192      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
193      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
194      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
195      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
196
197      // Single-precision comparisons.
198      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
199      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
200      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
201      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
202      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
203      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
204      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
205      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
206
207      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
208      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
209      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
210      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
211      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
212      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
213      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
214      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
215
216      // Double-precision comparisons.
217      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
218      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
219      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
220      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
221      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
222      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
223      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
224      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
225
226      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
227      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
228      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
229      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
230      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
231      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
232      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
233      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
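      // Note: these *vfp comparison helpers return a nonzero value when the
      // tested relation holds, so each libcall result is checked against zero
      // with SETNE; the "ordered" query reuses __unord*2vfp and inverts the
      // sense with SETEQ.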
234
235      // Floating-point to integer conversions.
236      // i64 conversions are done via library routines even when generating VFP
237      // instructions, so use the same ones.
238      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
239      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
240      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
241      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
242
243      // Conversions between floating types.
244      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
245      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
246
247      // Integer to floating-point conversions.
248      // i64 conversions are done via library routines even when generating VFP
249      // instructions, so use the same ones.
250      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
251      // e.g., __floatunsidf vs. __floatunssidfvfp.
252      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
253      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
254      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
255      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
256    }
257  }
258
259  // These libcalls are not available in 32-bit mode.
260  setLibcallName(RTLIB::SHL_I128, 0);
261  setLibcallName(RTLIB::SRL_I128, 0);
262  setLibcallName(RTLIB::SRA_I128, 0);
263
264  // Libcalls should use the AAPCS base standard ABI, even if hard float
265  // is in effect, as per the ARM RTABI specification, section 4.1.2.
266  if (Subtarget->isAAPCS_ABI()) {
267    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
268      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
269                            CallingConv::ARM_AAPCS);
270    }
271  }
272
273  if (Subtarget->isThumb1Only())
274    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
275  else
276    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
277  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
278    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
279    if (!Subtarget->isFPOnlySP())
280      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
281
282    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
283  }
284
285  if (Subtarget->hasNEON()) {
286    addDRTypeForNEON(MVT::v2f32);
287    addDRTypeForNEON(MVT::v8i8);
288    addDRTypeForNEON(MVT::v4i16);
289    addDRTypeForNEON(MVT::v2i32);
290    addDRTypeForNEON(MVT::v1i64);
291
292    addQRTypeForNEON(MVT::v4f32);
293    addQRTypeForNEON(MVT::v2f64);
294    addQRTypeForNEON(MVT::v16i8);
295    addQRTypeForNEON(MVT::v8i16);
296    addQRTypeForNEON(MVT::v4i32);
297    addQRTypeForNEON(MVT::v2i64);
298
299    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
300    // neither Neon nor VFP support any arithmetic operations on it.
301    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
302    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
303    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
304    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
305    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
306    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
307    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
308    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
309    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
310    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
311    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
312    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
313    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
314    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
315    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
316    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
317    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
318    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
319    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
320    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
321    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
322    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
323    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
324    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
325
326    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
327
328    // Neon does not support some operations on v1i64 and v2i64 types.
329    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
330    // Custom handling for some quad-vector types to detect VMULL.
331    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
332    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
333    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
334    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
335    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
336
337    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
338    setTargetDAGCombine(ISD::SHL);
339    setTargetDAGCombine(ISD::SRL);
340    setTargetDAGCombine(ISD::SRA);
341    setTargetDAGCombine(ISD::SIGN_EXTEND);
342    setTargetDAGCombine(ISD::ZERO_EXTEND);
343    setTargetDAGCombine(ISD::ANY_EXTEND);
344    setTargetDAGCombine(ISD::SELECT_CC);
345  }
346
347  computeRegisterProperties();
348
349  // ARM does not have f32 extending load.
350  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
351
352  // ARM does not have i1 sign extending load.
353  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
354
355  // ARM supports all 4 flavors of integer indexed load / store.
356  if (!Subtarget->isThumb1Only()) {
357    for (unsigned im = (unsigned)ISD::PRE_INC;
358         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
359      setIndexedLoadAction(im,  MVT::i1,  Legal);
360      setIndexedLoadAction(im,  MVT::i8,  Legal);
361      setIndexedLoadAction(im,  MVT::i16, Legal);
362      setIndexedLoadAction(im,  MVT::i32, Legal);
363      setIndexedStoreAction(im, MVT::i1,  Legal);
364      setIndexedStoreAction(im, MVT::i8,  Legal);
365      setIndexedStoreAction(im, MVT::i16, Legal);
366      setIndexedStoreAction(im, MVT::i32, Legal);
367    }
368  }
369
370  // i64 operation support.
371  if (Subtarget->isThumb1Only()) {
372    setOperationAction(ISD::MUL,     MVT::i64, Expand);
373    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
374    setOperationAction(ISD::MULHS,   MVT::i32, Expand);
375    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
376    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
377  } else {
378    setOperationAction(ISD::MUL,     MVT::i64, Expand);
379    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
380    if (!Subtarget->hasV6Ops())
381      setOperationAction(ISD::MULHS, MVT::i32, Expand);
382  }
383  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
384  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
385  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
386  setOperationAction(ISD::SRL,       MVT::i64, Custom);
387  setOperationAction(ISD::SRA,       MVT::i64, Custom);
388
389  // ARM does not have ROTL.
390  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
391  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
392  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
393  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
394    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
395
396  // Only ARMv6 and later have BSWAP (the REV instruction).
397  if (!Subtarget->hasV6Ops())
398    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
399
400  // These are expanded into libcalls.
401  if (!Subtarget->hasDivide()) {
402    // v7M has a hardware divider
403    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
404    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
405  }
406  setOperationAction(ISD::SREM,  MVT::i32, Expand);
407  setOperationAction(ISD::UREM,  MVT::i32, Expand);
408  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
409  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
410
411  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
412  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
413  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
414  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
415  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
416
417  setOperationAction(ISD::TRAP, MVT::Other, Legal);
418
419  // Use the default implementation.
420  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
421  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
422  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
423  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
424  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
425  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
426  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
427  // FIXME: Shouldn't need this, since no register is used, but the legalizer
428  // doesn't yet know how to not do that for SjLj.
429  setExceptionSelectorRegister(ARM::R0);
430  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
431  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
432  // the default expansion.
433  if (Subtarget->hasDataBarrier() ||
434      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
435    // membarrier needs custom lowering; the rest are legal and handled
436    // normally.
437    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
438  } else {
439    // Set them all for expansion, which will force libcalls.
440    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
441    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
442    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
443    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
444    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
445    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
446    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
447    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
448    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
449    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
450    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
451    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
452    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
453    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
454    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
455    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
456    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
457    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
458    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
459    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
460    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
461    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
462    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
463    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
464    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
465    // Since the libcalls include locking, fold in the fences
466    setShouldFoldAtomicFences(true);
467  }
468  // 64-bit versions are always libcalls (for now)
469  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
470  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
471  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
472  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
473  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
474  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
475  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
476  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
477
478  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
479  if (!Subtarget->hasV6Ops()) {
480    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
481    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
482  }
483  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
484
485  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
486    // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR,
487    // if the target supports VFP2.
488    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
489    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
490  }
491
492  // We want to custom lower some of our intrinsics.
493  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
494  if (Subtarget->isTargetDarwin()) {
495    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
496    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
497  }
498
499  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
500  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
501  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
502  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
503  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
504  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
505  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
506  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
507  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
508
509  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
510  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
511  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
512  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
513  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
514
515  // We don't support sin/cos/fmod/copysign/pow
516  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
517  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
518  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
519  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
520  setOperationAction(ISD::FREM,      MVT::f64, Expand);
521  setOperationAction(ISD::FREM,      MVT::f32, Expand);
522  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
523    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
524    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
525  }
526  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
527  setOperationAction(ISD::FPOW,      MVT::f32, Expand);
528
529  // Various VFP goodness
530  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
531    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
532    if (Subtarget->hasVFP2()) {
533      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
534      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
535      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
536      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
537    }
538    // Special handling for half-precision FP.
539    if (!Subtarget->hasFP16()) {
540      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
541      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
542    }
543  }
544
545  // We have target-specific dag combine patterns for the following nodes:
546  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
547  setTargetDAGCombine(ISD::ADD);
548  setTargetDAGCombine(ISD::SUB);
549  setTargetDAGCombine(ISD::MUL);
550
551  if (Subtarget->hasV6T2Ops())
552    setTargetDAGCombine(ISD::OR);
553
554  setStackPointerRegisterToSaveRestore(ARM::SP);
555
556  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
557    setSchedulingPreference(Sched::RegPressure);
558  else
559    setSchedulingPreference(Sched::Hybrid);
560
561  maxStoresPerMemcpy = 1;   // temporary - rewrite interface to use type
562
563  // On ARM, arguments smaller than 4 bytes are extended, so all arguments
564  // are at least 4 bytes aligned.
565  setMinStackArgumentAlignment(4);
566
567  if (EnableARMCodePlacement)
568    benefitFromCodePlacementOpt = true;
569}
570
571std::pair<const TargetRegisterClass*, uint8_t>
572ARMTargetLowering::findRepresentativeClass(EVT VT) const {
573  const TargetRegisterClass *RRC = 0;
574  uint8_t Cost = 1;
575  switch (VT.getSimpleVT().SimpleTy) {
576  default:
577    return TargetLowering::findRepresentativeClass(VT);
578  // Use DPR as representative register class for all floating point
579  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
580  // the cost is 1 for both f32 and f64.
581  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
582  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
583    RRC = ARM::DPRRegisterClass;
584    break;
585  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
586  case MVT::v4f32: case MVT::v2f64:
587    RRC = ARM::DPRRegisterClass;
588    Cost = 2;
589    break;
590  case MVT::v4i64:
591    RRC = ARM::DPRRegisterClass;
592    Cost = 4;
593    break;
594  case MVT::v8i64:
595    RRC = ARM::DPRRegisterClass;
596    Cost = 8;
597    break;
598  }
599  return std::make_pair(RRC, Cost);
600}
601
602const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
603  switch (Opcode) {
604  default: return 0;
605  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
606  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
607  case ARMISD::CALL:          return "ARMISD::CALL";
608  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
609  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
610  case ARMISD::tCALL:         return "ARMISD::tCALL";
611  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
612  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
613  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
614  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
615  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
616  case ARMISD::AND:           return "ARMISD::AND";
617  case ARMISD::CMP:           return "ARMISD::CMP";
618  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
619  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
620  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
621  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
622  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
623  case ARMISD::CMOV:          return "ARMISD::CMOV";
624  case ARMISD::CNEG:          return "ARMISD::CNEG";
625
626  case ARMISD::RBIT:          return "ARMISD::RBIT";
627
628  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
629  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
630  case ARMISD::SITOF:         return "ARMISD::SITOF";
631  case ARMISD::UITOF:         return "ARMISD::UITOF";
632
633  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
634  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
635  case ARMISD::RRX:           return "ARMISD::RRX";
636
637  case ARMISD::VMOVRRD:         return "ARMISD::VMOVRRD";
638  case ARMISD::VMOVDRR:         return "ARMISD::VMOVDRR";
639
640  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
641  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
642
643  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
644
645  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
646
647  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
648
649  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
650  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";
651
652  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
653  case ARMISD::VCGE:          return "ARMISD::VCGE";
654  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
655  case ARMISD::VCGT:          return "ARMISD::VCGT";
656  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
657  case ARMISD::VTST:          return "ARMISD::VTST";
658
659  case ARMISD::VSHL:          return "ARMISD::VSHL";
660  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
661  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
662  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
663  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
664  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
665  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
666  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
667  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
668  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
669  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
670  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
671  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
672  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
673  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
674  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
675  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
676  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
677  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
678  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
679  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
680  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
681  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
682  case ARMISD::VDUP:          return "ARMISD::VDUP";
683  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
684  case ARMISD::VEXT:          return "ARMISD::VEXT";
685  case ARMISD::VREV64:        return "ARMISD::VREV64";
686  case ARMISD::VREV32:        return "ARMISD::VREV32";
687  case ARMISD::VREV16:        return "ARMISD::VREV16";
688  case ARMISD::VZIP:          return "ARMISD::VZIP";
689  case ARMISD::VUZP:          return "ARMISD::VUZP";
690  case ARMISD::VTRN:          return "ARMISD::VTRN";
691  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
692  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
693  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
694  case ARMISD::FMAX:          return "ARMISD::FMAX";
695  case ARMISD::FMIN:          return "ARMISD::FMIN";
696  case ARMISD::BFI:           return "ARMISD::BFI";
697  }
698}
699
700/// getRegClassFor - Return the register class that should be used for the
701/// specified value type.
702TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
703  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
704  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
705  // load / store 4 to 8 consecutive D registers.
706  if (Subtarget->hasNEON()) {
707    if (VT == MVT::v4i64)
708      return ARM::QQPRRegisterClass;
709    else if (VT == MVT::v8i64)
710      return ARM::QQQQPRRegisterClass;
711  }
712  return TargetLowering::getRegClassFor(VT);
713}
714
715// Create a fast isel object.
716FastISel *
717ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
718  return ARM::createFastISel(funcInfo);
719}
720
721/// getFunctionAlignment - Return the Log2 alignment of this function.
722unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
723  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
724}
725
726/// getMaximalGlobalOffset - Returns the maximal possible offset which can
727/// be used for loads / stores from the global.
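/// The result is a conservative bound on the immediate offset range of the
/// underlying load/store instructions: Thumb1 addressing modes only accept
/// small unsigned immediates, while ARM and Thumb2 word accesses allow a
/// 12-bit offset (0..4095).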
728unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
729  return (Subtarget->isThumb1Only() ? 127 : 4095);
730}
731
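/// getSchedulingPreference - Pick between latency-oriented and
/// register-pressure-oriented scheduling for a node: floating-point and
/// vector values, loads, and instructions whose stage latency exceeds 2 are
/// scheduled for latency; everything else for register pressure.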
732Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
733  unsigned NumVals = N->getNumValues();
734  if (!NumVals)
735    return Sched::RegPressure;
736
737  for (unsigned i = 0; i != NumVals; ++i) {
738    EVT VT = N->getValueType(i);
739    if (VT.isFloatingPoint() || VT.isVector())
740      return Sched::Latency;
741  }
742
743  if (!N->isMachineOpcode())
744    return Sched::RegPressure;
745
746  // Loads are scheduled for latency even if the instruction itinerary
747  // is not available.
748  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
749  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
750  if (TID.mayLoad())
751    return Sched::Latency;
752
753  if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2)
754    return Sched::Latency;
755  return Sched::RegPressure;
756}
757
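/// getRegPressureLimit - Heuristic limit on the number of registers in RC
/// that the scheduler may use before register pressure becomes a problem.
/// The GPR limits account for the frame pointer (when one is used) and a
/// reserved R9; the SPR/DPR limit is a conservative fixed estimate.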
758unsigned
759ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
760                                       MachineFunction &MF) const {
761  switch (RC->getID()) {
762  default:
763    return 0;
764  case ARM::tGPRRegClassID:
765    return RegInfo->hasFP(MF) ? 4 : 5;
766  case ARM::GPRRegClassID: {
767    unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
768    return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
769  }
770  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
771  case ARM::DPRRegClassID:
772    return 32 - 10;
773  }
774}
775
776//===----------------------------------------------------------------------===//
777// Lowering Code
778//===----------------------------------------------------------------------===//
779
780/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
781static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
782  switch (CC) {
783  default: llvm_unreachable("Unknown condition code!");
784  case ISD::SETNE:  return ARMCC::NE;
785  case ISD::SETEQ:  return ARMCC::EQ;
786  case ISD::SETGT:  return ARMCC::GT;
787  case ISD::SETGE:  return ARMCC::GE;
788  case ISD::SETLT:  return ARMCC::LT;
789  case ISD::SETLE:  return ARMCC::LE;
790  case ISD::SETUGT: return ARMCC::HI;
791  case ISD::SETUGE: return ARMCC::HS;
792  case ISD::SETULT: return ARMCC::LO;
793  case ISD::SETULE: return ARMCC::LS;
794  }
795}
796
797/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
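/// Some conditions need two ARM condition codes: e.g. SETONE (ordered and
/// not equal) becomes MI followed by GT, since after a VFP compare and
/// FMSTAT "less than" reads as MI and "greater than" as GT, and no single
/// ARM condition covers both.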
798static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
799                        ARMCC::CondCodes &CondCode2) {
800  CondCode2 = ARMCC::AL;
801  switch (CC) {
802  default: llvm_unreachable("Unknown FP condition!");
803  case ISD::SETEQ:
804  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
805  case ISD::SETGT:
806  case ISD::SETOGT: CondCode = ARMCC::GT; break;
807  case ISD::SETGE:
808  case ISD::SETOGE: CondCode = ARMCC::GE; break;
809  case ISD::SETOLT: CondCode = ARMCC::MI; break;
810  case ISD::SETOLE: CondCode = ARMCC::LS; break;
811  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
812  case ISD::SETO:   CondCode = ARMCC::VC; break;
813  case ISD::SETUO:  CondCode = ARMCC::VS; break;
814  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
815  case ISD::SETUGT: CondCode = ARMCC::HI; break;
816  case ISD::SETUGE: CondCode = ARMCC::PL; break;
817  case ISD::SETLT:
818  case ISD::SETULT: CondCode = ARMCC::LT; break;
819  case ISD::SETLE:
820  case ISD::SETULE: CondCode = ARMCC::LE; break;
821  case ISD::SETNE:
822  case ISD::SETUNE: CondCode = ARMCC::NE; break;
823  }
824}
825
826//===----------------------------------------------------------------------===//
827//                      Calling Convention Implementation
828//===----------------------------------------------------------------------===//
829
830#include "ARMGenCallingConv.inc"
831
832// APCS f64 is in register pairs, possibly split to stack
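// Each 32-bit half is allocated independently from {R0,R1,R2,R3}, so an f64
// may end up split between the last core register and the stack.  When no
// register is available at all, the first invocation (CanFail == true)
// declines and lets the normal APCS rules place the value, while the second
// half of a v2f64 (CanFail == false) is given an 8-byte stack slot here.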
833static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
834                          CCValAssign::LocInfo &LocInfo,
835                          CCState &State, bool CanFail) {
836  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
837
838  // Try to get the first register.
839  if (unsigned Reg = State.AllocateReg(RegList, 4))
840    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
841  else {
842    // For the 2nd half of a v2f64, do not fail.
843    if (CanFail)
844      return false;
845
846    // Put the whole thing on the stack.
847    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
848                                           State.AllocateStack(8, 4),
849                                           LocVT, LocInfo));
850    return true;
851  }
852
853  // Try to get the second register.
854  if (unsigned Reg = State.AllocateReg(RegList, 4))
855    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
856  else
857    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
858                                           State.AllocateStack(4, 4),
859                                           LocVT, LocInfo));
860  return true;
861}
862
863static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
864                                   CCValAssign::LocInfo &LocInfo,
865                                   ISD::ArgFlagsTy &ArgFlags,
866                                   CCState &State) {
867  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
868    return false;
869  if (LocVT == MVT::v2f64 &&
870      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
871    return false;
872  return true;  // we handled it
873}
874
875// AAPCS f64 is in aligned register pairs
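// AllocateReg with the shadow list enforces the AAPCS alignment rule: the
// high half may only land in R0 or R2, and when R2 is chosen the skipped R1
// is shadowed (marked allocated) so a later argument does not back-fill it.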
876static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
877                           CCValAssign::LocInfo &LocInfo,
878                           CCState &State, bool CanFail) {
879  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
880  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
881  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
882
883  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
884  if (Reg == 0) {
885    // For the 2nd half of a v2f64, do not just fail.
886    if (CanFail)
887      return false;
888
889    // Put the whole thing on the stack.
890    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
891                                           State.AllocateStack(8, 8),
892                                           LocVT, LocInfo));
893    return true;
894  }
895
896  unsigned i;
897  for (i = 0; i < 2; ++i)
898    if (HiRegList[i] == Reg)
899      break;
900
901  unsigned T = State.AllocateReg(LoRegList[i]);
902  (void)T;
903  assert(T == LoRegList[i] && "Could not allocate register");
904
905  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
906  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
907                                         LocVT, LocInfo));
908  return true;
909}
910
911static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
912                                    CCValAssign::LocInfo &LocInfo,
913                                    ISD::ArgFlagsTy &ArgFlags,
914                                    CCState &State) {
915  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
916    return false;
917  if (LocVT == MVT::v2f64 &&
918      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
919    return false;
920  return true;  // we handled it
921}
922
923static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
924                         CCValAssign::LocInfo &LocInfo, CCState &State) {
925  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
926  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
927
928  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
929  if (Reg == 0)
930    return false; // we didn't handle it
931
932  unsigned i;
933  for (i = 0; i < 2; ++i)
934    if (HiRegList[i] == Reg)
935      break;
936
937  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
938  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
939                                         LocVT, LocInfo));
940  return true;
941}
942
943static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
944                                      CCValAssign::LocInfo &LocInfo,
945                                      ISD::ArgFlagsTy &ArgFlags,
946                                      CCState &State) {
947  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
948    return false;
949  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
950    return false;
951  return true;  // we handled it
952}
953
954static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
955                                       CCValAssign::LocInfo &LocInfo,
956                                       ISD::ArgFlagsTy &ArgFlags,
957                                       CCState &State) {
958  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
959                                   State);
960}
961
962/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
963/// CallingConvention value.
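/// For example, a non-variadic C call on an AAPCS target with VFP2 and the
/// hard-float ABI selects CC_ARM_AAPCS_VFP (floating-point arguments in VFP
/// registers), while a variadic call on the same target falls back to the
/// base CC_ARM_AAPCS convention.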
964CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
965                                                 bool Return,
966                                                 bool isVarArg) const {
967  switch (CC) {
968  default:
969    llvm_unreachable("Unsupported calling convention");
970  case CallingConv::C:
971  case CallingConv::Fast:
972    // Use target triple & subtarget features to do actual dispatch.
973    if (Subtarget->isAAPCS_ABI()) {
974      if (Subtarget->hasVFP2() &&
975          FloatABIType == FloatABI::Hard && !isVarArg)
976        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
977      else
978        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
979    } else
980        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
981  case CallingConv::ARM_AAPCS_VFP:
982    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
983  case CallingConv::ARM_AAPCS:
984    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
985  case CallingConv::ARM_APCS:
986    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
987  }
988}
989
990/// LowerCallResult - Lower the result values of a call into the
991/// appropriate copies out of appropriate physical registers.
992SDValue
993ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
994                                   CallingConv::ID CallConv, bool isVarArg,
995                                   const SmallVectorImpl<ISD::InputArg> &Ins,
996                                   DebugLoc dl, SelectionDAG &DAG,
997                                   SmallVectorImpl<SDValue> &InVals) const {
998
999  // Assign locations to each value returned by this call.
1000  SmallVector<CCValAssign, 16> RVLocs;
1001  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1002                 RVLocs, *DAG.getContext());
1003  CCInfo.AnalyzeCallResult(Ins,
1004                           CCAssignFnForNode(CallConv, /* Return*/ true,
1005                                             isVarArg));
1006
1007  // Copy all of the result registers out of their specified physreg.
1008  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1009    CCValAssign VA = RVLocs[i];
1010
1011    SDValue Val;
1012    if (VA.needsCustom()) {
1013      // Handle f64 or half of a v2f64.
1014      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1015                                      InFlag);
1016      Chain = Lo.getValue(1);
1017      InFlag = Lo.getValue(2);
1018      VA = RVLocs[++i]; // skip ahead to next loc
1019      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1020                                      InFlag);
1021      Chain = Hi.getValue(1);
1022      InFlag = Hi.getValue(2);
1023      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1024
1025      if (VA.getLocVT() == MVT::v2f64) {
1026        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1027        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1028                          DAG.getConstant(0, MVT::i32));
1029
1030        VA = RVLocs[++i]; // skip ahead to next loc
1031        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1032        Chain = Lo.getValue(1);
1033        InFlag = Lo.getValue(2);
1034        VA = RVLocs[++i]; // skip ahead to next loc
1035        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1036        Chain = Hi.getValue(1);
1037        InFlag = Hi.getValue(2);
1038        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1039        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1040                          DAG.getConstant(1, MVT::i32));
1041      }
1042    } else {
1043      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1044                               InFlag);
1045      Chain = Val.getValue(1);
1046      InFlag = Val.getValue(2);
1047    }
1048
1049    switch (VA.getLocInfo()) {
1050    default: llvm_unreachable("Unknown loc info!");
1051    case CCValAssign::Full: break;
1052    case CCValAssign::BCvt:
1053      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
1054      break;
1055    }
1056
1057    InVals.push_back(Val);
1058  }
1059
1060  return Chain;
1061}
1062
1063/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1064/// by "Src" to address "Dst" of size "Size".  Alignment information is
1065/// specified by the specific parameter attribute.  The copy will be passed as
1066/// a byval function parameter.
1067/// Sometimes what we are copying is the end of a larger object, the part that
1068/// does not fit in registers.
1069static SDValue
1070CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1071                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
1072                          DebugLoc dl) {
1073  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1074  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
1075                       /*isVolatile=*/false, /*AlwaysInline=*/false,
1076                       NULL, 0, NULL, 0);
1077}
1078
1079/// LowerMemOpCallTo - Store the argument to the stack.
1080SDValue
1081ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1082                                    SDValue StackPtr, SDValue Arg,
1083                                    DebugLoc dl, SelectionDAG &DAG,
1084                                    const CCValAssign &VA,
1085                                    ISD::ArgFlagsTy Flags) const {
1086  unsigned LocMemOffset = VA.getLocMemOffset();
1087  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1088  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1089  if (Flags.isByVal()) {
1090    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1091  }
1092  return DAG.getStore(Chain, dl, Arg, PtrOff,
1093                      PseudoSourceValue::getStack(), LocMemOffset,
1094                      false, false, 0);
1095}
1096
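/// PassF64ArgInRegs - Split an f64 argument into two i32 halves with VMOVRRD
/// and hand them to the calling convention: the first half goes in VA's
/// register, the second in NextVA's register or, if NextVA is a memory
/// location, into the corresponding stack slot.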
1097void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
1098                                         SDValue Chain, SDValue &Arg,
1099                                         RegsToPassVector &RegsToPass,
1100                                         CCValAssign &VA, CCValAssign &NextVA,
1101                                         SDValue &StackPtr,
1102                                         SmallVector<SDValue, 8> &MemOpChains,
1103                                         ISD::ArgFlagsTy Flags) const {
1104
1105  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1106                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
1107  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
1108
1109  if (NextVA.isRegLoc())
1110    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
1111  else {
1112    assert(NextVA.isMemLoc());
1113    if (StackPtr.getNode() == 0)
1114      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1115
1116    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
1117                                           dl, DAG, NextVA,
1118                                           Flags));
1119  }
1120}
1121
1122/// LowerCall - Lower a call into a callseq_start <-
1123/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1124/// nodes.
1125SDValue
1126ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1127                             CallingConv::ID CallConv, bool isVarArg,
1128                             bool &isTailCall,
1129                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1130                             const SmallVectorImpl<SDValue> &OutVals,
1131                             const SmallVectorImpl<ISD::InputArg> &Ins,
1132                             DebugLoc dl, SelectionDAG &DAG,
1133                             SmallVectorImpl<SDValue> &InVals) const {
1134  MachineFunction &MF = DAG.getMachineFunction();
1135  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1136  bool IsSibCall = false;
1137  // Temporarily disable tail calls so things don't break.
1138  if (!EnableARMTailCalls)
1139    isTailCall = false;
1140  if (isTailCall) {
1141    // Check if it's really possible to do a tail call.
1142    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1143                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1144                                                   Outs, OutVals, Ins, DAG);
1145    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1146    // detected sibcalls.
1147    if (isTailCall) {
1148      ++NumTailCalls;
1149      IsSibCall = true;
1150    }
1151  }
1152
1153  // Analyze operands of the call, assigning locations to each operand.
1154  SmallVector<CCValAssign, 16> ArgLocs;
1155  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1156                 *DAG.getContext());
1157  CCInfo.AnalyzeCallOperands(Outs,
1158                             CCAssignFnForNode(CallConv, /* Return*/ false,
1159                                               isVarArg));
1160
1161  // Get a count of how many bytes are to be pushed on the stack.
1162  unsigned NumBytes = CCInfo.getNextStackOffset();
1163
1164  // For tail calls, memory operands are available in our caller's stack.
1165  if (IsSibCall)
1166    NumBytes = 0;
1167
1168  // Adjust the stack pointer for the new arguments...
1169  // These operations are automatically eliminated by the prolog/epilog pass
1170  if (!IsSibCall)
1171    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1172
1173  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1174
1175  RegsToPassVector RegsToPass;
1176  SmallVector<SDValue, 8> MemOpChains;
1177
1178  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1179  // of tail call optimization, arguments are handled later.
1180  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1181       i != e;
1182       ++i, ++realArgIdx) {
1183    CCValAssign &VA = ArgLocs[i];
1184    SDValue Arg = OutVals[realArgIdx];
1185    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1186
1187    // Promote the value if needed.
1188    switch (VA.getLocInfo()) {
1189    default: llvm_unreachable("Unknown loc info!");
1190    case CCValAssign::Full: break;
1191    case CCValAssign::SExt:
1192      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1193      break;
1194    case CCValAssign::ZExt:
1195      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1196      break;
1197    case CCValAssign::AExt:
1198      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1199      break;
1200    case CCValAssign::BCvt:
1201      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1202      break;
1203    }
1204
1205    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1206    if (VA.needsCustom()) {
1207      if (VA.getLocVT() == MVT::v2f64) {
1208        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1209                                  DAG.getConstant(0, MVT::i32));
1210        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1211                                  DAG.getConstant(1, MVT::i32));
1212
1213        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1214                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1215
1216        VA = ArgLocs[++i]; // skip ahead to next loc
1217        if (VA.isRegLoc()) {
1218          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1219                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1220        } else {
1221          assert(VA.isMemLoc());
1222
1223          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1224                                                 dl, DAG, VA, Flags));
1225        }
1226      } else {
1227        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1228                         StackPtr, MemOpChains, Flags);
1229      }
1230    } else if (VA.isRegLoc()) {
1231      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1232    } else if (!IsSibCall) {
1233      assert(VA.isMemLoc());
1234
1235      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1236                                             dl, DAG, VA, Flags));
1237    }
1238  }
1239
1240  if (!MemOpChains.empty())
1241    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1242                        &MemOpChains[0], MemOpChains.size());
1243
1244  // Build a sequence of copy-to-reg nodes chained together with token chain
1245  // and flag operands which copy the outgoing args into the appropriate regs.
1246  SDValue InFlag;
1247  // Tail call byval lowering might overwrite argument registers, so in the case
1248  // of tail call optimization the copies to registers are lowered later.
1249  if (!isTailCall)
1250    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1251      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1252                               RegsToPass[i].second, InFlag);
1253      InFlag = Chain.getValue(1);
1254    }
1255
1256  // For tail calls lower the arguments to the 'real' stack slot.
1257  if (isTailCall) {
1258    // Force all the incoming stack arguments to be loaded from the stack
1259    // before any new outgoing arguments are stored to the stack, because the
1260    // outgoing stack slots may alias the incoming argument stack slots, and
1261    // the alias isn't otherwise explicit. This is slightly more conservative
1262    // than necessary, because it means that each store effectively depends
1263    // on every argument instead of just those arguments it would clobber.
1264
1265    // Do not flag preceding CopyToReg nodes together with the following ops.
1266    InFlag = SDValue();
1267    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1268      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1269                               RegsToPass[i].second, InFlag);
1270      InFlag = Chain.getValue(1);
1271    }
1272    InFlag = SDValue();
1273  }
1274
1275  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1276  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
1277  // node so that legalize doesn't hack it.
1278  bool isDirect = false;
1279  bool isARMFunc = false;
1280  bool isLocalARMFunc = false;
1281  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1282
1283  if (EnableARMLongCalls) {
1284    assert(getTargetMachine().getRelocationModel() == Reloc::Static &&
1285           "long-calls with non-static relocation model!");
1286    // Handle a global address or an external symbol. If it's not one of
1287    // those, the target's already in a register, so we don't need to do
1288    // anything extra.
1289    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1290      const GlobalValue *GV = G->getGlobal();
1291      // Create a constant pool entry for the callee address
1292      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1293      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1294                                                           ARMPCLabelIndex,
1295                                                           ARMCP::CPValue, 0);
1296      // Get the address of the callee into a register
1297      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1298      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1299      Callee = DAG.getLoad(getPointerTy(), dl,
1300                           DAG.getEntryNode(), CPAddr,
1301                           PseudoSourceValue::getConstantPool(), 0,
1302                           false, false, 0);
1303    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1304      const char *Sym = S->getSymbol();
1305
1306      // Create a constant pool entry for the callee address
1307      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1308      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1309                                                       Sym, ARMPCLabelIndex, 0);
1310      // Get the address of the callee into a register
1311      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1312      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1313      Callee = DAG.getLoad(getPointerTy(), dl,
1314                           DAG.getEntryNode(), CPAddr,
1315                           PseudoSourceValue::getConstantPool(), 0,
1316                           false, false, 0);
1317    }
1318  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1319    const GlobalValue *GV = G->getGlobal();
1320    isDirect = true;
1321    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1322    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1323                   getTargetMachine().getRelocationModel() != Reloc::Static;
1324    isARMFunc = !Subtarget->isThumb() || isStub;
1325    // ARM call to a local ARM function is predicable.
1326    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1327    // tBX takes a register source operand.
1328    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1329      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1330      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1331                                                           ARMPCLabelIndex,
1332                                                           ARMCP::CPValue, 4);
1333      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1334      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1335      Callee = DAG.getLoad(getPointerTy(), dl,
1336                           DAG.getEntryNode(), CPAddr,
1337                           PseudoSourceValue::getConstantPool(), 0,
1338                           false, false, 0);
1339      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1340      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1341                           getPointerTy(), Callee, PICLabel);
1342    } else
1343      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
1344  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1345    isDirect = true;
1346    bool isStub = Subtarget->isTargetDarwin() &&
1347                  getTargetMachine().getRelocationModel() != Reloc::Static;
1348    isARMFunc = !Subtarget->isThumb() || isStub;
1349    // tBX takes a register source operand.
1350    const char *Sym = S->getSymbol();
1351    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1352      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1353      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1354                                                       Sym, ARMPCLabelIndex, 4);
1355      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1356      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1357      Callee = DAG.getLoad(getPointerTy(), dl,
1358                           DAG.getEntryNode(), CPAddr,
1359                           PseudoSourceValue::getConstantPool(), 0,
1360                           false, false, 0);
1361      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1362      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1363                           getPointerTy(), Callee, PICLabel);
1364    } else
1365      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
1366  }
1367
1368  // FIXME: handle tail calls differently.
1369  unsigned CallOpc;
1370  if (Subtarget->isThumb()) {
1371    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1372      CallOpc = ARMISD::CALL_NOLINK;
1373    else
1374      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1375  } else {
1376    CallOpc = (isDirect || Subtarget->hasV5TOps())
1377      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1378      : ARMISD::CALL_NOLINK;
1379  }
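  // Illustrative summary of the choice above: a Thumb caller that cannot rely
  // on BLX (an indirect call, or a call into ARM code, on a pre-v5T core) uses
  // CALL_NOLINK; otherwise Thumb uses CALL for ARM callees and tCALL for Thumb
  // callees.  An ARM caller uses CALL (or the predicable CALL_PRED for a call
  // to a local ARM function), falling back to CALL_NOLINK for indirect calls
  // on pre-v5T cores.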
1380
1381  std::vector<SDValue> Ops;
1382  Ops.push_back(Chain);
1383  Ops.push_back(Callee);
1384
1385  // Add argument registers to the end of the list so that they are known live
1386  // into the call.
1387  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1388    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1389                                  RegsToPass[i].second.getValueType()));
1390
1391  if (InFlag.getNode())
1392    Ops.push_back(InFlag);
1393
1394  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1395  if (isTailCall)
1396    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1397
1398  // Returns a chain and a flag for retval copy to use.
1399  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1400  InFlag = Chain.getValue(1);
1401
1402  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1403                             DAG.getIntPtrConstant(0, true), InFlag);
1404  if (!Ins.empty())
1405    InFlag = Chain.getValue(1);
1406
1407  // Handle result values, copying them out of physregs into vregs that we
1408  // return.
1409  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1410                         dl, DAG, InVals);
1411}
1412
1413/// MatchingStackOffset - Return true if the given stack call argument is
1414/// already available at the same (relative) position in the caller's
1415/// incoming argument stack.
1416static
1417bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1418                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1419                         const ARMInstrInfo *TII) {
1420  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1421  int FI = INT_MAX;
1422  if (Arg.getOpcode() == ISD::CopyFromReg) {
1423    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1424    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
1425      return false;
1426    MachineInstr *Def = MRI->getVRegDef(VR);
1427    if (!Def)
1428      return false;
1429    if (!Flags.isByVal()) {
1430      if (!TII->isLoadFromStackSlot(Def, FI))
1431        return false;
1432    } else {
1433      return false;
1434    }
1435  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1436    if (Flags.isByVal())
1437      // ByVal argument is passed in as a pointer but it's now being
1438      // dereferenced. e.g.
1439      // define @foo(%struct.X* %A) {
1440      //   tail call @bar(%struct.X* byval %A)
1441      // }
1442      return false;
1443    SDValue Ptr = Ld->getBasePtr();
1444    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1445    if (!FINode)
1446      return false;
1447    FI = FINode->getIndex();
1448  } else
1449    return false;
1450
1451  assert(FI != INT_MAX);
1452  if (!MFI->isFixedObjectIndex(FI))
1453    return false;
1454  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1455}
1456
1457/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1458/// for tail call optimization. Targets which want to do tail call
1459/// optimization should implement this function.
1460bool
1461ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1462                                                     CallingConv::ID CalleeCC,
1463                                                     bool isVarArg,
1464                                                     bool isCalleeStructRet,
1465                                                     bool isCallerStructRet,
1466                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1467                                    const SmallVectorImpl<SDValue> &OutVals,
1468                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1469                                                     SelectionDAG& DAG) const {
1470  const Function *CallerF = DAG.getMachineFunction().getFunction();
1471  CallingConv::ID CallerCC = CallerF->getCallingConv();
1472  bool CCMatch = CallerCC == CalleeCC;
1473
1474  // Look for obvious safe cases to perform tail call optimization that do not
1475  // require ABI changes. This is what gcc calls sibcall.
1476
1477  // Do not sibcall optimize vararg calls unless the call site passes no
1478  // arguments.
1479  if (isVarArg && !Outs.empty())
1480    return false;
1481
1482  // Also avoid sibcall optimization if either caller or callee uses struct
1483  // return semantics.
1484  if (isCalleeStructRet || isCallerStructRet)
1485    return false;
1486
1487  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1488  // emitEpilogue is not ready for them.
1489  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1490  // LR.  This means if we need to reload LR, it takes an extra instruction,
1491  // which outweighs the value of the tail call; but here we don't know yet
1492  // whether LR is going to be used.  Probably the right approach is to
1493  // generate the tail call here and turn it back into CALL/RET in
1494  // emitEpilogue if LR is used.
1495  if (Subtarget->isThumb1Only())
1496    return false;
1497
1498  // For the moment, we can only do this to functions defined in this
1499  // compilation, or to indirect calls.  A Thumb B to an ARM function,
1500  // or vice versa, is not easily fixed up by the linker, unlike BL.
1501  // (We could do this by loading the address of the callee into a register;
1502  // that is an extra instruction over the direct call and burns a register
1503  // as well, so is not likely to be a win.)
1504
1505  // It might be safe to remove this restriction on non-Darwin.
1506
1507  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
1508  // but we need to make sure there are enough registers; the only valid
1509  // registers are the 4 used for parameters.  We don't currently do this
1510  // case.
1511  if (isa<ExternalSymbolSDNode>(Callee))
1512    return false;
1513
1514  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1515    const GlobalValue *GV = G->getGlobal();
1516    if (GV->isDeclaration() || GV->isWeakForLinker())
1517      return false;
1518  }
1519
1520  // If the calling conventions do not match, then we'd better make sure the
1521  // results are returned in the same way as what the caller expects.
1522  if (!CCMatch) {
1523    SmallVector<CCValAssign, 16> RVLocs1;
1524    CCState CCInfo1(CalleeCC, false, getTargetMachine(),
1525                    RVLocs1, *DAG.getContext());
1526    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1527
1528    SmallVector<CCValAssign, 16> RVLocs2;
1529    CCState CCInfo2(CallerCC, false, getTargetMachine(),
1530                    RVLocs2, *DAG.getContext());
1531    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1532
1533    if (RVLocs1.size() != RVLocs2.size())
1534      return false;
1535    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1536      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1537        return false;
1538      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1539        return false;
1540      if (RVLocs1[i].isRegLoc()) {
1541        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1542          return false;
1543      } else {
1544        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1545          return false;
1546      }
1547    }
1548  }
1549
1550  // If the callee takes no arguments then go on to check the results of the
1551  // call.
1552  if (!Outs.empty()) {
1553    // Check if stack adjustment is needed. For now, do not do this if any
1554    // argument is passed on the stack.
1555    SmallVector<CCValAssign, 16> ArgLocs;
1556    CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
1557                   ArgLocs, *DAG.getContext());
1558    CCInfo.AnalyzeCallOperands(Outs,
1559                               CCAssignFnForNode(CalleeCC, false, isVarArg));
1560    if (CCInfo.getNextStackOffset()) {
1561      MachineFunction &MF = DAG.getMachineFunction();
1562
1563      // Check if the arguments are already laid out in the right way as
1564      // the caller's fixed stack objects.
1565      MachineFrameInfo *MFI = MF.getFrameInfo();
1566      const MachineRegisterInfo *MRI = &MF.getRegInfo();
1567      const ARMInstrInfo *TII =
1568        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
1569      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1570           i != e;
1571           ++i, ++realArgIdx) {
1572        CCValAssign &VA = ArgLocs[i];
1573        EVT RegVT = VA.getLocVT();
1574        SDValue Arg = OutVals[realArgIdx];
1575        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1576        if (VA.getLocInfo() == CCValAssign::Indirect)
1577          return false;
1578        if (VA.needsCustom()) {
1579          // f64 and vector types are split into multiple registers or
1580          // register/stack-slot combinations.  The types will not match
1581          // the registers; give up on memory f64 refs until we figure
1582          // out what to do about this.
1583          if (!VA.isRegLoc())
1584            return false;
1585          if (!ArgLocs[++i].isRegLoc())
1586            return false;
1587          if (RegVT == MVT::v2f64) {
1588            if (!ArgLocs[++i].isRegLoc())
1589              return false;
1590            if (!ArgLocs[++i].isRegLoc())
1591              return false;
1592          }
1593        } else if (!VA.isRegLoc()) {
1594          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1595                                   MFI, MRI, TII))
1596            return false;
1597        }
1598      }
1599    }
1600  }
1601
1602  return true;
1603}
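// For illustration (not an exhaustive restatement of the checks above): with
// -arm-tail-calls enabled, a non-variadic call such as
//   %r = tail call i32 @callee(i32 %x)
// where @callee is defined in this module, uses the caller's calling
// convention, takes no sret argument, and passes everything in r0-r3 is
// accepted; a variadic call that passes arguments, an sret call, or any call
// from a Thumb1 function is rejected.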
1604
1605SDValue
1606ARMTargetLowering::LowerReturn(SDValue Chain,
1607                               CallingConv::ID CallConv, bool isVarArg,
1608                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1609                               const SmallVectorImpl<SDValue> &OutVals,
1610                               DebugLoc dl, SelectionDAG &DAG) const {
1611
1612  // CCValAssign - represent the assignment of the return value to a location.
1613  SmallVector<CCValAssign, 16> RVLocs;
1614
1615  // CCState - Info about the registers and stack slots.
1616  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1617                 *DAG.getContext());
1618
1619  // Analyze outgoing return values.
1620  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1621                                               isVarArg));
1622
1623  // If this is the first return lowered for this function, add
1624  // the regs to the liveout set for the function.
1625  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1626    for (unsigned i = 0; i != RVLocs.size(); ++i)
1627      if (RVLocs[i].isRegLoc())
1628        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1629  }
1630
1631  SDValue Flag;
1632
1633  // Copy the result values into the output registers.
1634  for (unsigned i = 0, realRVLocIdx = 0;
1635       i != RVLocs.size();
1636       ++i, ++realRVLocIdx) {
1637    CCValAssign &VA = RVLocs[i];
1638    assert(VA.isRegLoc() && "Can only return in registers!");
1639
1640    SDValue Arg = OutVals[realRVLocIdx];
1641
1642    switch (VA.getLocInfo()) {
1643    default: llvm_unreachable("Unknown loc info!");
1644    case CCValAssign::Full: break;
1645    case CCValAssign::BCvt:
1646      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1647      break;
1648    }
1649
1650    if (VA.needsCustom()) {
1651      if (VA.getLocVT() == MVT::v2f64) {
1652        // Extract the first half and return it in two registers.
1653        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1654                                   DAG.getConstant(0, MVT::i32));
1655        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1656                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
1657
1658        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1659        Flag = Chain.getValue(1);
1660        VA = RVLocs[++i]; // skip ahead to next loc
1661        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1662                                 HalfGPRs.getValue(1), Flag);
1663        Flag = Chain.getValue(1);
1664        VA = RVLocs[++i]; // skip ahead to next loc
1665
1666        // Extract the 2nd half and fall through to handle it as an f64 value.
1667        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1668                          DAG.getConstant(1, MVT::i32));
1669      }
1670      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1671      // available.
1672      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1673                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1674      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1675      Flag = Chain.getValue(1);
1676      VA = RVLocs[++i]; // skip ahead to next loc
1677      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1678                               Flag);
1679    } else
1680      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1681
1682    // Guarantee that all emitted copies are stuck together with the flag
1683    // operand, so that nothing can be scheduled in between them.
1684    Flag = Chain.getValue(1);
1685  }
1686
1687  SDValue Result;
1688  if (Flag.getNode())
1689    Result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1690  else // Return void.
1691    Result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1692
1693  return Result;
1694}
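// For illustration: when an f64 return value must go back in core registers
// (the VA.needsCustom() path above), it is split with ARMISD::VMOVRRD and
// copied into two GPR return registers; a v2f64 return is first split into
// its two f64 halves, each handled the same way, so it can occupy four GPRs.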
1695
1696// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1697// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1698// one of the above mentioned nodes. It has to be wrapped because otherwise
1699// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1700// be used to form addressing modes. These wrapped nodes will be selected
1701// into MOVi.
1702static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1703  EVT PtrVT = Op.getValueType();
1704  // FIXME: there is no actual debug info here.
1705  DebugLoc dl = Op.getDebugLoc();
1706  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1707  SDValue Res;
1708  if (CP->isMachineConstantPoolEntry())
1709    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1710                                    CP->getAlignment());
1711  else
1712    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1713                                    CP->getAlignment());
1714  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1715}
1716
1717unsigned ARMTargetLowering::getJumpTableEncoding() const {
1718  return MachineJumpTableInfo::EK_Inline;
1719}
1720
1721SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1722                                             SelectionDAG &DAG) const {
1723  MachineFunction &MF = DAG.getMachineFunction();
1724  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1725  unsigned ARMPCLabelIndex = 0;
1726  DebugLoc DL = Op.getDebugLoc();
1727  EVT PtrVT = getPointerTy();
1728  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1729  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1730  SDValue CPAddr;
1731  if (RelocM == Reloc::Static) {
1732    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1733  } else {
1734    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1735    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1736    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
1737                                                         ARMCP::CPBlockAddress,
1738                                                         PCAdj);
1739    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1740  }
1741  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1742  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1743                               PseudoSourceValue::getConstantPool(), 0,
1744                               false, false, 0);
1745  if (RelocM == Reloc::Static)
1746    return Result;
1747  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1748  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1749}
1750
1751// Lower ISD::GlobalTLSAddress using the "general dynamic" model
1752SDValue
1753ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1754                                                 SelectionDAG &DAG) const {
1755  DebugLoc dl = GA->getDebugLoc();
1756  EVT PtrVT = getPointerTy();
1757  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1758  MachineFunction &MF = DAG.getMachineFunction();
1759  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1760  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1761  ARMConstantPoolValue *CPV =
1762    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1763                             ARMCP::CPValue, PCAdj, "tlsgd", true);
1764  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1765  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1766  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1767                         PseudoSourceValue::getConstantPool(), 0,
1768                         false, false, 0);
1769  SDValue Chain = Argument.getValue(1);
1770
1771  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1772  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1773
1774  // call __tls_get_addr.
1775  ArgListTy Args;
1776  ArgListEntry Entry;
1777  Entry.Node = Argument;
1778  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
1779  Args.push_back(Entry);
1780  // FIXME: is there useful debug info available here?
1781  std::pair<SDValue, SDValue> CallResult =
1782    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
1783                false, false, false, false,
1784                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1785                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1786  return CallResult.first;
1787}
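// For illustration only, the general-dynamic sequence constructed above
// roughly corresponds to (schematic; labels and offsets are assumptions of
// this note):
//   ldr  r0, .LCPI          @ constant-pool entry: var(tlsgd) + pc-label adj
// .LPC:
//   add  r0, pc, r0
//   bl   __tls_get_addr     @ returns the address of the variable in r0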
1788
1789// Lower ISD::GlobalTLSAddress using the "initial exec" or
1790// "local exec" model.
1791SDValue
1792ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1793                                        SelectionDAG &DAG) const {
1794  const GlobalValue *GV = GA->getGlobal();
1795  DebugLoc dl = GA->getDebugLoc();
1796  SDValue Offset;
1797  SDValue Chain = DAG.getEntryNode();
1798  EVT PtrVT = getPointerTy();
1799  // Get the Thread Pointer
1800  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1801
1802  if (GV->isDeclaration()) {
1803    MachineFunction &MF = DAG.getMachineFunction();
1804    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1805    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1806    // Initial exec model.
1807    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1808    ARMConstantPoolValue *CPV =
1809      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1810                               ARMCP::CPValue, PCAdj, "gottpoff", true);
1811    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1812    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1813    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1814                         PseudoSourceValue::getConstantPool(), 0,
1815                         false, false, 0);
1816    Chain = Offset.getValue(1);
1817
1818    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1819    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1820
1821    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1822                         PseudoSourceValue::getConstantPool(), 0,
1823                         false, false, 0);
1824  } else {
1825    // local exec model
1826    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
1827    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1828    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1829    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1830                         PseudoSourceValue::getConstantPool(), 0,
1831                         false, false, 0);
1832  }
1833
1834  // The address of the thread local variable is the add of the thread
1835  // pointer with the offset of the variable.
1836  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1837}
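// For illustration: in the initial-exec path above, the variable's "gottpoff"
// offset is loaded indirectly through the GOT (hence the extra load), whereas
// the local-exec path reads the "tpoff" offset straight from the constant
// pool; in both cases the final address is thread pointer + offset.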
1838
1839SDValue
1840ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
1841  // TODO: implement the "local dynamic" model
1842  assert(Subtarget->isTargetELF() &&
1843         "TLS not implemented for non-ELF targets");
1844  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1845  // If the relocation model is PIC, use the "General Dynamic" TLS model;
1846  // otherwise use the "Initial Exec" or "Local Exec" TLS model.
1847  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1848    return LowerToTLSGeneralDynamicModel(GA, DAG);
1849  else
1850    return LowerToTLSExecModels(GA, DAG);
1851}
1852
1853SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1854                                                 SelectionDAG &DAG) const {
1855  EVT PtrVT = getPointerTy();
1856  DebugLoc dl = Op.getDebugLoc();
1857  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1858  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1859  if (RelocM == Reloc::PIC_) {
1860    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
1861    ARMConstantPoolValue *CPV =
1862      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
1863    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1864    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1865    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1866                                 CPAddr,
1867                                 PseudoSourceValue::getConstantPool(), 0,
1868                                 false, false, 0);
1869    SDValue Chain = Result.getValue(1);
1870    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1871    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1872    if (!UseGOTOFF)
1873      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1874                           PseudoSourceValue::getGOT(), 0,
1875                           false, false, 0);
1876    return Result;
1877  } else {
1878    // If we have T2 ops, we can materialize the address directly via movt/movw
1879    // pair. This is always cheaper.
1880    if (Subtarget->useMovt()) {
1881      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
1882                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));
1883    } else {
1884      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1885      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1886      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1887                         PseudoSourceValue::getConstantPool(), 0,
1888                         false, false, 0);
1889    }
1890  }
1891}
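// For illustration: with movt support, a non-PIC access to a global above is
// expected to select into a movw/movt pair, e.g. (schematic)
//   movw r0, :lower16:gvar
//   movt r0, :upper16:gvar
// whereas without movt the address is loaded from the constant pool.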
1892
1893SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1894                                                    SelectionDAG &DAG) const {
1895  MachineFunction &MF = DAG.getMachineFunction();
1896  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1897  unsigned ARMPCLabelIndex = 0;
1898  EVT PtrVT = getPointerTy();
1899  DebugLoc dl = Op.getDebugLoc();
1900  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1901  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1902  SDValue CPAddr;
1903  if (RelocM == Reloc::Static)
1904    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1905  else {
1906    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1907    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
1908    ARMConstantPoolValue *CPV =
1909      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
1910    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1911  }
1912  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1913
1914  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1915                               PseudoSourceValue::getConstantPool(), 0,
1916                               false, false, 0);
1917  SDValue Chain = Result.getValue(1);
1918
1919  if (RelocM == Reloc::PIC_) {
1920    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1921    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1922  }
1923
1924  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
1925    Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1926                         PseudoSourceValue::getGOT(), 0,
1927                         false, false, 0);
1928
1929  return Result;
1930}
1931
1932SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1933                                                    SelectionDAG &DAG) const {
1934  assert(Subtarget->isTargetELF() &&
1935         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1936  MachineFunction &MF = DAG.getMachineFunction();
1937  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1938  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1939  EVT PtrVT = getPointerTy();
1940  DebugLoc dl = Op.getDebugLoc();
1941  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1942  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1943                                                       "_GLOBAL_OFFSET_TABLE_",
1944                                                       ARMPCLabelIndex, PCAdj);
1945  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1946  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1947  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1948                               PseudoSourceValue::getConstantPool(), 0,
1949                               false, false, 0);
1950  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1951  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1952}
1953
1954SDValue
1955ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
1956  DebugLoc dl = Op.getDebugLoc();
1957  SDValue Val = DAG.getConstant(0, MVT::i32);
1958  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
1959                     Op.getOperand(1), Val);
1960}
1961
1962SDValue
1963ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
1964  DebugLoc dl = Op.getDebugLoc();
1965  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
1966                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
1967}
1968
1969SDValue
1970ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
1971                                          const ARMSubtarget *Subtarget) const {
1972  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1973  DebugLoc dl = Op.getDebugLoc();
1974  switch (IntNo) {
1975  default: return SDValue();    // Don't custom lower most intrinsics.
1976  case Intrinsic::arm_thread_pointer: {
1977    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1978    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1979  }
1980  case Intrinsic::eh_sjlj_lsda: {
1981    MachineFunction &MF = DAG.getMachineFunction();
1982    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1983    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1984    EVT PtrVT = getPointerTy();
1985    DebugLoc dl = Op.getDebugLoc();
1986    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1987    SDValue CPAddr;
1988    unsigned PCAdj = (RelocM != Reloc::PIC_)
1989      ? 0 : (Subtarget->isThumb() ? 4 : 8);
1990    ARMConstantPoolValue *CPV =
1991      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
1992                               ARMCP::CPLSDA, PCAdj);
1993    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1994    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1995    SDValue Result =
1996      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1997                  PseudoSourceValue::getConstantPool(), 0,
1998                  false, false, 0);
1999
2000    if (RelocM == Reloc::PIC_) {
2001      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2002      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2003    }
2004    return Result;
2005  }
2006  }
2007}
2008
2009static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
2010                               const ARMSubtarget *Subtarget) {
2011  DebugLoc dl = Op.getDebugLoc();
2012  SDValue Op5 = Op.getOperand(5);
2013  unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
2014  // Some subtargets which have dmb and dsb instructions can handle barriers
2015  // directly. Some ARMv6 CPUs can support them with the help of the mcr
2016  // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
2017  // never get here.
2018  unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
2019  if (Subtarget->hasDataBarrier())
2020    return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
2021  else {
2022    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
2023           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2024    return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
2025                       DAG.getConstant(0, MVT::i32));
2026  }
2027}
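// For illustration: on subtargets with a data barrier the node built above is
// expected to select into a dmb/dsb instruction, while the ARMv6 fallback path
// relies on the mcr-based barrier mentioned in the comment above.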
2028
2029static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2030  MachineFunction &MF = DAG.getMachineFunction();
2031  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2032
2033  // vastart just stores the address of the VarArgsFrameIndex slot into the
2034  // memory location argument.
2035  DebugLoc dl = Op.getDebugLoc();
2036  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2037  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2038  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2039  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
2040                      false, false, 0);
2041}
2042
2043SDValue
2044ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2045                                        SDValue &Root, SelectionDAG &DAG,
2046                                        DebugLoc dl) const {
2047  MachineFunction &MF = DAG.getMachineFunction();
2048  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2049
2050  TargetRegisterClass *RC;
2051  if (AFI->isThumb1OnlyFunction())
2052    RC = ARM::tGPRRegisterClass;
2053  else
2054    RC = ARM::GPRRegisterClass;
2055
2056  // Transform the arguments stored in physical registers into virtual ones.
2057  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2058  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2059
2060  SDValue ArgValue2;
2061  if (NextVA.isMemLoc()) {
2062    MachineFrameInfo *MFI = MF.getFrameInfo();
2063    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2064
2065    // Create load node to retrieve arguments from the stack.
2066    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2067    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2068                            PseudoSourceValue::getFixedStack(FI), 0,
2069                            false, false, 0);
2070  } else {
2071    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2072    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2073  }
2074
2075  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2076}
2077
2078SDValue
2079ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2080                                        CallingConv::ID CallConv, bool isVarArg,
2081                                        const SmallVectorImpl<ISD::InputArg>
2082                                          &Ins,
2083                                        DebugLoc dl, SelectionDAG &DAG,
2084                                        SmallVectorImpl<SDValue> &InVals)
2085                                          const {
2086
2087  MachineFunction &MF = DAG.getMachineFunction();
2088  MachineFrameInfo *MFI = MF.getFrameInfo();
2089
2090  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2091
2092  // Assign locations to all of the incoming arguments.
2093  SmallVector<CCValAssign, 16> ArgLocs;
2094  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
2095                 *DAG.getContext());
2096  CCInfo.AnalyzeFormalArguments(Ins,
2097                                CCAssignFnForNode(CallConv, /* Return*/ false,
2098                                                  isVarArg));
2099
2100  SmallVector<SDValue, 16> ArgValues;
2101
2102  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2103    CCValAssign &VA = ArgLocs[i];
2104
2105    // Arguments stored in registers.
2106    if (VA.isRegLoc()) {
2107      EVT RegVT = VA.getLocVT();
2108
2109      SDValue ArgValue;
2110      if (VA.needsCustom()) {
2111        // f64 and vector types are split up into multiple registers or
2112        // combinations of registers and stack slots.
2113        if (VA.getLocVT() == MVT::v2f64) {
2114          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2115                                                   Chain, DAG, dl);
2116          VA = ArgLocs[++i]; // skip ahead to next loc
2117          SDValue ArgValue2;
2118          if (VA.isMemLoc()) {
2119            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
2120            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2121            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2122                                    PseudoSourceValue::getFixedStack(FI), 0,
2123                                    false, false, 0);
2124          } else {
2125            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2126                                             Chain, DAG, dl);
2127          }
2128          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2129          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2130                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2131          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2132                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2133        } else
2134          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2135
2136      } else {
2137        TargetRegisterClass *RC;
2138
2139        if (RegVT == MVT::f32)
2140          RC = ARM::SPRRegisterClass;
2141        else if (RegVT == MVT::f64)
2142          RC = ARM::DPRRegisterClass;
2143        else if (RegVT == MVT::v2f64)
2144          RC = ARM::QPRRegisterClass;
2145        else if (RegVT == MVT::i32)
2146          RC = (AFI->isThumb1OnlyFunction() ?
2147                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2148        else
2149          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2150
2151        // Transform the arguments in physical registers into virtual ones.
2152        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2153        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2154      }
2155
2156      // If this is an 8 or 16-bit value, it is really passed promoted
2157      // to 32 bits.  Insert an assert[sz]ext to capture this, then
2158      // truncate to the right size.
2159      switch (VA.getLocInfo()) {
2160      default: llvm_unreachable("Unknown loc info!");
2161      case CCValAssign::Full: break;
2162      case CCValAssign::BCvt:
2163        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
2164        break;
2165      case CCValAssign::SExt:
2166        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2167                               DAG.getValueType(VA.getValVT()));
2168        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2169        break;
2170      case CCValAssign::ZExt:
2171        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2172                               DAG.getValueType(VA.getValVT()));
2173        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2174        break;
2175      }
2176
2177      InVals.push_back(ArgValue);
2178
2179    } else { // VA.isRegLoc()
2180
2181      // sanity check
2182      assert(VA.isMemLoc());
2183      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2184
2185      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
2186      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
2187
2188      // Create load nodes to retrieve arguments from the stack.
2189      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2190      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2191                                   PseudoSourceValue::getFixedStack(FI), 0,
2192                                   false, false, 0));
2193    }
2194  }
2195
2196  // varargs
2197  if (isVarArg) {
2198    static const unsigned GPRArgRegs[] = {
2199      ARM::R0, ARM::R1, ARM::R2, ARM::R3
2200    };
2201
2202    unsigned NumGPRs = CCInfo.getFirstUnallocated
2203      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2204
2205    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
2206    unsigned VARegSize = (4 - NumGPRs) * 4;
2207    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2208    unsigned ArgOffset = CCInfo.getNextStackOffset();
2209    if (VARegSaveSize) {
2210      // If this function is vararg, store any remaining integer argument regs
2211      // to their spots on the stack so that they may be loaded by dereferencing
2212      // the result of va_next.
2213      AFI->setVarArgsRegSaveSize(VARegSaveSize);
2214      AFI->setVarArgsFrameIndex(
2215        MFI->CreateFixedObject(VARegSaveSize,
2216                               ArgOffset + VARegSaveSize - VARegSize,
2217                               true));
2218      SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2219                                      getPointerTy());
2220
2221      SmallVector<SDValue, 4> MemOps;
2222      for (; NumGPRs < 4; ++NumGPRs) {
2223        TargetRegisterClass *RC;
2224        if (AFI->isThumb1OnlyFunction())
2225          RC = ARM::tGPRRegisterClass;
2226        else
2227          RC = ARM::GPRRegisterClass;
2228
2229        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
2230        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2231        SDValue Store =
2232          DAG.getStore(Val.getValue(1), dl, Val, FIN,
2233               PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
2234               0, false, false, 0);
2235        MemOps.push_back(Store);
2236        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2237                          DAG.getConstant(4, getPointerTy()));
2238      }
2239      if (!MemOps.empty())
2240        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2241                            &MemOps[0], MemOps.size());
2242    } else
2243      // This will point to the next argument passed via the stack.
2244      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
2245  }
2246
2247  return Chain;
2248}
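// For illustration: in a variadic function such as
//   int sum(int n, ...);
// only r0 is claimed by the fixed argument, so the loop above stores the
// remaining argument registers (here r1-r3) into a fixed stack object;
// together with any caller-pushed arguments this forms one contiguous area
// that va_start records and va_arg can walk.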
2249
2250/// isFloatingPointZero - Return true if this is +0.0.
2251static bool isFloatingPointZero(SDValue Op) {
2252  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2253    return CFP->getValueAPF().isPosZero();
2254  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2255    // Maybe this has already been legalized into the constant pool?
2256    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2257      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2258      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2259        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2260          return CFP->getValueAPF().isPosZero();
2261    }
2262  }
2263  return false;
2264}
2265
2266/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
2267/// the given operands.
2268SDValue
2269ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2270                             SDValue &ARMcc, SelectionDAG &DAG,
2271                             DebugLoc dl) const {
2272  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2273    unsigned C = RHSC->getZExtValue();
2274    if (!isLegalICmpImmediate(C)) {
2275      // Constant does not fit, try adjusting it by one?
2276      switch (CC) {
2277      default: break;
2278      case ISD::SETLT:
2279      case ISD::SETGE:
2280        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
2281          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2282          RHS = DAG.getConstant(C-1, MVT::i32);
2283        }
2284        break;
2285      case ISD::SETULT:
2286      case ISD::SETUGE:
2287        if (C != 0 && isLegalICmpImmediate(C-1)) {
2288          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2289          RHS = DAG.getConstant(C-1, MVT::i32);
2290        }
2291        break;
2292      case ISD::SETLE:
2293      case ISD::SETGT:
2294        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
2295          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2296          RHS = DAG.getConstant(C+1, MVT::i32);
2297        }
2298        break;
2299      case ISD::SETULE:
2300      case ISD::SETUGT:
2301        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
2302          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2303          RHS = DAG.getConstant(C+1, MVT::i32);
2304        }
2305        break;
2306      }
2307    }
2308  }
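  // For example (illustrative): "x < 257" cannot use 257 (0x101) directly,
  // since it is not a valid ARM modified immediate, but 256 (0x100) is, so the
  // adjustment above rewrites the comparison as "x <= 256".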
2309
2310  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2311  ARMISD::NodeType CompareType;
2312  switch (CondCode) {
2313  default:
2314    CompareType = ARMISD::CMP;
2315    break;
2316  case ARMCC::EQ:
2317  case ARMCC::NE:
2318    // Uses only Z Flag
2319    CompareType = ARMISD::CMPZ;
2320    break;
2321  }
2322  ARMcc = DAG.getConstant(CondCode, MVT::i32);
2323  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
2324}
2325
2326/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2327SDValue
2328ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2329                             DebugLoc dl) const {
2330  SDValue Cmp;
2331  if (!isFloatingPointZero(RHS))
2332    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
2333  else
2334    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
2335  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
2336}
2337
2338SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2339  SDValue Cond = Op.getOperand(0);
2340  SDValue SelectTrue = Op.getOperand(1);
2341  SDValue SelectFalse = Op.getOperand(2);
2342  DebugLoc dl = Op.getDebugLoc();
2343
2344  // Convert:
2345  //
2346  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
2347  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
2348  //
2349  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
2350    const ConstantSDNode *CMOVTrue =
2351      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
2352    const ConstantSDNode *CMOVFalse =
2353      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
2354
2355    if (CMOVTrue && CMOVFalse) {
2356      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
2357      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
2358
2359      SDValue True;
2360      SDValue False;
2361      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
2362        True = SelectTrue;
2363        False = SelectFalse;
2364      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
2365        True = SelectFalse;
2366        False = SelectTrue;
2367      }
2368
2369      if (True.getNode() && False.getNode()) {
2370        EVT VT = Cond.getValueType();
2371        SDValue ARMcc = Cond.getOperand(2);
2372        SDValue CCR = Cond.getOperand(3);
2373        SDValue Cmp = Cond.getOperand(4);
2374        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
2375      }
2376    }
2377  }
2378
2379  return DAG.getSelectCC(dl, Cond,
2380                         DAG.getConstant(0, Cond.getValueType()),
2381                         SelectTrue, SelectFalse, ISD::SETNE);
2382}
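// For illustration: the transform above turns
//   select (cmov 1, 0, cond), %t, %f
// into a single (cmov %t, %f, cond), rather than materializing the boolean
// result of the first CMOV and comparing it against zero again.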
2383
2384SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2385  EVT VT = Op.getValueType();
2386  SDValue LHS = Op.getOperand(0);
2387  SDValue RHS = Op.getOperand(1);
2388  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2389  SDValue TrueVal = Op.getOperand(2);
2390  SDValue FalseVal = Op.getOperand(3);
2391  DebugLoc dl = Op.getDebugLoc();
2392
2393  if (LHS.getValueType() == MVT::i32) {
2394    SDValue ARMcc;
2395    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2396    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2397    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
2398  }
2399
2400  ARMCC::CondCodes CondCode, CondCode2;
2401  FPCCToARMCC(CC, CondCode, CondCode2);
2402
2403  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2404  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2405  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2406  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2407                               ARMcc, CCR, Cmp);
2408  if (CondCode2 != ARMCC::AL) {
2409    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
2410    // FIXME: Needs another CMP because flag can have but one use.
2411    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2412    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2413                         Result, TrueVal, ARMcc2, CCR, Cmp2);
2414  }
2415  return Result;
2416}
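// For illustration: some floating-point predicates have no single ARM
// condition-code equivalent (an unordered-or-equal compare, for instance, must
// check both "equal" and "unordered"), which is why the CondCode2 path above
// issues a second comparison and a second CMOV.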
2417
2418/// canChangeToInt - Given the fp compare operand, return true if it is suitable
2419/// to morph to an integer compare sequence.
2420static bool canChangeToInt(SDValue Op, bool &SeenZero,
2421                           const ARMSubtarget *Subtarget) {
2422  SDNode *N = Op.getNode();
2423  if (!N->hasOneUse())
2424    // Otherwise it requires moving the value from fp to integer registers.
2425    return false;
2426  if (!N->getNumValues())
2427    return false;
2428  EVT VT = Op.getValueType();
2429  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
2430    // The f32 case is generally profitable. The f64 case only makes sense when
2431    // vcmpe + vmrs are very slow, e.g. on Cortex-A8.
2432    return false;
2433
2434  if (isFloatingPointZero(Op)) {
2435    SeenZero = true;
2436    return true;
2437  }
2438  return ISD::isNormalLoad(N);
2439}
2440
2441static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2442  if (isFloatingPointZero(Op))
2443    return DAG.getConstant(0, MVT::i32);
2444
2445  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
2446    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2447                       Ld->getChain(), Ld->getBasePtr(),
2448                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
2449                       Ld->isVolatile(), Ld->isNonTemporal(),
2450                       Ld->getAlignment());
2451
2452  llvm_unreachable("Unknown VFP cmp argument!");
2453}
2454
2455static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2456                           SDValue &RetVal1, SDValue &RetVal2) {
2457  if (isFloatingPointZero(Op)) {
2458    RetVal1 = DAG.getConstant(0, MVT::i32);
2459    RetVal2 = DAG.getConstant(0, MVT::i32);
2460    return;
2461  }
2462
2463  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
2464    SDValue Ptr = Ld->getBasePtr();
2465    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2466                          Ld->getChain(), Ptr,
2467                          Ld->getSrcValue(), Ld->getSrcValueOffset(),
2468                          Ld->isVolatile(), Ld->isNonTemporal(),
2469                          Ld->getAlignment());
2470
2471    EVT PtrType = Ptr.getValueType();
2472    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2473    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2474                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
2475    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2476                          Ld->getChain(), NewPtr,
2477                          Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
2478                          Ld->isVolatile(), Ld->isNonTemporal(),
2479                          NewAlign);
2480    return;
2481  }
2482
2483  llvm_unreachable("Unknown VFP cmp argument!");
2484}
2485
2486/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2487/// f32 and even f64 comparisons to integer ones.
2488SDValue
2489ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2490  SDValue Chain = Op.getOperand(0);
2491  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2492  SDValue LHS = Op.getOperand(2);
2493  SDValue RHS = Op.getOperand(3);
2494  SDValue Dest = Op.getOperand(4);
2495  DebugLoc dl = Op.getDebugLoc();
2496
2497  bool SeenZero = false;
2498  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2499      canChangeToInt(RHS, SeenZero, Subtarget) &&
2500      // If one of the operands is zero, it's safe to ignore the NaN case since
2501      // we only care about equality comparisons.
2502      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
2503    // If unsafe fp math optimization is enabled and there are no other uses of
2504    // the CMP operands, and the condition code is EQ or NE, we can optimize it
2505    // to an integer comparison.
2506    if (CC == ISD::SETOEQ)
2507      CC = ISD::SETEQ;
2508    else if (CC == ISD::SETUNE)
2509      CC = ISD::SETNE;
2510
2511    SDValue ARMcc;
2512    if (LHS.getValueType() == MVT::f32) {
2513      LHS = bitcastf32Toi32(LHS, DAG);
2514      RHS = bitcastf32Toi32(RHS, DAG);
2515      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2516      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2517      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2518                         Chain, Dest, ARMcc, CCR, Cmp);
2519    }
2520
2521    SDValue LHS1, LHS2;
2522    SDValue RHS1, RHS2;
2523    expandf64Toi32(LHS, DAG, LHS1, LHS2);
2524    expandf64Toi32(RHS, DAG, RHS1, RHS2);
2525    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2526    ARMcc = DAG.getConstant(CondCode, MVT::i32);
2527    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2528    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
2529    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
2530  }
2531
2532  return SDValue();
2533}
2534
2535SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2536  SDValue Chain = Op.getOperand(0);
2537  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2538  SDValue LHS = Op.getOperand(2);
2539  SDValue RHS = Op.getOperand(3);
2540  SDValue Dest = Op.getOperand(4);
2541  DebugLoc dl = Op.getDebugLoc();
2542
2543  if (LHS.getValueType() == MVT::i32) {
2544    SDValue ARMcc;
2545    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2546    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2547    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2548                       Chain, Dest, ARMcc, CCR, Cmp);
2549  }
2550
2551  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2552
2553  if (UnsafeFPMath &&
2554      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2555       CC == ISD::SETNE || CC == ISD::SETUNE)) {
2556    SDValue Result = OptimizeVFPBrcond(Op, DAG);
2557    if (Result.getNode())
2558      return Result;
2559  }
2560
2561  ARMCC::CondCodes CondCode, CondCode2;
2562  FPCCToARMCC(CC, CondCode, CondCode2);
2563
2564  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2565  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2566  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2567  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2568  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
2569  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2570  if (CondCode2 != ARMCC::AL) {
2571    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
2572    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
2573    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2574  }
2575  return Res;
2576}
2577
2578SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2579  SDValue Chain = Op.getOperand(0);
2580  SDValue Table = Op.getOperand(1);
2581  SDValue Index = Op.getOperand(2);
2582  DebugLoc dl = Op.getDebugLoc();
2583
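  // A sketch of the non-Thumb2 lowering below: the entry address is computed
  // as "table + index * 4"; in PIC mode each entry holds an offset that is
  // added back to the table base before branching, otherwise the entry is
  // loaded and branched to directly.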
2584  EVT PTy = getPointerTy();
2585  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2586  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2587  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2588  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2589  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2590  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2591  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2592  if (Subtarget->isThumb2()) {
2593    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2594    // which does another jump to the destination. This also makes it easier
2595    // to translate it to TBB / TBH later.
2596    // FIXME: This might not work if the function is extremely large.
2597    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2598                       Addr, Op.getOperand(2), JTI, UId);
2599  }
2600  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2601    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2602                       PseudoSourceValue::getJumpTable(), 0,
2603                       false, false, 0);
2604    Chain = Addr.getValue(1);
2605    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2606    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2607  } else {
2608    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2609                       PseudoSourceValue::getJumpTable(), 0, false, false, 0);
2610    Chain = Addr.getValue(1);
2611    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2612  }
2613}
2614
2615static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2616  DebugLoc dl = Op.getDebugLoc();
2617  unsigned Opc;
2618
2619  switch (Op.getOpcode()) {
2620  default:
2621    assert(0 && "Invalid opcode!");
2622  case ISD::FP_TO_SINT:
2623    Opc = ARMISD::FTOSI;
2624    break;
2625  case ISD::FP_TO_UINT:
2626    Opc = ARMISD::FTOUI;
2627    break;
2628  }
2629  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2630  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
2631}
2632
2633static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2634  EVT VT = Op.getValueType();
2635  DebugLoc dl = Op.getDebugLoc();
2636  unsigned Opc;
2637
2638  switch (Op.getOpcode()) {
2639  default:
2640    assert(0 && "Invalid opcode!");
2641  case ISD::SINT_TO_FP:
2642    Opc = ARMISD::SITOF;
2643    break;
2644  case ISD::UINT_TO_FP:
2645    Opc = ARMISD::UITOF;
2646    break;
2647  }
2648
2649  Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
2650  return DAG.getNode(Opc, dl, VT, Op);
2651}
2652
2653SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
2654  // Implement fcopysign with a fabs and a conditional fneg.
2655  SDValue Tmp0 = Op.getOperand(0);
2656  SDValue Tmp1 = Op.getOperand(1);
2657  DebugLoc dl = Op.getDebugLoc();
2658  EVT VT = Op.getValueType();
2659  EVT SrcVT = Tmp1.getValueType();
2660  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2661  SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
2662  SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
2663  SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
2664  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2665  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
2666}
2667
2668SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
2669  MachineFunction &MF = DAG.getMachineFunction();
2670  MachineFrameInfo *MFI = MF.getFrameInfo();
2671  MFI->setReturnAddressIsTaken(true);
2672
2673  EVT VT = Op.getValueType();
2674  DebugLoc dl = Op.getDebugLoc();
2675  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2676  if (Depth) {
2677    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2678    SDValue Offset = DAG.getConstant(4, MVT::i32);
2679    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2680                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2681                       NULL, 0, false, false, 0);
2682  }
2683
2684  // Return LR, which contains the return address. Mark it an implicit live-in.
2685  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
2686  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
2687}
2688
2689SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2690  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2691  MFI->setFrameAddressIsTaken(true);
2692
2693  EVT VT = Op.getValueType();
2694  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
2695  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2696  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2697    ? ARM::R7 : ARM::R11;
2698  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2699  while (Depth--)
2700    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
2701                            false, false, 0);
2702  return FrameAddr;
2703}
2704
2705/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
2706/// expand a bit convert where either the source or destination type is i64 to
2707/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
2708/// operand type is illegal (e.g., v2f32 for a target that doesn't support
2709/// vectors), since the legalizer won't know what to do with that.
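///
/// Roughly, the two directions handled here are:
///   (f64 (bit_convert (i64 X))) -> (VMOVDRR (extract_element X, 0),
///                                            (extract_element X, 1))
///   (i64 (bit_convert (f64 X))) -> (build_pair (VMOVRRD X))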
2710static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
2711  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2712  DebugLoc dl = N->getDebugLoc();
2713  SDValue Op = N->getOperand(0);
2714
2715  // This function is only supposed to be called for i64 types, either as the
2716  // source or destination of the bit convert.
2717  EVT SrcVT = Op.getValueType();
2718  EVT DstVT = N->getValueType(0);
2719  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2720         "ExpandBIT_CONVERT called for non-i64 type");
2721
2722  // Turn i64->f64 into VMOVDRR.
2723  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2724    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2725                             DAG.getConstant(0, MVT::i32));
2726    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2727                             DAG.getConstant(1, MVT::i32));
2728    return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
2729                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
2730  }
2731
2732  // Turn f64->i64 into VMOVRRD.
2733  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2734    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2735                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2736    // Merge the pieces into a single i64 value.
2737    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2738  }
2739
2740  return SDValue();
2741}
2742
2743/// getZeroVector - Returns a vector of specified type with all zero elements.
2744/// Zero vectors are used to represent vector negation and in those cases
2745/// will be implemented with the NEON VNEG instruction.  However, VNEG does
2746/// not support i64 elements, so sometimes the zero vectors will need to be
2747/// explicitly constructed.  Regardless, use a canonical VMOV to create the
2748/// zero vector.
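///
/// For example, requesting a zero vector produces an ARMISD::VMOVIMM of the
/// encoded immediate 0 (bit-converted to the requested type), which selects to
/// something like "vmov.i32 dN, #0" or "vmov.i32 qN, #0".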
2749static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2750  assert(VT.isVector() && "Expected a vector type");
2751  // The canonical modified immediate encoding of a zero vector is....0!
2752  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
2753  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
2754  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
2755  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
2756}
2757
2758/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
2759/// i32 values and take a 2 x i32 value to shift plus a shift amount.
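///
/// A sketch of the lowering for "lo, hi = sra_parts x_lo, x_hi, amt":
///   hi = x_hi >> amt   (arithmetic shift for SRA_PARTS, logical for SRL_PARTS)
///   lo = (x_lo >>u amt) | (x_hi << (32-amt))   if amt < 32
///   lo = x_hi >> (amt-32)                      if amt >= 32 (chosen via CMOV)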
2760SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2761                                                SelectionDAG &DAG) const {
2762  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2763  EVT VT = Op.getValueType();
2764  unsigned VTBits = VT.getSizeInBits();
2765  DebugLoc dl = Op.getDebugLoc();
2766  SDValue ShOpLo = Op.getOperand(0);
2767  SDValue ShOpHi = Op.getOperand(1);
2768  SDValue ShAmt  = Op.getOperand(2);
2769  SDValue ARMcc;
2770  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2771
2772  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2773
2774  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2775                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2776  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2777  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2778                                   DAG.getConstant(VTBits, MVT::i32));
2779  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2780  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2781  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2782
2783  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2784  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2785                          ARMcc, DAG, dl);
2786  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2787  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
2788                           CCR, Cmp);
2789
2790  SDValue Ops[2] = { Lo, Hi };
2791  return DAG.getMergeValues(Ops, 2, dl);
2792}
2793
2794/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2795/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
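///
/// A sketch of the lowering for "lo, hi = shl_parts x_lo, x_hi, amt":
///   lo = x_lo << amt
///   hi = (x_hi << amt) | (x_lo >>u (32-amt))   if amt < 32
///   hi = x_lo << (amt-32)                      if amt >= 32 (chosen via CMOV)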
2796SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2797                                               SelectionDAG &DAG) const {
2798  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2799  EVT VT = Op.getValueType();
2800  unsigned VTBits = VT.getSizeInBits();
2801  DebugLoc dl = Op.getDebugLoc();
2802  SDValue ShOpLo = Op.getOperand(0);
2803  SDValue ShOpHi = Op.getOperand(1);
2804  SDValue ShAmt  = Op.getOperand(2);
2805  SDValue ARMcc;
2806
2807  assert(Op.getOpcode() == ISD::SHL_PARTS);
2808  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2809                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2810  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2811  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2812                                   DAG.getConstant(VTBits, MVT::i32));
2813  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2814  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2815
2816  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2817  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2818  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2819                          ARMcc, DAG, dl);
2820  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2821  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
2822                           CCR, Cmp);
2823
2824  SDValue Ops[2] = { Lo, Hi };
2825  return DAG.getMergeValues(Ops, 2, dl);
2826}
2827
2828SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2829                                            SelectionDAG &DAG) const {
2830  // The rounding mode is in bits 23:22 of the FPSCR.
2831  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
2832  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
2833  // so that the shift and the AND get folded into a bitfield extract.
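  // For example, if FPSCR[23:22] is 0b11 (round toward zero), then
  //   ((3 << 22) + (1 << 22)) >> 22 == 4  and  4 & 3 == 0,
  // which is the FLT_ROUNDS encoding of "toward zero".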
2834  DebugLoc dl = Op.getDebugLoc();
2835  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
2836                              DAG.getConstant(Intrinsic::arm_get_fpscr,
2837                                              MVT::i32));
2838  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
2839                                  DAG.getConstant(1U << 22, MVT::i32));
2840  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2841                              DAG.getConstant(22, MVT::i32));
2842  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2843                     DAG.getConstant(3, MVT::i32));
2844}
2845
2846static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2847                         const ARMSubtarget *ST) {
2848  EVT VT = N->getValueType(0);
2849  DebugLoc dl = N->getDebugLoc();
2850
2851  if (!ST->hasV6T2Ops())
2852    return SDValue();
2853
2854  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2855  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2856}
2857
2858static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2859                          const ARMSubtarget *ST) {
2860  EVT VT = N->getValueType(0);
2861  DebugLoc dl = N->getDebugLoc();
2862
2863  // Lower vector shifts on NEON to use VSHL.
2864  if (VT.isVector()) {
2865    assert(ST->hasNEON() && "unexpected vector shift");
2866
2867    // Left shifts translate directly to the vshiftu intrinsic.
2868    if (N->getOpcode() == ISD::SHL)
2869      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2870                         DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2871                         N->getOperand(0), N->getOperand(1));
2872
2873    assert((N->getOpcode() == ISD::SRA ||
2874            N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2875
2876    // NEON uses the same intrinsics for both left and right shifts.  For
2877    // right shifts, the shift amounts are negative, so negate the vector of
2878    // shift amounts.
2879    EVT ShiftVT = N->getOperand(1).getValueType();
2880    SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2881                                       getZeroVector(ShiftVT, DAG, dl),
2882                                       N->getOperand(1));
2883    Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2884                               Intrinsic::arm_neon_vshifts :
2885                               Intrinsic::arm_neon_vshiftu);
2886    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2887                       DAG.getConstant(vshiftInt, MVT::i32),
2888                       N->getOperand(0), NegatedCount);
2889  }
2890
2891  // We can get here for a node like i32 = ISD::SHL i32, i64
2892  if (VT != MVT::i64)
2893    return SDValue();
2894
2895  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2896         "Unknown shift to lower!");
2897
2898  // We only lower SRA, SRL of 1 here; all others use generic lowering.
2899  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2900      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2901    return SDValue();
2902
2903  // If we are in thumb mode, we don't have RRX.
2904  if (ST->isThumb1Only()) return SDValue();
2905
2906  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
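  // For a logical shift right by one this amounts to roughly:
  //   lsrs  hi, hi, #1    @ shift the high word; bit 0 goes into the carry
  //   rrx   lo, lo        @ rotate the low word right through the carry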
2907  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2908                           DAG.getConstant(0, MVT::i32));
2909  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2910                           DAG.getConstant(1, MVT::i32));
2911
2912  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2913  // captures the result into a carry flag.
2914  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2915  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2916
2917  // The low part is an ARMISD::RRX operand, which shifts the carry in.
2918  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2919
2920  // Merge the pieces into a single i64 value.
2921  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2922}
2923
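/// LowerVSETCC - Lower a vector comparison (ISD::VSETCC) to the NEON compare
/// nodes VCEQ/VCGE/VCGT (and their unsigned variants), swapping or inverting
/// operands when the condition has no direct NEON equivalent, e.g. "a < b"
/// becomes "b > a" and "a != b" becomes "not (a == b)".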
2924static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2925  SDValue TmpOp0, TmpOp1;
2926  bool Invert = false;
2927  bool Swap = false;
2928  unsigned Opc = 0;
2929
2930  SDValue Op0 = Op.getOperand(0);
2931  SDValue Op1 = Op.getOperand(1);
2932  SDValue CC = Op.getOperand(2);
2933  EVT VT = Op.getValueType();
2934  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2935  DebugLoc dl = Op.getDebugLoc();
2936
2937  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
2938    switch (SetCCOpcode) {
2939    default: llvm_unreachable("Illegal FP comparison"); break;
2940    case ISD::SETUNE:
2941    case ISD::SETNE:  Invert = true; // Fallthrough
2942    case ISD::SETOEQ:
2943    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2944    case ISD::SETOLT:
2945    case ISD::SETLT: Swap = true; // Fallthrough
2946    case ISD::SETOGT:
2947    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2948    case ISD::SETOLE:
2949    case ISD::SETLE:  Swap = true; // Fallthrough
2950    case ISD::SETOGE:
2951    case ISD::SETGE: Opc = ARMISD::VCGE; break;
2952    case ISD::SETUGE: Swap = true; // Fallthrough
2953    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2954    case ISD::SETUGT: Swap = true; // Fallthrough
2955    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2956    case ISD::SETUEQ: Invert = true; // Fallthrough
2957    case ISD::SETONE:
2958      // Expand this to (OLT | OGT).
2959      TmpOp0 = Op0;
2960      TmpOp1 = Op1;
2961      Opc = ISD::OR;
2962      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2963      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2964      break;
2965    case ISD::SETUO: Invert = true; // Fallthrough
2966    case ISD::SETO:
2967      // Expand this to (OLT | OGE).
2968      TmpOp0 = Op0;
2969      TmpOp1 = Op1;
2970      Opc = ISD::OR;
2971      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2972      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
2973      break;
2974    }
2975  } else {
2976    // Integer comparisons.
2977    switch (SetCCOpcode) {
2978    default: llvm_unreachable("Illegal integer comparison"); break;
2979    case ISD::SETNE:  Invert = true; // Fallthrough
2980    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2981    case ISD::SETLT:  Swap = true; // Fallthrough
2982    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2983    case ISD::SETLE:  Swap = true; // Fallthrough
2984    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
2985    case ISD::SETULT: Swap = true; // Fallthrough
2986    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
2987    case ISD::SETULE: Swap = true; // Fallthrough
2988    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
2989    }
2990
2991    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2992    if (Opc == ARMISD::VCEQ) {
2993
2994      SDValue AndOp;
2995      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
2996        AndOp = Op0;
2997      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
2998        AndOp = Op1;
2999
3000      // Ignore bitconvert.
3001      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
3002        AndOp = AndOp.getOperand(0);
3003
3004      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
3005        Opc = ARMISD::VTST;
3006        Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
3007        Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
3008        Invert = !Invert;
3009      }
3010    }
3011  }
3012
3013  if (Swap)
3014    std::swap(Op0, Op1);
3015
3016  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3017
3018  if (Invert)
3019    Result = DAG.getNOT(dl, Result, VT);
3020
3021  return Result;
3022}
3023
3024/// isNEONModifiedImm - Check if the specified splat value corresponds to a
3025/// valid vector constant for a NEON instruction with a "modified immediate"
3026/// operand (e.g., VMOV).  If so, return the encoded value.
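///
/// For example, a 32-bit splat of 0x0000ff00 is representable: the code below
/// returns the encoding OpCmode=0x2, Imm=0xff, which corresponds to roughly a
/// "vmov.i32 dN, #0x0000ff00" instruction.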
3027static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3028                                 unsigned SplatBitSize, SelectionDAG &DAG,
3029                                 EVT &VT, bool is128Bits, bool isVMOV) {
3030  unsigned OpCmode, Imm;
3031
3032  // SplatBitSize is set to the smallest size that splats the vector, so a
3033  // zero vector will always have SplatBitSize == 8.  However, NEON modified
3034  // immediate instructions other than VMOV do not support the 8-bit encoding
3035  // of a zero vector, and the default encoding of zero is supposed to be the
3036  // 32-bit version.
3037  if (SplatBits == 0)
3038    SplatBitSize = 32;
3039
3040  switch (SplatBitSize) {
3041  case 8:
3042    if (!isVMOV)
3043      return SDValue();
3044    // Any 1-byte value is OK.  Op=0, Cmode=1110.
3045    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3046    OpCmode = 0xe;
3047    Imm = SplatBits;
3048    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3049    break;
3050
3051  case 16:
3052    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3053    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3054    if ((SplatBits & ~0xff) == 0) {
3055      // Value = 0x00nn: Op=x, Cmode=100x.
3056      OpCmode = 0x8;
3057      Imm = SplatBits;
3058      break;
3059    }
3060    if ((SplatBits & ~0xff00) == 0) {
3061      // Value = 0xnn00: Op=x, Cmode=101x.
3062      OpCmode = 0xa;
3063      Imm = SplatBits >> 8;
3064      break;
3065    }
3066    return SDValue();
3067
3068  case 32:
3069    // NEON's 32-bit VMOV supports splat values where:
3070    // * only one byte is nonzero, or
3071    // * the least significant byte is 0xff and the second byte is nonzero, or
3072    // * the least significant 2 bytes are 0xff and the third is nonzero.
3073    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3074    if ((SplatBits & ~0xff) == 0) {
3075      // Value = 0x000000nn: Op=x, Cmode=000x.
3076      OpCmode = 0;
3077      Imm = SplatBits;
3078      break;
3079    }
3080    if ((SplatBits & ~0xff00) == 0) {
3081      // Value = 0x0000nn00: Op=x, Cmode=001x.
3082      OpCmode = 0x2;
3083      Imm = SplatBits >> 8;
3084      break;
3085    }
3086    if ((SplatBits & ~0xff0000) == 0) {
3087      // Value = 0x00nn0000: Op=x, Cmode=010x.
3088      OpCmode = 0x4;
3089      Imm = SplatBits >> 16;
3090      break;
3091    }
3092    if ((SplatBits & ~0xff000000) == 0) {
3093      // Value = 0xnn000000: Op=x, Cmode=011x.
3094      OpCmode = 0x6;
3095      Imm = SplatBits >> 24;
3096      break;
3097    }
3098
3099    if ((SplatBits & ~0xffff) == 0 &&
3100        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3101      // Value = 0x0000nnff: Op=x, Cmode=1100.
3102      OpCmode = 0xc;
3103      Imm = SplatBits >> 8;
3104      SplatBits |= 0xff;
3105      break;
3106    }
3107
3108    if ((SplatBits & ~0xffffff) == 0 &&
3109        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3110      // Value = 0x00nnffff: Op=x, Cmode=1101.
3111      OpCmode = 0xd;
3112      Imm = SplatBits >> 16;
3113      SplatBits |= 0xffff;
3114      break;
3115    }
3116
3117    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3118    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3119    // VMOV.I32.  A (very) minor optimization would be to replicate the value
3120    // and fall through here to test for a valid 64-bit splat.  But, then the
3121    // caller would also need to check and handle the change in size.
3122    return SDValue();
3123
3124  case 64: {
3125    if (!isVMOV)
3126      return SDValue();
3127    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
3128    uint64_t BitMask = 0xff;
3129    uint64_t Val = 0;
3130    unsigned ImmMask = 1;
3131    Imm = 0;
3132    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
3133      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
3134        Val |= BitMask;
3135        Imm |= ImmMask;
3136      } else if ((SplatBits & BitMask) != 0) {
3137        return SDValue();
3138      }
3139      BitMask <<= 8;
3140      ImmMask <<= 1;
3141    }
3142    // Op=1, Cmode=1110.
3143    OpCmode = 0x1e;
3144    SplatBits = Val;
3145    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3146    break;
3147  }
3148
3149  default:
3150    llvm_unreachable("unexpected size for isNEONModifiedImm");
3151    return SDValue();
3152  }
3153
3154  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
3155  return DAG.getTargetConstant(EncodedVal, MVT::i32);
3156}
3157
3158static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
3159                       bool &ReverseVEXT, unsigned &Imm) {
3160  unsigned NumElts = VT.getVectorNumElements();
3161  ReverseVEXT = false;
3162
3163  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
3164  if (M[0] < 0)
3165    return false;
3166
3167  Imm = M[0];
3168
3169  // If this is a VEXT shuffle, the immediate value is the index of the first
3170  // element.  The other shuffle indices must be the successive elements after
3171  // the first one.
3172  unsigned ExpectedElt = Imm;
3173  for (unsigned i = 1; i < NumElts; ++i) {
3174    // Increment the expected index.  If it wraps around, it may still be
3175    // a VEXT but the source vectors must be swapped.
3176    ExpectedElt += 1;
3177    if (ExpectedElt == NumElts * 2) {
3178      ExpectedElt = 0;
3179      ReverseVEXT = true;
3180    }
3181
3182    if (M[i] < 0) continue; // ignore UNDEF indices
3183    if (ExpectedElt != static_cast<unsigned>(M[i]))
3184      return false;
3185  }
3186
3187  // Adjust the index value if the source operands will be swapped.
3188  if (ReverseVEXT)
3189    Imm -= NumElts;
3190
3191  return true;
3192}
3193
3194/// isVREVMask - Check if a vector shuffle corresponds to a VREV
3195/// instruction with the specified blocksize.  (The order of the elements
3196/// within each block of the vector is reversed.)
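///
/// For example, with a v8i8 operand and BlockSize == 32, the mask
/// <3,2,1,0,7,6,5,4> reverses the bytes within each 32-bit block and thus
/// corresponds to a VREV32.8.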
3197static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
3198                       unsigned BlockSize) {
3199  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3200         "Only possible block sizes for VREV are: 16, 32, 64");
3201
3202  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3203  if (EltSz == 64)
3204    return false;
3205
3206  unsigned NumElts = VT.getVectorNumElements();
3207  unsigned BlockElts = M[0] + 1;
3208  // If the first shuffle index is UNDEF, be optimistic.
3209  if (M[0] < 0)
3210    BlockElts = BlockSize / EltSz;
3211
3212  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3213    return false;
3214
3215  for (unsigned i = 0; i < NumElts; ++i) {
3216    if (M[i] < 0) continue; // ignore UNDEF indices
3217    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3218      return false;
3219  }
3220
3221  return true;
3222}
3223
3224static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
3225                       unsigned &WhichResult) {
3226  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3227  if (EltSz == 64)
3228    return false;
3229
3230  unsigned NumElts = VT.getVectorNumElements();
3231  WhichResult = (M[0] == 0 ? 0 : 1);
3232  for (unsigned i = 0; i < NumElts; i += 2) {
3233    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3234        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
3235      return false;
3236  }
3237  return true;
3238}
3239
3240/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3241/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3242/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3243static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3244                                unsigned &WhichResult) {
3245  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3246  if (EltSz == 64)
3247    return false;
3248
3249  unsigned NumElts = VT.getVectorNumElements();
3250  WhichResult = (M[0] == 0 ? 0 : 1);
3251  for (unsigned i = 0; i < NumElts; i += 2) {
3252    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3253        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
3254      return false;
3255  }
3256  return true;
3257}
3258
3259static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
3260                       unsigned &WhichResult) {
3261  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3262  if (EltSz == 64)
3263    return false;
3264
3265  unsigned NumElts = VT.getVectorNumElements();
3266  WhichResult = (M[0] == 0 ? 0 : 1);
3267  for (unsigned i = 0; i != NumElts; ++i) {
3268    if (M[i] < 0) continue; // ignore UNDEF indices
3269    if ((unsigned) M[i] != 2 * i + WhichResult)
3270      return false;
3271  }
3272
3273  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3274  if (VT.is64BitVector() && EltSz == 32)
3275    return false;
3276
3277  return true;
3278}
3279
3280/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
3281/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3282/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
3283static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3284                                unsigned &WhichResult) {
3285  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3286  if (EltSz == 64)
3287    return false;
3288
3289  unsigned Half = VT.getVectorNumElements() / 2;
3290  WhichResult = (M[0] == 0 ? 0 : 1);
3291  for (unsigned j = 0; j != 2; ++j) {
3292    unsigned Idx = WhichResult;
3293    for (unsigned i = 0; i != Half; ++i) {
3294      int MIdx = M[i + j * Half];
3295      if (MIdx >= 0 && (unsigned) MIdx != Idx)
3296        return false;
3297      Idx += 2;
3298    }
3299  }
3300
3301  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3302  if (VT.is64BitVector() && EltSz == 32)
3303    return false;
3304
3305  return true;
3306}
3307
3308static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
3309                       unsigned &WhichResult) {
3310  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3311  if (EltSz == 64)
3312    return false;
3313
3314  unsigned NumElts = VT.getVectorNumElements();
3315  WhichResult = (M[0] == 0 ? 0 : 1);
3316  unsigned Idx = WhichResult * NumElts / 2;
3317  for (unsigned i = 0; i != NumElts; i += 2) {
3318    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3319        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
3320      return false;
3321    Idx += 1;
3322  }
3323
3324  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3325  if (VT.is64BitVector() && EltSz == 32)
3326    return false;
3327
3328  return true;
3329}
3330
3331/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
3332/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3333/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
3334static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3335                                unsigned &WhichResult) {
3336  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3337  if (EltSz == 64)
3338    return false;
3339
3340  unsigned NumElts = VT.getVectorNumElements();
3341  WhichResult = (M[0] == 0 ? 0 : 1);
3342  unsigned Idx = WhichResult * NumElts / 2;
3343  for (unsigned i = 0; i != NumElts; i += 2) {
3344    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3345        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
3346      return false;
3347    Idx += 1;
3348  }
3349
3350  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3351  if (VT.is64BitVector() && EltSz == 32)
3352    return false;
3353
3354  return true;
3355}
3356
3357// If N is an integer constant that can be moved into a register in one
3358// instruction, return an SDValue of such a constant (will become a MOV
3359// instruction).  Otherwise return null.
3360static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
3361                                     const ARMSubtarget *ST, DebugLoc dl) {
3362  uint64_t Val;
3363  if (!isa<ConstantSDNode>(N))
3364    return SDValue();
3365  Val = cast<ConstantSDNode>(N)->getZExtValue();
3366
3367  if (ST->isThumb1Only()) {
3368    if (Val <= 255 || ~Val <= 255)
3369      return DAG.getConstant(Val, MVT::i32);
3370  } else {
3371    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
3372      return DAG.getConstant(Val, MVT::i32);
3373  }
3374  return SDValue();
3375}
3376
3377// If this is a case we can't handle, return null and let the default
3378// expansion code take care of it.
3379static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3380                                 const ARMSubtarget *ST) {
3381  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3382  DebugLoc dl = Op.getDebugLoc();
3383  EVT VT = Op.getValueType();
3384
3385  APInt SplatBits, SplatUndef;
3386  unsigned SplatBitSize;
3387  bool HasAnyUndefs;
3388  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3389    if (SplatBitSize <= 64) {
3390      // Check if an immediate VMOV works.
3391      EVT VmovVT;
3392      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
3393                                      SplatUndef.getZExtValue(), SplatBitSize,
3394                                      DAG, VmovVT, VT.is128BitVector(), true);
3395      if (Val.getNode()) {
3396        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
3397        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
3398      }
3399
3400      // Try an immediate VMVN.
3401      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
3402                             ((1LL << SplatBitSize) - 1));
3403      Val = isNEONModifiedImm(NegatedImm,
3404                              SplatUndef.getZExtValue(), SplatBitSize,
3405                              DAG, VmovVT, VT.is128BitVector(), false);
3406      if (Val.getNode()) {
3407        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
3408        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
3409      }
3410    }
3411  }
3412
3413  // Scan through the operands to see if only one value is used.
3414  unsigned NumElts = VT.getVectorNumElements();
3415  bool isOnlyLowElement = true;
3416  bool usesOnlyOneValue = true;
3417  bool isConstant = true;
3418  SDValue Value;
3419  for (unsigned i = 0; i < NumElts; ++i) {
3420    SDValue V = Op.getOperand(i);
3421    if (V.getOpcode() == ISD::UNDEF)
3422      continue;
3423    if (i > 0)
3424      isOnlyLowElement = false;
3425    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3426      isConstant = false;
3427
3428    if (!Value.getNode())
3429      Value = V;
3430    else if (V != Value)
3431      usesOnlyOneValue = false;
3432  }
3433
3434  if (!Value.getNode())
3435    return DAG.getUNDEF(VT);
3436
3437  if (isOnlyLowElement)
3438    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
3439
3440  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3441
3442  if (EnableARMVDUPsplat) {
3443    // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
3444    // i32 and try again.
3445    if (usesOnlyOneValue && EltSize <= 32) {
3446      if (!isConstant)
3447        return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3448      if (VT.getVectorElementType().isFloatingPoint()) {
3449        SmallVector<SDValue, 8> Ops;
3450        for (unsigned i = 0; i < NumElts; ++i)
3451          Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
3452                                    Op.getOperand(i)));
3453        SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
3454                                  NumElts);
3455        return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
3456                           LowerBUILD_VECTOR(Val, DAG, ST));
3457      }
3458      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
3459      if (Val.getNode())
3460        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
3461    }
3462  }
3463
3464  // If all elements are constants and the case above didn't get hit, fall back
3465  // to the default expansion, which will generate a load from the constant
3466  // pool.
3467  if (isConstant)
3468    return SDValue();
3469
3470  if (!EnableARMVDUPsplat) {
3471    // Use VDUP for non-constant splats.
3472    if (usesOnlyOneValue && EltSize <= 32)
3473      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3474  }
3475
3476  // Vectors with 32- or 64-bit elements can be built by directly assigning
3477  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
3478  // will be legalized.
3479  if (EltSize >= 32) {
3480    // Do the expansion with floating-point types, since that is what the VFP
3481    // registers are defined to use, and since i64 is not legal.
3482    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3483    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3484    SmallVector<SDValue, 8> Ops;
3485    for (unsigned i = 0; i < NumElts; ++i)
3486      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
3487    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3488    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3489  }
3490
3491  return SDValue();
3492}
3493
3494/// isShuffleMaskLegal - Targets can use this to indicate that they only
3495/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3496/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3497/// are assumed to be legal.
3498bool
3499ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
3500                                      EVT VT) const {
3501  if (VT.getVectorNumElements() == 4 &&
3502      (VT.is128BitVector() || VT.is64BitVector())) {
3503    unsigned PFIndexes[4];
3504    for (unsigned i = 0; i != 4; ++i) {
3505      if (M[i] < 0)
3506        PFIndexes[i] = 8;
3507      else
3508        PFIndexes[i] = M[i];
3509    }
3510
3511    // Compute the index in the perfect shuffle table.
3512    unsigned PFTableIndex =
3513      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3514    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3515    unsigned Cost = (PFEntry >> 30);
3516
3517    if (Cost <= 4)
3518      return true;
3519  }
3520
3521  bool ReverseVEXT;
3522  unsigned Imm, WhichResult;
3523
3524  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3525  return (EltSize >= 32 ||
3526          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
3527          isVREVMask(M, VT, 64) ||
3528          isVREVMask(M, VT, 32) ||
3529          isVREVMask(M, VT, 16) ||
3530          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
3531          isVTRNMask(M, VT, WhichResult) ||
3532          isVUZPMask(M, VT, WhichResult) ||
3533          isVZIPMask(M, VT, WhichResult) ||
3534          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
3535          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
3536          isVZIP_v_undef_Mask(M, VT, WhichResult));
3537}
3538
3539/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3540/// the specified operations to build the shuffle.
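///
/// Each table entry packs its cost into bits [31:30], the operation (see the
/// enum below) into bits [29:26], and the perfect-shuffle ids of the two
/// operands into bits [25:13] and [12:0].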
3541static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3542                                      SDValue RHS, SelectionDAG &DAG,
3543                                      DebugLoc dl) {
3544  unsigned OpNum = (PFEntry >> 26) & 0x0F;
3545  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3546  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
3547
3548  enum {
3549    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3550    OP_VREV,
3551    OP_VDUP0,
3552    OP_VDUP1,
3553    OP_VDUP2,
3554    OP_VDUP3,
3555    OP_VEXT1,
3556    OP_VEXT2,
3557    OP_VEXT3,
3558    OP_VUZPL, // VUZP, left result
3559    OP_VUZPR, // VUZP, right result
3560    OP_VZIPL, // VZIP, left result
3561    OP_VZIPR, // VZIP, right result
3562    OP_VTRNL, // VTRN, left result
3563    OP_VTRNR  // VTRN, right result
3564  };
3565
3566  if (OpNum == OP_COPY) {
3567    if (LHSID == (1*9+2)*9+3) return LHS;
3568    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3569    return RHS;
3570  }
3571
3572  SDValue OpLHS, OpRHS;
3573  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3574  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3575  EVT VT = OpLHS.getValueType();
3576
3577  switch (OpNum) {
3578  default: llvm_unreachable("Unknown shuffle opcode!");
3579  case OP_VREV:
3580    return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
3581  case OP_VDUP0:
3582  case OP_VDUP1:
3583  case OP_VDUP2:
3584  case OP_VDUP3:
3585    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
3586                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
3587  case OP_VEXT1:
3588  case OP_VEXT2:
3589  case OP_VEXT3:
3590    return DAG.getNode(ARMISD::VEXT, dl, VT,
3591                       OpLHS, OpRHS,
3592                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3593  case OP_VUZPL:
3594  case OP_VUZPR:
3595    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3596                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3597  case OP_VZIPL:
3598  case OP_VZIPR:
3599    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3600                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3601  case OP_VTRNL:
3602  case OP_VTRNR:
3603    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3604                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3605  }
3606}
3607
3608static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3609  SDValue V1 = Op.getOperand(0);
3610  SDValue V2 = Op.getOperand(1);
3611  DebugLoc dl = Op.getDebugLoc();
3612  EVT VT = Op.getValueType();
3613  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3614  SmallVector<int, 8> ShuffleMask;
3615
3616  // Convert shuffles that are directly supported on NEON to target-specific
3617  // DAG nodes, instead of keeping them as shuffles and matching them again
3618  // during code selection.  This is more efficient and avoids the possibility
3619  // of inconsistencies between legalization and selection.
3620  // FIXME: floating-point vectors should be canonicalized to integer vectors
3621  // of the same size so that they get CSEd properly.
3622  SVN->getMask(ShuffleMask);
3623
3624  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3625  if (EltSize <= 32) {
3626    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3627      int Lane = SVN->getSplatIndex();
3628      // If this is an undef splat, generate it via "just" vdup, if possible.
3629      if (Lane == -1) Lane = 0;
3630
3631      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3632        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3633      }
3634      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3635                         DAG.getConstant(Lane, MVT::i32));
3636    }
3637
3638    bool ReverseVEXT;
3639    unsigned Imm;
3640    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3641      if (ReverseVEXT)
3642        std::swap(V1, V2);
3643      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3644                         DAG.getConstant(Imm, MVT::i32));
3645    }
3646
3647    if (isVREVMask(ShuffleMask, VT, 64))
3648      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3649    if (isVREVMask(ShuffleMask, VT, 32))
3650      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3651    if (isVREVMask(ShuffleMask, VT, 16))
3652      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3653
3654    // Check for Neon shuffles that modify both input vectors in place.
3655    // If both results are used, i.e., if there are two shuffles with the same
3656    // source operands and with masks corresponding to both results of one of
3657    // these operations, DAG memoization will ensure that a single node is
3658    // used for both shuffles.
3659    unsigned WhichResult;
3660    if (isVTRNMask(ShuffleMask, VT, WhichResult))
3661      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3662                         V1, V2).getValue(WhichResult);
3663    if (isVUZPMask(ShuffleMask, VT, WhichResult))
3664      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3665                         V1, V2).getValue(WhichResult);
3666    if (isVZIPMask(ShuffleMask, VT, WhichResult))
3667      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3668                         V1, V2).getValue(WhichResult);
3669
3670    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3671      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3672                         V1, V1).getValue(WhichResult);
3673    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3674      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3675                         V1, V1).getValue(WhichResult);
3676    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3677      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3678                         V1, V1).getValue(WhichResult);
3679  }
3680
3681  // If the shuffle is not directly supported and it has 4 elements, use
3682  // the PerfectShuffle-generated table to synthesize it from other shuffles.
3683  unsigned NumElts = VT.getVectorNumElements();
3684  if (NumElts == 4) {
3685    unsigned PFIndexes[4];
3686    for (unsigned i = 0; i != 4; ++i) {
3687      if (ShuffleMask[i] < 0)
3688        PFIndexes[i] = 8;
3689      else
3690        PFIndexes[i] = ShuffleMask[i];
3691    }
3692
3693    // Compute the index in the perfect shuffle table.
3694    unsigned PFTableIndex =
3695      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3696    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3697    unsigned Cost = (PFEntry >> 30);
3698
3699    if (Cost <= 4)
3700      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3701  }
3702
3703  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
3704  if (EltSize >= 32) {
3705    // Do the expansion with floating-point types, since that is what the VFP
3706    // registers are defined to use, and since i64 is not legal.
3707    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3708    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3709    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
3710    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
3711    SmallVector<SDValue, 8> Ops;
3712    for (unsigned i = 0; i < NumElts; ++i) {
3713      if (ShuffleMask[i] < 0)
3714        Ops.push_back(DAG.getUNDEF(EltVT));
3715      else
3716        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
3717                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
3718                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
3719                                                  MVT::i32)));
3720    }
3721    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3722    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3723  }
3724
3725  return SDValue();
3726}
3727
3728static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3729  EVT VT = Op.getValueType();
3730  DebugLoc dl = Op.getDebugLoc();
3731  SDValue Vec = Op.getOperand(0);
3732  SDValue Lane = Op.getOperand(1);
3733  assert(VT == MVT::i32 &&
3734         Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
3735         "unexpected type for custom-lowering vector extract");
3736  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3737}
3738
3739static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3740  // The only time a CONCAT_VECTORS operation can have legal types is when
3741  // two 64-bit vectors are concatenated to a 128-bit vector.
3742  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3743         "unexpected CONCAT_VECTORS");
3744  DebugLoc dl = Op.getDebugLoc();
3745  SDValue Val = DAG.getUNDEF(MVT::v2f64);
3746  SDValue Op0 = Op.getOperand(0);
3747  SDValue Op1 = Op.getOperand(1);
3748  if (Op0.getOpcode() != ISD::UNDEF)
3749    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3750                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
3751                      DAG.getIntPtrConstant(0));
3752  if (Op1.getOpcode() != ISD::UNDEF)
3753    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3754                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
3755                      DAG.getIntPtrConstant(1));
3756  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
3757}
3758
3759/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
3760/// an extending load, return the unextended value.
3761static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
3762  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
3763    return N->getOperand(0);
3764  LoadSDNode *LD = cast<LoadSDNode>(N);
3765  return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
3766                     LD->getBasePtr(), LD->getSrcValue(),
3767                     LD->getSrcValueOffset(), LD->isVolatile(),
3768                     LD->isNonTemporal(), LD->getAlignment());
3769}
3770
3771static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
3772  // Multiplications are only custom-lowered for 128-bit vectors so that
3773  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
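  // For example, (v2i64 (mul (sext v2i32 a), (sext v2i32 b))) is emitted as a
  // single VMULL.S32 (ARMISD::VMULLs), since NEON has no plain v2i64 multiply.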
3774  EVT VT = Op.getValueType();
3775  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
3776  SDNode *N0 = Op.getOperand(0).getNode();
3777  SDNode *N1 = Op.getOperand(1).getNode();
3778  unsigned NewOpc = 0;
3779  if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
3780      (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
3781    NewOpc = ARMISD::VMULLs;
3782  } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
3783             (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
3784    NewOpc = ARMISD::VMULLu;
3785  } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
3786    // Fall through to expand this.  It is not legal.
3787    return SDValue();
3788  } else {
3789    // Other vector multiplications are legal.
3790    return Op;
3791  }
3792
3793  // Legalize to a VMULL instruction.
3794  DebugLoc DL = Op.getDebugLoc();
3795  SDValue Op0 = SkipExtension(N0, DAG);
3796  SDValue Op1 = SkipExtension(N1, DAG);
3797
3798  assert(Op0.getValueType().is64BitVector() &&
3799         Op1.getValueType().is64BitVector() &&
3800         "unexpected types for extended operands to VMULL");
3801  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3802}
3803
3804SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3805  switch (Op.getOpcode()) {
3806  default: llvm_unreachable("Don't know how to custom lower this!");
3807  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
3808  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
3809  case ISD::GlobalAddress:
3810    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
3811      LowerGlobalAddressELF(Op, DAG);
3812  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
3813  case ISD::SELECT:        return LowerSELECT(Op, DAG);
3814  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
3815  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
3816  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
3817  case ISD::VASTART:       return LowerVASTART(Op, DAG);
3818  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
3819  case ISD::SINT_TO_FP:
3820  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
3821  case ISD::FP_TO_SINT:
3822  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
3823  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
3824  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
3825  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
3826  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3827  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
3828  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
3829  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
3830                                                               Subtarget);
3831  case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
3832  case ISD::SHL:
3833  case ISD::SRL:
3834  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
3835  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
3836  case ISD::SRL_PARTS:
3837  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
3838  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
3839  case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
3840  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
3841  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3842  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3843  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3844  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
3845  case ISD::MUL:           return LowerMUL(Op, DAG);
3846  }
3847  return SDValue();
3848}
3849
3850/// ReplaceNodeResults - Replace the results of a node with an illegal result
3851/// type with new values built out of custom code.
3852void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
3853                                           SmallVectorImpl<SDValue>&Results,
3854                                           SelectionDAG &DAG) const {
3855  SDValue Res;
3856  switch (N->getOpcode()) {
3857  default:
3858    llvm_unreachable("Don't know how to custom expand this!");
3859    break;
3860  case ISD::BIT_CONVERT:
3861    Res = ExpandBIT_CONVERT(N, DAG);
3862    break;
3863  case ISD::SRL:
3864  case ISD::SRA:
3865    Res = LowerShift(N, DAG, Subtarget);
3866    break;
3867  }
3868  if (Res.getNode())
3869    Results.push_back(Res);
3870}
3871
3872//===----------------------------------------------------------------------===//
3873//                           ARM Scheduler Hooks
3874//===----------------------------------------------------------------------===//
3875
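/// EmitAtomicCmpSwap - Expand an ATOMIC_CMP_SWAP pseudo instruction into a
/// ldrex / strex compare-and-swap loop: loop1 loads the current value and
/// exits if it does not equal oldval; loop2 attempts the exclusive store of
/// newval and branches back to loop1 if the store-exclusive fails.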
3876MachineBasicBlock *
3877ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
3878                                     MachineBasicBlock *BB,
3879                                     unsigned Size) const {
3880  unsigned dest    = MI->getOperand(0).getReg();
3881  unsigned ptr     = MI->getOperand(1).getReg();
3882  unsigned oldval  = MI->getOperand(2).getReg();
3883  unsigned newval  = MI->getOperand(3).getReg();
3884  unsigned scratch = BB->getParent()->getRegInfo()
3885    .createVirtualRegister(ARM::GPRRegisterClass);
3886  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3887  DebugLoc dl = MI->getDebugLoc();
3888  bool isThumb2 = Subtarget->isThumb2();
3889
3890  unsigned ldrOpc, strOpc;
3891  switch (Size) {
3892  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3893  case 1:
3894    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3895    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3896    break;
3897  case 2:
3898    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3899    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3900    break;
3901  case 4:
3902    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3903    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3904    break;
3905  }
3906
3907  MachineFunction *MF = BB->getParent();
3908  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3909  MachineFunction::iterator It = BB;
3910  ++It; // insert the new blocks after the current block
3911
3912  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3913  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3914  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3915  MF->insert(It, loop1MBB);
3916  MF->insert(It, loop2MBB);
3917  MF->insert(It, exitMBB);
3918
3919  // Transfer the remainder of BB and its successor edges to exitMBB.
3920  exitMBB->splice(exitMBB->begin(), BB,
3921                  llvm::next(MachineBasicBlock::iterator(MI)),
3922                  BB->end());
3923  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
3924
3925  //  thisMBB:
3926  //   ...
3927  //   fallthrough --> loop1MBB
3928  BB->addSuccessor(loop1MBB);
3929
3930  // loop1MBB:
3931  //   ldrex dest, [ptr]
3932  //   cmp dest, oldval
3933  //   bne exitMBB
3934  BB = loop1MBB;
3935  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3936  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3937                 .addReg(dest).addReg(oldval));
3938  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3939    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3940  BB->addSuccessor(loop2MBB);
3941  BB->addSuccessor(exitMBB);
3942
3943  // loop2MBB:
3944  //   strex scratch, newval, [ptr]
3945  //   cmp scratch, #0
3946  //   bne loop1MBB
3947  BB = loop2MBB;
3948  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3949                 .addReg(ptr));
3950  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3951                 .addReg(scratch).addImm(0));
3952  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3953    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3954  BB->addSuccessor(loop1MBB);
3955  BB->addSuccessor(exitMBB);
3956
3957  //  exitMBB:
3958  //   ...
3959  BB = exitMBB;
3960
3961  MI->eraseFromParent();   // The instruction is gone now.
3962
3963  return BB;
3964}
3965
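/// EmitAtomicBinary - Expand an atomic read-modify-write pseudo instruction
/// (or ATOMIC_SWAP when BinOpcode is 0) into a ldrex / <binop> / strex loop
/// that retries until the store-exclusive succeeds.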
3966MachineBasicBlock *
3967ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3968                                    unsigned Size, unsigned BinOpcode) const {
3969  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3970  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3971
3972  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3973  MachineFunction *MF = BB->getParent();
3974  MachineFunction::iterator It = BB;
3975  ++It;
3976
3977  unsigned dest = MI->getOperand(0).getReg();
3978  unsigned ptr = MI->getOperand(1).getReg();
3979  unsigned incr = MI->getOperand(2).getReg();
3980  DebugLoc dl = MI->getDebugLoc();
3981
3982  bool isThumb2 = Subtarget->isThumb2();
3983  unsigned ldrOpc, strOpc;
3984  switch (Size) {
3985  default: llvm_unreachable("unsupported size for AtomicBinary!");
3986  case 1:
3987    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3988    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3989    break;
3990  case 2:
3991    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3992    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3993    break;
3994  case 4:
3995    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3996    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3997    break;
3998  }
3999
4000  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4001  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4002  MF->insert(It, loopMBB);
4003  MF->insert(It, exitMBB);
4004
4005  // Transfer the remainder of BB and its successor edges to exitMBB.
4006  exitMBB->splice(exitMBB->begin(), BB,
4007                  llvm::next(MachineBasicBlock::iterator(MI)),
4008                  BB->end());
4009  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
4010
4011  MachineRegisterInfo &RegInfo = MF->getRegInfo();
4012  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4013  unsigned scratch2 = (!BinOpcode) ? incr :
4014    RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4015
4016  //  thisMBB:
4017  //   ...
4018  //   fallthrough --> loopMBB
4019  BB->addSuccessor(loopMBB);
4020
4021  //  loopMBB:
4022  //   ldrex dest, ptr
4023  //   <binop> scratch2, dest, incr
4024  //   strex scratch, scratch2, ptr
4025  //   cmp scratch, #0
4026  //   bne- loopMBB
4027  //   fallthrough --> exitMBB
4028  BB = loopMBB;
4029  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
4030  if (BinOpcode) {
4031    // operand order needs to go the other way for NAND
4032    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
4033      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4034                     addReg(incr).addReg(dest)).addReg(0);
4035    else
4036      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4037                     addReg(dest).addReg(incr)).addReg(0);
4038  }
4039
4040  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
4041                 .addReg(ptr));
4042  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4043                 .addReg(scratch).addImm(0));
4044  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4045    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
4046
4047  BB->addSuccessor(loopMBB);
4048  BB->addSuccessor(exitMBB);
4049
4050  //  exitMBB:
4051  //   ...
4052  BB = exitMBB;
4053
4054  MI->eraseFromParent();   // The instruction is gone now.
4055
4056  return BB;
4057}
4058
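/// OtherSucc - Return the successor of MBB other than Succ.  MBB is expected
/// to have exactly two successors.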
4059static
4060MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
4061  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
4062       E = MBB->succ_end(); I != E; ++I)
4063    if (*I != Succ)
4064      return *I;
4065  llvm_unreachable("Expecting a BB with two successors!");
4066}
4067
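/// EmitInstrWithCustomInserter - Expand pseudo instructions that need custom
/// basic-block insertion: the atomic operation pseudos, the Thumb conditional
/// move pseudo, and the 64-bit compare-and-branch pseudos.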
4068MachineBasicBlock *
4069ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
4070                                               MachineBasicBlock *BB) const {
4071  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4072  DebugLoc dl = MI->getDebugLoc();
4073  bool isThumb2 = Subtarget->isThumb2();
4074  switch (MI->getOpcode()) {
4075  default:
4076    MI->dump();
4077    llvm_unreachable("Unexpected instr type to insert");
4078
4079  case ARM::ATOMIC_LOAD_ADD_I8:
4080     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4081  case ARM::ATOMIC_LOAD_ADD_I16:
4082     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4083  case ARM::ATOMIC_LOAD_ADD_I32:
4084     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4085
4086  case ARM::ATOMIC_LOAD_AND_I8:
4087     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4088  case ARM::ATOMIC_LOAD_AND_I16:
4089     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4090  case ARM::ATOMIC_LOAD_AND_I32:
4091     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4092
4093  case ARM::ATOMIC_LOAD_OR_I8:
4094     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4095  case ARM::ATOMIC_LOAD_OR_I16:
4096     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4097  case ARM::ATOMIC_LOAD_OR_I32:
4098     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4099
4100  case ARM::ATOMIC_LOAD_XOR_I8:
4101     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4102  case ARM::ATOMIC_LOAD_XOR_I16:
4103     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4104  case ARM::ATOMIC_LOAD_XOR_I32:
4105     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4106
4107  case ARM::ATOMIC_LOAD_NAND_I8:
4108     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4109  case ARM::ATOMIC_LOAD_NAND_I16:
4110     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4111  case ARM::ATOMIC_LOAD_NAND_I32:
4112     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4113
4114  case ARM::ATOMIC_LOAD_SUB_I8:
4115     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4116  case ARM::ATOMIC_LOAD_SUB_I16:
4117     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4118  case ARM::ATOMIC_LOAD_SUB_I32:
4119     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4120
4121  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
4122  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
4123  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
4124
4125  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
4126  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
4127  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
4128
4129  case ARM::tMOVCCr_pseudo: {
4130    // To "insert" a SELECT_CC instruction, we actually have to insert the
4131    // diamond control-flow pattern.  The incoming instruction knows the
4132    // destination vreg to set, the condition code register to branch on, the
4133    // true/false values to select between, and a branch opcode to use.
4134    const BasicBlock *LLVM_BB = BB->getBasicBlock();
4135    MachineFunction::iterator It = BB;
4136    ++It;
4137
4138    //  thisMBB:
4139    //  ...
4140    //   TrueVal = ...
4141    //   cmpTY ccX, r1, r2
4142    //   bCC copy1MBB
4143    //   fallthrough --> copy0MBB
4144    MachineBasicBlock *thisMBB  = BB;
4145    MachineFunction *F = BB->getParent();
4146    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
4147    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
4148    F->insert(It, copy0MBB);
4149    F->insert(It, sinkMBB);
4150
4151    // Transfer the remainder of BB and its successor edges to sinkMBB.
4152    sinkMBB->splice(sinkMBB->begin(), BB,
4153                    llvm::next(MachineBasicBlock::iterator(MI)),
4154                    BB->end());
4155    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
4156
4157    BB->addSuccessor(copy0MBB);
4158    BB->addSuccessor(sinkMBB);
4159
4160    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
4161      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
4162
4163    //  copy0MBB:
4164    //   %FalseValue = ...
4165    //   # fallthrough to sinkMBB
4166    BB = copy0MBB;
4167
4168    // Update machine-CFG edges
4169    BB->addSuccessor(sinkMBB);
4170
4171    //  sinkMBB:
4172    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4173    //  ...
4174    BB = sinkMBB;
4175    BuildMI(*BB, BB->begin(), dl,
4176            TII->get(ARM::PHI), MI->getOperand(0).getReg())
4177      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4178      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4179
4180    MI->eraseFromParent();   // The pseudo instruction is gone now.
4181    return BB;
4182  }
4183
4184  case ARM::BCCi64:
4185  case ARM::BCCZi64: {
4186    // Compare both parts that make up the double comparison separately for
4187    // equality.
4188    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
4189
4190    unsigned LHS1 = MI->getOperand(1).getReg();
4191    unsigned LHS2 = MI->getOperand(2).getReg();
4192    if (RHSisZero) {
4193      AddDefaultPred(BuildMI(BB, dl,
4194                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4195                     .addReg(LHS1).addImm(0));
4196      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4197        .addReg(LHS2).addImm(0)
4198        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4199    } else {
4200      unsigned RHS1 = MI->getOperand(3).getReg();
4201      unsigned RHS2 = MI->getOperand(4).getReg();
4202      AddDefaultPred(BuildMI(BB, dl,
4203                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4204                     .addReg(LHS1).addReg(RHS1));
4205      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4206        .addReg(LHS2).addReg(RHS2)
4207        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4208    }
4209
4210    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
4211    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
4212    if (MI->getOperand(0).getImm() == ARMCC::NE)
4213      std::swap(destMBB, exitMBB);
4214
4215    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4216      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
4217    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
4218      .addMBB(exitMBB);
4219
4220    MI->eraseFromParent();   // The pseudo instruction is gone now.
4221    return BB;
4222  }
4223  }
4224}
4225
4226//===----------------------------------------------------------------------===//
4227//                           ARM Optimization Hooks
4228//===----------------------------------------------------------------------===//
4229
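/// combineSelectAndUse - Fold (op x, (select cc, 0, c)) into
/// (select cc, x, (op x, c)), inverting the condition when the zero is in the
/// other arm of the select.  Used for ISD::ADD and ISD::SUB.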
4230static
4231SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
4232                            TargetLowering::DAGCombinerInfo &DCI) {
4233  SelectionDAG &DAG = DCI.DAG;
4234  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4235  EVT VT = N->getValueType(0);
4236  unsigned Opc = N->getOpcode();
4237  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
4238  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
4239  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
4240  ISD::CondCode CC = ISD::SETCC_INVALID;
4241
4242  if (isSlctCC) {
4243    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
4244  } else {
4245    SDValue CCOp = Slct.getOperand(0);
4246    if (CCOp.getOpcode() == ISD::SETCC)
4247      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
4248  }
4249
4250  bool DoXform = false;
4251  bool InvCC = false;
4252  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
4253          "Bad input!");
4254
4255  if (LHS.getOpcode() == ISD::Constant &&
4256      cast<ConstantSDNode>(LHS)->isNullValue()) {
4257    DoXform = true;
4258  } else if (CC != ISD::SETCC_INVALID &&
4259             RHS.getOpcode() == ISD::Constant &&
4260             cast<ConstantSDNode>(RHS)->isNullValue()) {
4261    std::swap(LHS, RHS);
4262    SDValue Op0 = Slct.getOperand(0);
4263    EVT OpVT = isSlctCC ? Op0.getValueType() :
4264                          Op0.getOperand(0).getValueType();
4265    bool isInt = OpVT.isInteger();
4266    CC = ISD::getSetCCInverse(CC, isInt);
4267
4268    if (!TLI.isCondCodeLegal(CC, OpVT))
4269      return SDValue();         // Inverse operator isn't legal.
4270
4271    DoXform = true;
4272    InvCC = true;
4273  }
4274
4275  if (DoXform) {
4276    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
4277    if (isSlctCC)
4278      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
4279                             Slct.getOperand(0), Slct.getOperand(1), CC);
4280    SDValue CCOp = Slct.getOperand(0);
4281    if (InvCC)
4282      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
4283                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
4284    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
4285                       CCOp, OtherOp, Result);
4286  }
4287  return SDValue();
4288}
4289
4290/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
4291/// operands N0 and N1.  This is a helper for PerformADDCombine that is
4292/// called with the default operands, and if that fails, with commuted
4293/// operands.
4294static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
4295                                         TargetLowering::DAGCombinerInfo &DCI) {
4296  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
4297  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
4298    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
4299    if (Result.getNode()) return Result;
4300  }
4301  return SDValue();
4302}
4303
4304/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
4305///
4306static SDValue PerformADDCombine(SDNode *N,
4307                                 TargetLowering::DAGCombinerInfo &DCI) {
4308  SDValue N0 = N->getOperand(0);
4309  SDValue N1 = N->getOperand(1);
4310
4311  // First try with the default operand order.
4312  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
4313  if (Result.getNode())
4314    return Result;
4315
4316  // If that didn't work, try again with the operands commuted.
4317  return PerformADDCombineWithOperands(N, N1, N0, DCI);
4318}
4319
4320/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
4321///
4322static SDValue PerformSUBCombine(SDNode *N,
4323                                 TargetLowering::DAGCombinerInfo &DCI) {
4324  SDValue N0 = N->getOperand(0);
4325  SDValue N1 = N->getOperand(1);
4326
4327  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
4328  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4329    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4330    if (Result.getNode()) return Result;
4331  }
4332
4333  return SDValue();
4334}
4335
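/// PerformMULCombine - Target-specific dag combine xforms for ISD::MUL.
/// Lowers i32 multiplications by constants of the form (2^N +/- 1) << M into
/// a shift-and-add or shift-and-subtract plus a final shift, except on Thumb1
/// or when optimizing for size.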
4336static SDValue PerformMULCombine(SDNode *N,
4337                                 TargetLowering::DAGCombinerInfo &DCI,
4338                                 const ARMSubtarget *Subtarget) {
4339  SelectionDAG &DAG = DCI.DAG;
4340
4341  if (Subtarget->isThumb1Only())
4342    return SDValue();
4343
4344  if (DAG.getMachineFunction().
4345      getFunction()->hasFnAttr(Attribute::OptimizeForSize))
4346    return SDValue();
4347
4348  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
4349    return SDValue();
4350
4351  EVT VT = N->getValueType(0);
4352  if (VT != MVT::i32)
4353    return SDValue();
4354
4355  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
4356  if (!C)
4357    return SDValue();
4358
4359  uint64_t MulAmt = C->getZExtValue();
4360  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
4361  ShiftAmt = ShiftAmt & (32 - 1);
4362  SDValue V = N->getOperand(0);
4363  DebugLoc DL = N->getDebugLoc();
4364
4365  SDValue Res;
4366  MulAmt >>= ShiftAmt;
4367  if (isPowerOf2_32(MulAmt - 1)) {
4368    // (mul x, 2^N + 1) => (add (shl x, N), x)
4369    Res = DAG.getNode(ISD::ADD, DL, VT,
4370                      V, DAG.getNode(ISD::SHL, DL, VT,
4371                                     V, DAG.getConstant(Log2_32(MulAmt-1),
4372                                                        MVT::i32)));
4373  } else if (isPowerOf2_32(MulAmt + 1)) {
4374    // (mul x, 2^N - 1) => (sub (shl x, N), x)
4375    Res = DAG.getNode(ISD::SUB, DL, VT,
4376                      DAG.getNode(ISD::SHL, DL, VT,
4377                                  V, DAG.getConstant(Log2_32(MulAmt+1),
4378                                                     MVT::i32)),
4379                      V);
4380  } else
4381    return SDValue();
4382
4383  if (ShiftAmt != 0)
4384    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
4385                      DAG.getConstant(ShiftAmt, MVT::i32));
4386
4387  // Do not add new nodes to DAG combiner worklist.
4388  DCI.CombineTo(N, Res, false);
4389  return SDValue();
4390}
4391
4392/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
4393static SDValue PerformORCombine(SDNode *N,
4394                                TargetLowering::DAGCombinerInfo &DCI,
4395                                const ARMSubtarget *Subtarget) {
4396  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
4397  // reasonable.
4398
4399  // BFI is only available on V6T2+
4400  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
4401    return SDValue();
4402
4403  SelectionDAG &DAG = DCI.DAG;
4404  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4405  DebugLoc DL = N->getDebugLoc();
4406  // 1) or (and A, mask), val => ARMbfi A, val, mask
4407  //      iff (val & mask) == 0
4408  //
4409  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4410  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
4411  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
4412  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
4413  //          && CountPopulation_32(~mask) == CountPopulation_32(mask2)
4414  //  (i.e., copy a bitfield value into another bitfield of the same width)
4415  if (N0.getOpcode() != ISD::AND)
4416    return SDValue();
4417
4418  EVT VT = N->getValueType(0);
4419  if (VT != MVT::i32)
4420    return SDValue();
4421
4422
4423  // The value and the mask need to be constants so we can verify this is
4424  // actually a bitfield set. If the mask is 0xffff, we can do better
4425  // via a movt instruction, so don't use BFI in that case.
4426  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4427  if (!C)
4428    return SDValue();
4429  unsigned Mask = C->getZExtValue();
4430  if (Mask == 0xffff)
4431    return SDValue();
4432  SDValue Res;
4433  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
4434  if ((C = dyn_cast<ConstantSDNode>(N1))) {
4435    unsigned Val = C->getZExtValue();
4436    if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
4437      return SDValue();
4438    Val >>= CountTrailingZeros_32(~Mask);
4439
4440    Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
4441                      DAG.getConstant(Val, MVT::i32),
4442                      DAG.getConstant(Mask, MVT::i32));
4443
4444    // Do not add new nodes to DAG combiner worklist.
4445    DCI.CombineTo(N, Res, false);
4446  } else if (N1.getOpcode() == ISD::AND) {
4447    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4448    C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4449    if (!C)
4450      return SDValue();
4451    unsigned Mask2 = C->getZExtValue();
4452
4453    if (ARM::isBitFieldInvertedMask(Mask) &&
4454        ARM::isBitFieldInvertedMask(~Mask2) &&
4455        (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
4456      // The pack halfword instruction works better for masks that fit it,
4457      // so use that when it's available.
4458      if (Subtarget->hasT2ExtractPack() &&
4459          (Mask == 0xffff || Mask == 0xffff0000))
4460        return SDValue();
4461      // 2a
4462      unsigned lsb = CountTrailingZeros_32(Mask2);
4463      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
4464                        DAG.getConstant(lsb, MVT::i32));
4465      Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
4466                        DAG.getConstant(Mask, MVT::i32));
4467      // Do not add new nodes to DAG combiner worklist.
4468      DCI.CombineTo(N, Res, false);
4469    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
4470               ARM::isBitFieldInvertedMask(Mask2) &&
4471               (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
4472      // The pack halfword instruction works better for masks that fit it,
4473      // so use that when it's available.
4474      if (Subtarget->hasT2ExtractPack() &&
4475          (Mask2 == 0xffff || Mask2 == 0xffff0000))
4476        return SDValue();
4477      // 2b
4478      unsigned lsb = CountTrailingZeros_32(Mask);
4479      Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4480                        DAG.getConstant(lsb, MVT::i32));
4481      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
4482                        DAG.getConstant(Mask2, MVT::i32));
4483      // Do not add new nodes to DAG combiner worklist.
4484      DCI.CombineTo(N, Res, false);
4485    }
4486  }
4487
4488  return SDValue();
4489}
4490
4491/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
4492/// ARMISD::VMOVRRD.
4493static SDValue PerformVMOVRRDCombine(SDNode *N,
4494                                   TargetLowering::DAGCombinerInfo &DCI) {
4495  // fmrrd(fmdrr x, y) -> x,y
4496  SDValue InDouble = N->getOperand(0);
4497  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
4498    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
4499  return SDValue();
4500}
4501
4502/// PerformVDUPLANECombine - Target-specific dag combine xforms for
4503/// ARMISD::VDUPLANE.
4504static SDValue PerformVDUPLANECombine(SDNode *N,
4505                                      TargetLowering::DAGCombinerInfo &DCI) {
4506  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
4507  // redundant.
4508  SDValue Op = N->getOperand(0);
4509  EVT VT = N->getValueType(0);
4510
4511  // Ignore bit_converts.
4512  while (Op.getOpcode() == ISD::BIT_CONVERT)
4513    Op = Op.getOperand(0);
4514  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
4515    return SDValue();
4516
4517  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
4518  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
4519  // The canonical VMOV for a zero vector uses a 32-bit element size.
4520  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4521  unsigned EltBits;
4522  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
4523    EltSize = 8;
4524  if (EltSize > VT.getVectorElementType().getSizeInBits())
4525    return SDValue();
4526
4527  SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
4528  return DCI.CombineTo(N, Res, false);
4529}
4530
4531/// getVShiftImm - Check if this is a valid build_vector for the immediate
4532/// operand of a vector shift operation, where all the elements of the
4533/// build_vector must have the same constant integer value.
4534static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
4535  // Ignore bit_converts.
4536  while (Op.getOpcode() == ISD::BIT_CONVERT)
4537    Op = Op.getOperand(0);
4538  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
4539  APInt SplatBits, SplatUndef;
4540  unsigned SplatBitSize;
4541  bool HasAnyUndefs;
4542  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
4543                                      HasAnyUndefs, ElementBits) ||
4544      SplatBitSize > ElementBits)
4545    return false;
4546  Cnt = SplatBits.getSExtValue();
4547  return true;
4548}
4549
4550/// isVShiftLImm - Check if this is a valid build_vector for the immediate
4551/// operand of a vector shift left operation.  That value must be in the range:
4552///   0 <= Value < ElementBits for a left shift; or
4553///   0 <= Value <= ElementBits for a long left shift.
4554static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
4555  assert(VT.isVector() && "vector shift count is not a vector type");
4556  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4557  if (! getVShiftImm(Op, ElementBits, Cnt))
4558    return false;
4559  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
4560}
4561
4562/// isVShiftRImm - Check if this is a valid build_vector for the immediate
4563/// operand of a vector shift right operation.  For a shift opcode, the value
4564/// is positive, but for an intrinsic the value count must be negative. The
4565/// absolute value must be in the range:
4566///   1 <= |Value| <= ElementBits for a right shift; or
4567///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
4568static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
4569                         int64_t &Cnt) {
4570  assert(VT.isVector() && "vector shift count is not a vector type");
4571  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4572  if (! getVShiftImm(Op, ElementBits, Cnt))
4573    return false;
4574  if (isIntrinsic)
4575    Cnt = -Cnt;
4576  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
4577}
4578
4579/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
4580static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
4581  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4582  switch (IntNo) {
4583  default:
4584    // Don't do anything for most intrinsics.
4585    break;
4586
4587  // Vector shifts: check for immediate versions and lower them.
4588  // Note: This is done during DAG combining instead of DAG legalizing because
4589  // the build_vectors for 64-bit vector element shift counts are generally
4590  // not legal, and it is hard to see their values after they get legalized to
4591  // loads from a constant pool.
4592  case Intrinsic::arm_neon_vshifts:
4593  case Intrinsic::arm_neon_vshiftu:
4594  case Intrinsic::arm_neon_vshiftls:
4595  case Intrinsic::arm_neon_vshiftlu:
4596  case Intrinsic::arm_neon_vshiftn:
4597  case Intrinsic::arm_neon_vrshifts:
4598  case Intrinsic::arm_neon_vrshiftu:
4599  case Intrinsic::arm_neon_vrshiftn:
4600  case Intrinsic::arm_neon_vqshifts:
4601  case Intrinsic::arm_neon_vqshiftu:
4602  case Intrinsic::arm_neon_vqshiftsu:
4603  case Intrinsic::arm_neon_vqshiftns:
4604  case Intrinsic::arm_neon_vqshiftnu:
4605  case Intrinsic::arm_neon_vqshiftnsu:
4606  case Intrinsic::arm_neon_vqrshiftns:
4607  case Intrinsic::arm_neon_vqrshiftnu:
4608  case Intrinsic::arm_neon_vqrshiftnsu: {
4609    EVT VT = N->getOperand(1).getValueType();
4610    int64_t Cnt;
4611    unsigned VShiftOpc = 0;
4612
4613    switch (IntNo) {
4614    case Intrinsic::arm_neon_vshifts:
4615    case Intrinsic::arm_neon_vshiftu:
4616      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
4617        VShiftOpc = ARMISD::VSHL;
4618        break;
4619      }
4620      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
4621        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
4622                     ARMISD::VSHRs : ARMISD::VSHRu);
4623        break;
4624      }
4625      return SDValue();
4626
4627    case Intrinsic::arm_neon_vshiftls:
4628    case Intrinsic::arm_neon_vshiftlu:
4629      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
4630        break;
4631      llvm_unreachable("invalid shift count for vshll intrinsic");
4632
4633    case Intrinsic::arm_neon_vrshifts:
4634    case Intrinsic::arm_neon_vrshiftu:
4635      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
4636        break;
4637      return SDValue();
4638
4639    case Intrinsic::arm_neon_vqshifts:
4640    case Intrinsic::arm_neon_vqshiftu:
4641      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4642        break;
4643      return SDValue();
4644
4645    case Intrinsic::arm_neon_vqshiftsu:
4646      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4647        break;
4648      llvm_unreachable("invalid shift count for vqshlu intrinsic");
4649
4650    case Intrinsic::arm_neon_vshiftn:
4651    case Intrinsic::arm_neon_vrshiftn:
4652    case Intrinsic::arm_neon_vqshiftns:
4653    case Intrinsic::arm_neon_vqshiftnu:
4654    case Intrinsic::arm_neon_vqshiftnsu:
4655    case Intrinsic::arm_neon_vqrshiftns:
4656    case Intrinsic::arm_neon_vqrshiftnu:
4657    case Intrinsic::arm_neon_vqrshiftnsu:
4658      // Narrowing shifts require an immediate right shift.
4659      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
4660        break;
4661      llvm_unreachable("invalid shift count for narrowing vector shift "
4662                       "intrinsic");
4663
4664    default:
4665      llvm_unreachable("unhandled vector shift");
4666    }
4667
4668    switch (IntNo) {
4669    case Intrinsic::arm_neon_vshifts:
4670    case Intrinsic::arm_neon_vshiftu:
4671      // Opcode already set above.
4672      break;
4673    case Intrinsic::arm_neon_vshiftls:
4674    case Intrinsic::arm_neon_vshiftlu:
4675      if (Cnt == VT.getVectorElementType().getSizeInBits())
4676        VShiftOpc = ARMISD::VSHLLi;
4677      else
4678        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
4679                     ARMISD::VSHLLs : ARMISD::VSHLLu);
4680      break;
4681    case Intrinsic::arm_neon_vshiftn:
4682      VShiftOpc = ARMISD::VSHRN; break;
4683    case Intrinsic::arm_neon_vrshifts:
4684      VShiftOpc = ARMISD::VRSHRs; break;
4685    case Intrinsic::arm_neon_vrshiftu:
4686      VShiftOpc = ARMISD::VRSHRu; break;
4687    case Intrinsic::arm_neon_vrshiftn:
4688      VShiftOpc = ARMISD::VRSHRN; break;
4689    case Intrinsic::arm_neon_vqshifts:
4690      VShiftOpc = ARMISD::VQSHLs; break;
4691    case Intrinsic::arm_neon_vqshiftu:
4692      VShiftOpc = ARMISD::VQSHLu; break;
4693    case Intrinsic::arm_neon_vqshiftsu:
4694      VShiftOpc = ARMISD::VQSHLsu; break;
4695    case Intrinsic::arm_neon_vqshiftns:
4696      VShiftOpc = ARMISD::VQSHRNs; break;
4697    case Intrinsic::arm_neon_vqshiftnu:
4698      VShiftOpc = ARMISD::VQSHRNu; break;
4699    case Intrinsic::arm_neon_vqshiftnsu:
4700      VShiftOpc = ARMISD::VQSHRNsu; break;
4701    case Intrinsic::arm_neon_vqrshiftns:
4702      VShiftOpc = ARMISD::VQRSHRNs; break;
4703    case Intrinsic::arm_neon_vqrshiftnu:
4704      VShiftOpc = ARMISD::VQRSHRNu; break;
4705    case Intrinsic::arm_neon_vqrshiftnsu:
4706      VShiftOpc = ARMISD::VQRSHRNsu; break;
4707    }
4708
4709    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4710                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
4711  }
4712
4713  case Intrinsic::arm_neon_vshiftins: {
4714    EVT VT = N->getOperand(1).getValueType();
4715    int64_t Cnt;
4716    unsigned VShiftOpc = 0;
4717
4718    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
4719      VShiftOpc = ARMISD::VSLI;
4720    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
4721      VShiftOpc = ARMISD::VSRI;
4722    else {
4723      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
4724    }
4725
4726    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4727                       N->getOperand(1), N->getOperand(2),
4728                       DAG.getConstant(Cnt, MVT::i32));
4729  }
4730
4731  case Intrinsic::arm_neon_vqrshifts:
4732  case Intrinsic::arm_neon_vqrshiftu:
4733    // No immediate versions of these to check for.
4734    break;
4735  }
4736
4737  return SDValue();
4738}
4739
4740/// PerformShiftCombine - Checks for immediate versions of vector shifts and
4741/// lowers them.  As with the vector shift intrinsics, this is done during DAG
4742/// combining instead of DAG legalizing because the build_vectors for 64-bit
4743/// vector element shift counts are generally not legal, and it is hard to see
4744/// their values after they get legalized to loads from a constant pool.
4745static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
4746                                   const ARMSubtarget *ST) {
4747  EVT VT = N->getValueType(0);
4748
4749  // Nothing to be done for scalar shifts.
4750  if (! VT.isVector())
4751    return SDValue();
4752
4753  assert(ST->hasNEON() && "unexpected vector shift");
4754  int64_t Cnt;
4755
4756  switch (N->getOpcode()) {
4757  default: llvm_unreachable("unexpected shift opcode");
4758
4759  case ISD::SHL:
4760    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
4761      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
4762                         DAG.getConstant(Cnt, MVT::i32));
4763    break;
4764
4765  case ISD::SRA:
4766  case ISD::SRL:
4767    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
4768      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
4769                            ARMISD::VSHRs : ARMISD::VSHRu);
4770      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
4771                         DAG.getConstant(Cnt, MVT::i32));
4772    }
4773  }
4774  return SDValue();
4775}
4776
4777/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
4778/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
4779static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
4780                                    const ARMSubtarget *ST) {
4781  SDValue N0 = N->getOperand(0);
4782
4783  // Check for sign- and zero-extensions of vector extract operations of 8-
4784  // and 16-bit vector elements.  NEON supports these directly.  They are
4785  // handled during DAG combining because type legalization will promote them
4786  // to 32-bit types and it is messy to recognize the operations after that.
4787  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4788    SDValue Vec = N0.getOperand(0);
4789    SDValue Lane = N0.getOperand(1);
4790    EVT VT = N->getValueType(0);
4791    EVT EltVT = N0.getValueType();
4792    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4793
4794    if (VT == MVT::i32 &&
4795        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
4796        TLI.isTypeLegal(Vec.getValueType())) {
4797
4798      unsigned Opc = 0;
4799      switch (N->getOpcode()) {
4800      default: llvm_unreachable("unexpected opcode");
4801      case ISD::SIGN_EXTEND:
4802        Opc = ARMISD::VGETLANEs;
4803        break;
4804      case ISD::ZERO_EXTEND:
4805      case ISD::ANY_EXTEND:
4806        Opc = ARMISD::VGETLANEu;
4807        break;
4808      }
4809      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
4810    }
4811  }
4812
4813  return SDValue();
4814}
4815
4816/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
4817/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
4818static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
4819                                       const ARMSubtarget *ST) {
4820  // If the target supports NEON, try to use vmax/vmin instructions for f32
4821  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
4822  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
4823  // a NaN; only do the transformation when it matches that behavior.
4824
4825  // For now only do this when using NEON for FP operations; if using VFP, it
4826  // is not obvious that the benefit outweighs the cost of switching to the
4827  // NEON pipeline.
4828  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
4829      N->getValueType(0) != MVT::f32)
4830    return SDValue();
4831
4832  SDValue CondLHS = N->getOperand(0);
4833  SDValue CondRHS = N->getOperand(1);
4834  SDValue LHS = N->getOperand(2);
4835  SDValue RHS = N->getOperand(3);
4836  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4837
4838  unsigned Opcode = 0;
4839  bool IsReversed;
4840  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
4841    IsReversed = false; // x CC y ? x : y
4842  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
4843    IsReversed = true; // x CC y ? y : x
4844  } else {
4845    return SDValue();
4846  }
4847
4848  bool IsUnordered;
4849  switch (CC) {
4850  default: break;
4851  case ISD::SETOLT:
4852  case ISD::SETOLE:
4853  case ISD::SETLT:
4854  case ISD::SETLE:
4855  case ISD::SETULT:
4856  case ISD::SETULE:
4857    // If LHS is NaN, an ordered comparison will be false and the result will
4858    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
4859    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4860    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
4861    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4862      break;
4863    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
4864    // will return -0, so vmin can only be used for unsafe math or if one of
4865    // the operands is known to be nonzero.
4866    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
4867        !UnsafeFPMath &&
4868        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4869      break;
4870    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
4871    break;
4872
4873  case ISD::SETOGT:
4874  case ISD::SETOGE:
4875  case ISD::SETGT:
4876  case ISD::SETGE:
4877  case ISD::SETUGT:
4878  case ISD::SETUGE:
4879    // If LHS is NaN, an ordered comparison will be false and the result will
4880    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
4881    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4882    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
4883    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4884      break;
4885    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
4886    // will return +0, so vmax can only be used for unsafe math or if one of
4887    // the operands is known to be nonzero.
4888    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
4889        !UnsafeFPMath &&
4890        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4891      break;
4892    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
4893    break;
4894  }
4895
4896  if (!Opcode)
4897    return SDValue();
4898  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
4899}
4900
4901SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
4902                                             DAGCombinerInfo &DCI) const {
4903  switch (N->getOpcode()) {
4904  default: break;
4905  case ISD::ADD:        return PerformADDCombine(N, DCI);
4906  case ISD::SUB:        return PerformSUBCombine(N, DCI);
4907  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
4908  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
4909  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
4910  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
4911  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
4912  case ISD::SHL:
4913  case ISD::SRA:
4914  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
4915  case ISD::SIGN_EXTEND:
4916  case ISD::ZERO_EXTEND:
4917  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
4918  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
4919  }
4920  return SDValue();
4921}
4922
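/// allowsUnalignedMemoryAccesses - Return true if unaligned loads and stores
/// of the given type are legal for this subtarget; currently only v6+ Darwin
/// targets report support, and only for i8, i16 and i32.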
4923bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
4924  if (!Subtarget->hasV6Ops())
4925    // Pre-v6 does not support unaligned mem access.
4926    return false;
4927
4928  // v6+ may or may not support unaligned mem access depending on the system
4929  // configuration.
4930  // FIXME: This is pretty conservative. Should we provide cmdline option to
4931  // control the behaviour?
4932  if (!Subtarget->isTargetDarwin())
4933    return false;
4934
4935  switch (VT.getSimpleVT().SimpleTy) {
4936  default:
4937    return false;
4938  case MVT::i8:
4939  case MVT::i16:
4940  case MVT::i32:
4941    return true;
4942  // FIXME: VLD1 etc with standard alignment is legal.
4943  }
4944}
4945
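/// isLegalT1AddressImmediate - Return true if V is a valid immediate offset
/// for a Thumb1 load / store of the given type: a non-negative multiple of
/// the access size that fits in 5 bits after scaling.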
4946static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
4947  if (V < 0)
4948    return false;
4949
4950  unsigned Scale = 1;
4951  switch (VT.getSimpleVT().SimpleTy) {
4952  default: return false;
4953  case MVT::i1:
4954  case MVT::i8:
4955    // Scale == 1;
4956    break;
4957  case MVT::i16:
4958    // Scale == 2;
4959    Scale = 2;
4960    break;
4961  case MVT::i32:
4962    // Scale == 4;
4963    Scale = 4;
4964    break;
4965  }
4966
4967  if ((V & (Scale - 1)) != 0)
4968    return false;
4969  V /= Scale;
4970  return V == (V & ((1LL << 5) - 1));
4971}
4972
4973static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
4974                                      const ARMSubtarget *Subtarget) {
4975  bool isNeg = false;
4976  if (V < 0) {
4977    isNeg = true;
4978    V = - V;
4979  }
4980
4981  switch (VT.getSimpleVT().SimpleTy) {
4982  default: return false;
4983  case MVT::i1:
4984  case MVT::i8:
4985  case MVT::i16:
4986  case MVT::i32:
4987    // + imm12 or - imm8
4988    if (isNeg)
4989      return V == (V & ((1LL << 8) - 1));
4990    return V == (V & ((1LL << 12) - 1));
4991  case MVT::f32:
4992  case MVT::f64:
4993    // Same as ARM mode. FIXME: NEON?
4994    if (!Subtarget->hasVFP2())
4995      return false;
4996    if ((V & 3) != 0)
4997      return false;
4998    V >>= 2;
4999    return V == (V & ((1LL << 8) - 1));
5000  }
5001}
5002
5003/// isLegalAddressImmediate - Return true if the integer value can be used
5004/// as the offset of the target addressing mode for load / store of the
5005/// given type.
5006static bool isLegalAddressImmediate(int64_t V, EVT VT,
5007                                    const ARMSubtarget *Subtarget) {
5008  if (V == 0)
5009    return true;
5010
5011  if (!VT.isSimple())
5012    return false;
5013
5014  if (Subtarget->isThumb1Only())
5015    return isLegalT1AddressImmediate(V, VT);
5016  else if (Subtarget->isThumb2())
5017    return isLegalT2AddressImmediate(V, VT, Subtarget);
5018
5019  // ARM mode.
5020  if (V < 0)
5021    V = - V;
5022  switch (VT.getSimpleVT().SimpleTy) {
5023  default: return false;
5024  case MVT::i1:
5025  case MVT::i8:
5026  case MVT::i32:
5027    // +- imm12
5028    return V == (V & ((1LL << 12) - 1));
5029  case MVT::i16:
5030    // +- imm8
5031    return V == (V & ((1LL << 8) - 1));
5032  case MVT::f32:
5033  case MVT::f64:
5034    if (!Subtarget->hasVFP2()) // FIXME: NEON?
5035      return false;
5036    if ((V & 3) != 0)
5037      return false;
5038    V >>= 2;
5039    return V == (V & ((1LL << 8) - 1));
5040  }
5041}
5042
5043bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
5044                                                      EVT VT) const {
5045  int Scale = AM.Scale;
5046  if (Scale < 0)
5047    return false;
5048
5049  switch (VT.getSimpleVT().SimpleTy) {
5050  default: return false;
5051  case MVT::i1:
5052  case MVT::i8:
5053  case MVT::i16:
5054  case MVT::i32:
5055    if (Scale == 1)
5056      return true;
5057    // r + r << imm
5058    Scale = Scale & ~1;
5059    return Scale == 2 || Scale == 4 || Scale == 8;
5060  case MVT::i64:
5061    // r + r
5062    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5063      return true;
5064    return false;
5065  case MVT::isVoid:
5066    // Note, we allow "void" uses (basically, uses that aren't loads or
5067    // stores), because arm allows folding a scale into many arithmetic
5068    // operations.  This should be made more precise and revisited later.
5069
5070    // Allow r << imm, but the imm has to be a multiple of two.
5071    if (Scale & 1) return false;
5072    return isPowerOf2_32(Scale);
5073  }
5074}
5075
5076/// isLegalAddressingMode - Return true if the addressing mode represented
5077/// by AM is legal for this target, for a load/store of the specified type.
5078bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
5079                                              const Type *Ty) const {
5080  EVT VT = getValueType(Ty, true);
5081  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
5082    return false;
5083
5084  // Can never fold addr of global into load/store.
5085  if (AM.BaseGV)
5086    return false;
5087
5088  switch (AM.Scale) {
5089  case 0:  // no scale reg, must be "r+i" or "r", or "i".
5090    break;
5091  case 1:
5092    if (Subtarget->isThumb1Only())
5093      return false;
5094    // FALL THROUGH.
5095  default:
5096    // ARM doesn't support any R+R*scale+imm addr modes.
5097    if (AM.BaseOffs)
5098      return false;
5099
5100    if (!VT.isSimple())
5101      return false;
5102
5103    if (Subtarget->isThumb2())
5104      return isLegalT2ScaledAddressingMode(AM, VT);
5105
5106    int Scale = AM.Scale;
5107    switch (VT.getSimpleVT().SimpleTy) {
5108    default: return false;
5109    case MVT::i1:
5110    case MVT::i8:
5111    case MVT::i32:
5112      if (Scale < 0) Scale = -Scale;
5113      if (Scale == 1)
5114        return true;
5115      // r + r << imm
5116      return isPowerOf2_32(Scale & ~1);
5117    case MVT::i16:
5118    case MVT::i64:
5119      // r + r
5120      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5121        return true;
5122      return false;
5123
5124    case MVT::isVoid:
5125      // Note, we allow "void" uses (basically, uses that aren't loads or
5126      // stores), because arm allows folding a scale into many arithmetic
5127      // operations.  This should be made more precise and revisited later.
5128
5129      // Allow r << imm, but the imm has to be a multiple of two.
5130      if (Scale & 1) return false;
5131      return isPowerOf2_32(Scale);
5132    }
5133    break;
5134  }
5135  return true;
5136}
5137
5138/// isLegalICmpImmediate - Return true if the specified immediate is legal
5139/// icmp immediate, that is the target has icmp instructions which can compare
5140/// a register against the immediate without having to materialize the
5141/// immediate into a register.
5142bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
5143  if (!Subtarget->isThumb())
5144    return ARM_AM::getSOImmVal(Imm) != -1;
5145  if (Subtarget->isThumb2())
5146    return ARM_AM::getT2SOImmVal(Imm) != -1;
5147  return Imm >= 0 && Imm <= 255;
5148}
5149
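/// getARMIndexedAddressParts - Split an ADD / SUB addressing node into base
/// and offset operands suitable for an ARM-mode pre/post-indexed load or
/// store: addressing mode 3 for i16 and sign-extending i8/i1 accesses,
/// addressing mode 2 for i32 and unsigned i8/i1 accesses.  FP types are not
/// handled yet.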
5150static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
5151                                      bool isSEXTLoad, SDValue &Base,
5152                                      SDValue &Offset, bool &isInc,
5153                                      SelectionDAG &DAG) {
5154  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5155    return false;
5156
5157  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
5158    // AddressingMode 3
5159    Base = Ptr->getOperand(0);
5160    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5161      int RHSC = (int)RHS->getZExtValue();
5162      if (RHSC < 0 && RHSC > -256) {
5163        assert(Ptr->getOpcode() == ISD::ADD);
5164        isInc = false;
5165        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5166        return true;
5167      }
5168    }
5169    isInc = (Ptr->getOpcode() == ISD::ADD);
5170    Offset = Ptr->getOperand(1);
5171    return true;
5172  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
5173    // AddressingMode 2
5174    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5175      int RHSC = (int)RHS->getZExtValue();
5176      if (RHSC < 0 && RHSC > -0x1000) {
5177        assert(Ptr->getOpcode() == ISD::ADD);
5178        isInc = false;
5179        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5180        Base = Ptr->getOperand(0);
5181        return true;
5182      }
5183    }
5184
5185    if (Ptr->getOpcode() == ISD::ADD) {
5186      isInc = true;
5187      ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
5188      if (ShOpcVal != ARM_AM::no_shift) {
5189        Base = Ptr->getOperand(1);
5190        Offset = Ptr->getOperand(0);
5191      } else {
5192        Base = Ptr->getOperand(0);
5193        Offset = Ptr->getOperand(1);
5194      }
5195      return true;
5196    }
5197
5198    isInc = (Ptr->getOpcode() == ISD::ADD);
5199    Base = Ptr->getOperand(0);
5200    Offset = Ptr->getOperand(1);
5201    return true;
5202  }
5203
5204  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
5205  return false;
5206}
5207
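/// getT2IndexedAddressParts - Split an ADD / SUB addressing node into a base
/// register and an 8-bit immediate offset suitable for a Thumb2
/// pre/post-indexed load or store.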
5208static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
5209                                     bool isSEXTLoad, SDValue &Base,
5210                                     SDValue &Offset, bool &isInc,
5211                                     SelectionDAG &DAG) {
5212  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5213    return false;
5214
5215  Base = Ptr->getOperand(0);
5216  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5217    int RHSC = (int)RHS->getZExtValue();
5218    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
5219      assert(Ptr->getOpcode() == ISD::ADD);
5220      isInc = false;
5221      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5222      return true;
5223    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
5224      isInc = Ptr->getOpcode() == ISD::ADD;
5225      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
5226      return true;
5227    }
5228  }
5229
5230  return false;
5231}
5232
5233/// getPreIndexedAddressParts - returns true by value, base pointer and
5234/// offset pointer and addressing mode by reference if the node's address
5235/// can be legally represented as a pre-indexed load / store address.
5236bool
5237ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
5238                                             SDValue &Offset,
5239                                             ISD::MemIndexedMode &AM,
5240                                             SelectionDAG &DAG) const {
5241  if (Subtarget->isThumb1Only())
5242    return false;
5243
5244  EVT VT;
5245  SDValue Ptr;
5246  bool isSEXTLoad = false;
5247  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5248    Ptr = LD->getBasePtr();
5249    VT  = LD->getMemoryVT();
5250    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5251  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5252    Ptr = ST->getBasePtr();
5253    VT  = ST->getMemoryVT();
5254  } else
5255    return false;
5256
5257  bool isInc;
5258  bool isLegal = false;
5259  if (Subtarget->isThumb2())
5260    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5261                                       Offset, isInc, DAG);
5262  else
5263    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5264                                        Offset, isInc, DAG);
5265  if (!isLegal)
5266    return false;
5267
5268  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
5269  return true;
5270}
5271
5272/// getPostIndexedAddressParts - returns true by value, base pointer and
5273/// offset pointer and addressing mode by reference if this node can be
5274/// combined with a load / store to form a post-indexed load / store.
5275bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
5276                                                   SDValue &Base,
5277                                                   SDValue &Offset,
5278                                                   ISD::MemIndexedMode &AM,
5279                                                   SelectionDAG &DAG) const {
5280  if (Subtarget->isThumb1Only())
5281    return false;
5282
5283  EVT VT;
5284  SDValue Ptr;
5285  bool isSEXTLoad = false;
5286  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5287    VT  = LD->getMemoryVT();
5288    Ptr = LD->getBasePtr();
5289    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5290  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5291    VT  = ST->getMemoryVT();
5292    Ptr = ST->getBasePtr();
5293  } else
5294    return false;
5295
5296  bool isInc;
5297  bool isLegal = false;
5298  if (Subtarget->isThumb2())
5299    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5300                                       isInc, DAG);
5301  else
5302    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5303                                        isInc, DAG);
5304  if (!isLegal)
5305    return false;
5306
5307  if (Ptr != Base) {
5308    // Swap the base pointer and offset to catch more post-indexed loads /
5309    // stores when legal. In Thumb2 mode, the offset must be an immediate.
5310    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
5311        !Subtarget->isThumb2())
5312      std::swap(Base, Offset);
5313
5314    // Post-indexed loads / stores update the base pointer.
5315    if (Ptr != Base)
5316      return false;
5317  }
5318
5319  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
5320  return true;
5321}
5322
5323void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
5324                                                       const APInt &Mask,
5325                                                       APInt &KnownZero,
5326                                                       APInt &KnownOne,
5327                                                       const SelectionDAG &DAG,
5328                                                       unsigned Depth) const {
5329  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
5330  switch (Op.getOpcode()) {
5331  default: break;
5332  case ARMISD::CMOV: {
5333    // Bits are known zero/one if known on the LHS and RHS.
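    // Illustrative example (assumed operand shapes, not from this file): if
    // operand 0 is (and x, 0xFF) and operand 1 is the constant 0x10, bits
    // 31..8 are known zero in both inputs, so they are known zero here too.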
5334    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
5335    if (KnownZero == 0 && KnownOne == 0) return;
5336
5337    APInt KnownZeroRHS, KnownOneRHS;
5338    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
5339                          KnownZeroRHS, KnownOneRHS, Depth+1);
5340    KnownZero &= KnownZeroRHS;
5341    KnownOne  &= KnownOneRHS;
5342    return;
5343  }
5344  }
5345}
5346
5347//===----------------------------------------------------------------------===//
5348//                           ARM Inline Assembly Support
5349//===----------------------------------------------------------------------===//
5350
5351/// getConstraintType - Given a constraint letter, return the type of
5352/// constraint it is for this target.
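/// For example (a GCC-style usage sketch, not from this file), the statement
///   asm("vadd.f32 %0, %1, %2" : "=w"(d) : "w"(a), "w"(b));
/// uses 'w' to request VFP registers, while 'l' requests a low GPR (r0-r7),
/// which matters in Thumb mode.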
5353ARMTargetLowering::ConstraintType
5354ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
5355  if (Constraint.size() == 1) {
5356    switch (Constraint[0]) {
5357    default:  break;
5358    case 'l': return C_RegisterClass;
5359    case 'w': return C_RegisterClass;
5360    }
5361  }
5362  return TargetLowering::getConstraintType(Constraint);
5363}
5364
5365std::pair<unsigned, const TargetRegisterClass*>
5366ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
5367                                                EVT VT) const {
5368  if (Constraint.size() == 1) {
5369    // GCC ARM Constraint Letters
5370    switch (Constraint[0]) {
5371    case 'l':
5372      if (Subtarget->isThumb())
5373        return std::make_pair(0U, ARM::tGPRRegisterClass);
5374      else
5375        return std::make_pair(0U, ARM::GPRRegisterClass);
5376    case 'r':
5377      return std::make_pair(0U, ARM::GPRRegisterClass);
5378    case 'w':
5379      if (VT == MVT::f32)
5380        return std::make_pair(0U, ARM::SPRRegisterClass);
5381      if (VT.getSizeInBits() == 64)
5382        return std::make_pair(0U, ARM::DPRRegisterClass);
5383      if (VT.getSizeInBits() == 128)
5384        return std::make_pair(0U, ARM::QPRRegisterClass);
5385      break;
5386    }
5387  }
5388  if (StringRef("{cc}").equals_lower(Constraint))
5389    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
5390
5391  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
5392}
5393
5394std::vector<unsigned> ARMTargetLowering::
5395getRegClassForInlineAsmConstraint(const std::string &Constraint,
5396                                  EVT VT) const {
5397  if (Constraint.size() != 1)
5398    return std::vector<unsigned>();
5399
5400  switch (Constraint[0]) {      // GCC ARM Constraint Letters
5401  default: break;
5402  case 'l':
5403    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5404                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5405                                 0);
5406  case 'r':
5407    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5408                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5409                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
5410                                 ARM::R12, ARM::LR, 0);
5411  case 'w':
5412    if (VT == MVT::f32)
5413      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
5414                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
5415                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
5416                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
5417                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
5418                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
5419                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
5420                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
5421    if (VT.getSizeInBits() == 64)
5422      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
5423                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
5424                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
5425                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
5426    if (VT.getSizeInBits() == 128)
5427      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
5428                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
5429    break;
5430  }
5431
5432  return std::vector<unsigned>();
5433}
5434
5435/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5436/// vector.  If it is invalid, don't add anything to Ops.
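/// For example (an illustrative sketch, not from this file), for
///   asm("add %0, %1, %2" : "=r"(r) : "r"(a), "I"(42));
/// the 'I' operand 42 is encodable as an ARM data-processing immediate and is
/// turned into a target constant below, while a value such as 257 is not
/// encodable in ARM mode and would be rejected (nothing added to Ops).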
5437void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5438                                                     char Constraint,
5439                                                     std::vector<SDValue>&Ops,
5440                                                     SelectionDAG &DAG) const {
5441  SDValue Result(0, 0);
5442
5443  switch (Constraint) {
5444  default: break;
5445  case 'I': case 'J': case 'K': case 'L':
5446  case 'M': case 'N': case 'O':
5447    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5448    if (!C)
5449      return;
5450
5451    int64_t CVal64 = C->getSExtValue();
5452    int CVal = (int) CVal64;
5453    // None of these constraints allow values larger than 32 bits.  Check
5454    // that the value fits in an int.
5455    if (CVal != CVal64)
5456      return;
5457
5458    switch (Constraint) {
5459      case 'I':
5460        if (Subtarget->isThumb1Only()) {
5461          // This must be a constant between 0 and 255, for ADD
5462          // immediates.
5463          if (CVal >= 0 && CVal <= 255)
5464            break;
5465        } else if (Subtarget->isThumb2()) {
5466          // A constant that can be used as an immediate value in a
5467          // data-processing instruction.
5468          if (ARM_AM::getT2SOImmVal(CVal) != -1)
5469            break;
5470        } else {
5471          // A constant that can be used as an immediate value in a
5472          // data-processing instruction.
5473          if (ARM_AM::getSOImmVal(CVal) != -1)
5474            break;
5475        }
5476        return;
5477
5478      case 'J':
5479        if (Subtarget->isThumb()) {  // FIXME thumb2
5480          // This must be a constant between -255 and -1, for negated ADD
5481          // immediates. This can be used in GCC with an "n" modifier that
5482          // prints the negated value, for use with SUB instructions. It is
5483          // not useful otherwise but is implemented for compatibility.
5484          if (CVal >= -255 && CVal <= -1)
5485            break;
5486        } else {
5487          // This must be a constant between -4095 and 4095. It is not clear
5488          // what this constraint is intended for. Implemented for
5489          // compatibility with GCC.
5490          if (CVal >= -4095 && CVal <= 4095)
5491            break;
5492        }
5493        return;
5494
5495      case 'K':
5496        if (Subtarget->isThumb1Only()) {
5497          // A 32-bit value where only one byte has a nonzero value. Exclude
5498          // zero to match GCC. This constraint is used by GCC internally for
5499          // constants that can be loaded with a move/shift combination.
5500          // It is not useful otherwise but is implemented for compatibility.
5501          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
5502            break;
5503        } else if (Subtarget->isThumb2()) {
5504          // A constant whose bitwise inverse can be used as an immediate
5505          // value in a data-processing instruction. This can be used in GCC
5506          // with a "B" modifier that prints the inverted value, for use with
5507          // BIC and MVN instructions. It is not useful otherwise but is
5508          // implemented for compatibility.
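          // Illustrative example (not from this file): 0xFFFFFF00 inverts to
          // 0xFF, a valid immediate, so "BIC rd, rn, #255" can implement an
          // AND with 0xFFFFFF00.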
5509          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
5510            break;
5511        } else {
5512          // A constant whose bitwise inverse can be used as an immediate
5513          // value in a data-processing instruction. This can be used in GCC
5514          // with a "B" modifier that prints the inverted value, for use with
5515          // BIC and MVN instructions. It is not useful otherwise but is
5516          // implemented for compatibility.
5517          if (ARM_AM::getSOImmVal(~CVal) != -1)
5518            break;
5519        }
5520        return;
5521
5522      case 'L':
5523        if (Subtarget->isThumb1Only()) {
5524          // This must be a constant between -7 and 7,
5525          // for 3-operand ADD/SUB immediate instructions.
5526          if (CVal >= -7 && CVal < 7)
5527            break;
5528        } else if (Subtarget->isThumb2()) {
5529          // A constant whose negation can be used as an immediate value in a
5530          // data-processing instruction. This can be used in GCC with an "n"
5531          // modifier that prints the negated value, for use with SUB
5532          // instructions. It is not useful otherwise but is implemented for
5533          // compatibility.
5534          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
5535            break;
5536        } else {
5537          // A constant whose negation can be used as an immediate value in a
5538          // data-processing instruction. This can be used in GCC with an "n"
5539          // modifier that prints the negated value, for use with SUB
5540          // instructions. It is not useful otherwise but is implemented for
5541          // compatibility.
5542          if (ARM_AM::getSOImmVal(-CVal) != -1)
5543            break;
5544        }
5545        return;
5546
5547      case 'M':
5548        if (Subtarget->isThumb()) { // FIXME thumb2
5549          // This must be a multiple of 4 between 0 and 1020, for
5550          // ADD sp + immediate.
5551          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
5552            break;
5553        } else {
5554          // A power of two or a constant between 0 and 32.  This is used in
5555          // GCC for the shift amount on shifted register operands, but it is
5556          // useful in general for any shift amounts.
5557          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
5558            break;
5559        }
5560        return;
5561
5562      case 'N':
5563        if (Subtarget->isThumb()) {  // FIXME thumb2
5564          // This must be a constant between 0 and 31, for shift amounts.
5565          if (CVal >= 0 && CVal <= 31)
5566            break;
5567        }
5568        return;
5569
5570      case 'O':
5571        if (Subtarget->isThumb()) {  // FIXME thumb2
5572          // This must be a multiple of 4 between -508 and 508, for
5573          // ADD/SUB sp = sp + immediate.
5574          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
5575            break;
5576        }
5577        return;
5578    }
5579    Result = DAG.getTargetConstant(CVal, Op.getValueType());
5580    break;
5581  }
5582
5583  if (Result.getNode()) {
5584    Ops.push_back(Result);
5585    return;
5586  }
5587  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5588}
5589
5590bool
5591ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
5592  // The ARM target isn't yet aware of offsets.
5593  return false;
5594}
5595
5596int ARM::getVFPf32Imm(const APFloat &FPImm) {
5597  APInt Imm = FPImm.bitcastToAPInt();
5598  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
5599  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
5600  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
5601
5602  // We can handle 4 bits of mantissa.
5603  // mantissa = (16+UInt(e:f:g:h))/16.
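  // Worked example (illustrative): 1.0f has bits 0x3F800000, so Sign = 0,
  // Exp = 0 and Mantissa = 0; the encoding below yields (7 << 4) | 0 = 0x70.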
5604  if (Mantissa & 0x7ffff)
5605    return -1;
5606  Mantissa >>= 19;
5607  if ((Mantissa & 0xf) != Mantissa)
5608    return -1;
5609
5610  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5611  if (Exp < -3 || Exp > 4)
5612    return -1;
5613  Exp = ((Exp+3) & 0x7) ^ 4;
5614
5615  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5616}
5617
5618int ARM::getVFPf64Imm(const APFloat &FPImm) {
5619  APInt Imm = FPImm.bitcastToAPInt();
5620  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
5621  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
5622  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
5623
5624  // We can handle 4 bits of mantissa.
5625  // mantissa = (16+UInt(e:f:g:h))/16.
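  // Worked example (illustrative): 0.25 has bits 0x3FD0000000000000, giving
  // Exp = -2 and Mantissa = 0, which encodes as (5 << 4) | 0 = 0x50.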
5626  if (Mantissa & 0xffffffffffffLL)
5627    return -1;
5628  Mantissa >>= 48;
5629  if ((Mantissa & 0xf) != Mantissa)
5630    return -1;
5631
5632  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5633  if (Exp < -3 || Exp > 4)
5634    return -1;
5635  Exp = ((Exp+3) & 0x7) ^ 4;
5636
5637  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5638}
5639
5640bool ARM::isBitFieldInvertedMask(unsigned v) {
5641  if (v == 0xffffffff)
5642    return false;
5643  // There can be 1's on either or both "outsides"; all the "inside"
5644  // bits must be 0's.
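  // Illustrative examples (not from the original comment): 0xFF0000FF inverts
  // to 0x00FFFF00, a single contiguous run of ones, so it qualifies;
  // 0xFF00FF00 inverts to 0x00FF00FF, two separate runs, so it does not.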
5645  unsigned int lsb = 0, msb = 31;
5646  while (v & (1 << msb)) --msb;
5647  while (v & (1 << lsb)) ++lsb;
5648  for (unsigned int i = lsb; i <= msb; ++i) {
5649    if (v & (1 << i))
5650      return false;
5651  }
5652  return true;
5653}
5654
5655/// isFPImmLegal - Returns true if the target can instruction select the
5656/// specified FP immediate natively. If false, the legalizer will
5657/// materialize the FP immediate as a load from a constant pool.
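/// For example (an illustrative sketch), on a VFP3 target a constant such as
/// 1.0f can be selected as an immediate-form VMOV, whereas a value like 1.1f
/// has no 8-bit VFP encoding and is instead loaded from the constant pool.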
5658bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5659  if (!Subtarget->hasVFP3())
5660    return false;
5661  if (VT == MVT::f32)
5662    return ARM::getVFPf32Imm(Imm) != -1;
5663  if (VT == MVT::f64)
5664    return ARM::getVFPf64Imm(Imm) != -1;
5665  return false;
5666}
5667