ARMISelLowering.cpp revision cf296fa1629f02e63aa1264a619a65cfc66ef173
1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-isel"
16#include "ARM.h"
17#include "ARMAddressingModes.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMISelLowering.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetMachine.h"
25#include "ARMTargetObjectFile.h"
26#include "llvm/CallingConv.h"
27#include "llvm/Constants.h"
28#include "llvm/Function.h"
29#include "llvm/GlobalValue.h"
30#include "llvm/Instruction.h"
31#include "llvm/Intrinsics.h"
32#include "llvm/Type.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/PseudoSourceValue.h"
40#include "llvm/CodeGen/SelectionDAG.h"
41#include "llvm/MC/MCSectionMachO.h"
42#include "llvm/Target/TargetOptions.h"
43#include "llvm/ADT/VectorExtras.h"
44#include "llvm/ADT/Statistic.h"
45#include "llvm/Support/CommandLine.h"
46#include "llvm/Support/ErrorHandling.h"
47#include "llvm/Support/MathExtras.h"
48#include "llvm/Support/raw_ostream.h"
49#include <sstream>
50using namespace llvm;
51
52STATISTIC(NumTailCalls, "Number of tail calls");
53
54// This option should go away when tail calls fully work.
55static cl::opt<bool>
56EnableARMTailCalls("arm-tail-calls", cl::Hidden,
57  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
58  cl::init(false));
59
60static cl::opt<bool>
61EnableARMLongCalls("arm-long-calls", cl::Hidden,
62  cl::desc("Generate calls via indirect call instructions."),
63  cl::init(false));
64
65static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
66                                   CCValAssign::LocInfo &LocInfo,
67                                   ISD::ArgFlagsTy &ArgFlags,
68                                   CCState &State);
69static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
70                                    CCValAssign::LocInfo &LocInfo,
71                                    ISD::ArgFlagsTy &ArgFlags,
72                                    CCState &State);
73static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
74                                      CCValAssign::LocInfo &LocInfo,
75                                      ISD::ArgFlagsTy &ArgFlags,
76                                      CCState &State);
77static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
78                                       CCValAssign::LocInfo &LocInfo,
79                                       ISD::ArgFlagsTy &ArgFlags,
80                                       CCState &State);
81
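/// addTypeForNEON - Set up the operation actions for a NEON vector type:
/// loads, stores, and bitwise operations are promoted to the given types, and
/// operations NEON cannot perform natively are expanded or custom-lowered.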
82void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
83                                       EVT PromotedBitwiseVT) {
84  if (VT != PromotedLdStVT) {
85    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
86    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
87                       PromotedLdStVT.getSimpleVT());
88
89    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
90    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
91                       PromotedLdStVT.getSimpleVT());
92  }
93
94  EVT ElemTy = VT.getVectorElementType();
95  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
96    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
97  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
98    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
99  if (ElemTy != MVT::i32) {
100    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
101    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
102    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
103    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
104  }
105  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
106  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
107  if (llvm::ModelWithRegSequence())
108    setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
109  else
110    setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
111  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
112  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
113  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
114  if (VT.isInteger()) {
115    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
116    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
117    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
118  }
119
120  // Promote all bit-wise operations.
121  if (VT.isInteger() && VT != PromotedBitwiseVT) {
122    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
123    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
124                       PromotedBitwiseVT.getSimpleVT());
125    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
126    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
127                       PromotedBitwiseVT.getSimpleVT());
128    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
129    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
130                       PromotedBitwiseVT.getSimpleVT());
131  }
132
133  // Neon does not support vector divide/remainder operations.
134  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
135  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
136  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
137  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
138  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
139  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
140}
141
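/// addDRTypeForNEON / addQRTypeForNEON - Register a NEON vector type in the
/// 64-bit D or 128-bit Q register class and set up its operation actions.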
142void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
143  addRegisterClass(VT, ARM::DPRRegisterClass);
144  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
145}
146
147void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
148  addRegisterClass(VT, ARM::QPRRegisterClass);
149  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
150}
151
152static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
153  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
154    return new TargetLoweringObjectFileMachO();
155
156  return new ARMElfTargetObjectFile();
157}
158
159ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
160    : TargetLowering(TM, createTLOF(TM)) {
161  Subtarget = &TM.getSubtarget<ARMSubtarget>();
162
163  if (Subtarget->isTargetDarwin()) {
164    // Uses VFP for Thumb libfuncs if available.
165    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
166      // Single-precision floating-point arithmetic.
167      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
168      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
169      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
170      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
171
172      // Double-precision floating-point arithmetic.
173      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
174      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
175      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
176      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
177
178      // Single-precision comparisons.
179      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
180      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
181      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
182      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
183      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
184      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
185      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
186      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
187
188      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
189      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
190      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
191      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
192      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
193      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
194      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
195      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
196
197      // Double-precision comparisons.
198      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
199      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
200      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
201      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
202      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
203      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
204      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
205      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
206
207      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
208      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
209      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
210      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
211      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
212      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
213      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
214      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
215
216      // Floating-point to integer conversions.
217      // i64 conversions are done via library routines even when generating VFP
218      // instructions, so use the same ones.
219      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
220      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
221      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
222      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
223
224      // Conversions between floating types.
225      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
226      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
227
228      // Integer to floating-point conversions.
229      // i64 conversions are done via library routines even when generating VFP
230      // instructions, so use the same ones.
231      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
232      // e.g., __floatunsidf vs. __floatunssidfvfp.
233      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
234      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
235      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
236      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
237    }
238  }
239
240  // These libcalls are not available in 32-bit mode.
241  setLibcallName(RTLIB::SHL_I128, 0);
242  setLibcallName(RTLIB::SRL_I128, 0);
243  setLibcallName(RTLIB::SRA_I128, 0);
244
245  // Libcalls should use the AAPCS base standard ABI, even if hard float
246  // is in effect, as per the ARM RTABI specification, section 4.1.2.
247  if (Subtarget->isAAPCS_ABI()) {
248    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
249      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
250                            CallingConv::ARM_AAPCS);
251    }
252  }
253
254  if (Subtarget->isThumb1Only())
255    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
256  else
257    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
258  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
259    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
260    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
261
262    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
263  }
264
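  // Register the legal NEON vector types and mark the operations NEON cannot
  // perform (v2f64 arithmetic, 64-bit element multiplies and compares) as
  // Expand, then enable the NEON-specific DAG combines.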
265  if (Subtarget->hasNEON()) {
266    addDRTypeForNEON(MVT::v2f32);
267    addDRTypeForNEON(MVT::v8i8);
268    addDRTypeForNEON(MVT::v4i16);
269    addDRTypeForNEON(MVT::v2i32);
270    addDRTypeForNEON(MVT::v1i64);
271
272    addQRTypeForNEON(MVT::v4f32);
273    addQRTypeForNEON(MVT::v2f64);
274    addQRTypeForNEON(MVT::v16i8);
275    addQRTypeForNEON(MVT::v8i16);
276    addQRTypeForNEON(MVT::v4i32);
277    addQRTypeForNEON(MVT::v2i64);
278
279    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
280    // neither Neon nor VFP support any arithmetic operations on it.
281    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
282    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
283    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
284    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
285    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
286    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
287    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
288    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
289    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
290    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
291    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
292    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
293    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
294    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
295    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
296    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
297    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
298    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
299    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
300    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
301    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
302    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
303    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
304    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
305
306    // Neon does not support some operations on v1i64 and v2i64 types.
307    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
308    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
309    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
310    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
311
312    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
313    setTargetDAGCombine(ISD::SHL);
314    setTargetDAGCombine(ISD::SRL);
315    setTargetDAGCombine(ISD::SRA);
316    setTargetDAGCombine(ISD::SIGN_EXTEND);
317    setTargetDAGCombine(ISD::ZERO_EXTEND);
318    setTargetDAGCombine(ISD::ANY_EXTEND);
319    setTargetDAGCombine(ISD::SELECT_CC);
320  }
321
322  computeRegisterProperties();
323
324  // ARM does not have f32 extending load.
325  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
326
327  // ARM does not have i1 sign extending load.
328  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
329
330  // ARM supports all 4 flavors of integer indexed load / store.
331  if (!Subtarget->isThumb1Only()) {
332    for (unsigned im = (unsigned)ISD::PRE_INC;
333         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
334      setIndexedLoadAction(im,  MVT::i1,  Legal);
335      setIndexedLoadAction(im,  MVT::i8,  Legal);
336      setIndexedLoadAction(im,  MVT::i16, Legal);
337      setIndexedLoadAction(im,  MVT::i32, Legal);
338      setIndexedStoreAction(im, MVT::i1,  Legal);
339      setIndexedStoreAction(im, MVT::i8,  Legal);
340      setIndexedStoreAction(im, MVT::i16, Legal);
341      setIndexedStoreAction(im, MVT::i32, Legal);
342    }
343  }
344
345  // i64 operation support.
346  if (Subtarget->isThumb1Only()) {
347    setOperationAction(ISD::MUL,     MVT::i64, Expand);
348    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
349    setOperationAction(ISD::MULHS,   MVT::i32, Expand);
350    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
351    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
352  } else {
353    setOperationAction(ISD::MUL,     MVT::i64, Expand);
354    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
355    if (!Subtarget->hasV6Ops())
356      setOperationAction(ISD::MULHS, MVT::i32, Expand);
357  }
358  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
359  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
360  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
361  setOperationAction(ISD::SRL,       MVT::i64, Custom);
362  setOperationAction(ISD::SRA,       MVT::i64, Custom);
363
364  // ARM does not have ROTL.
365  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
366  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
367  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
368  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
369    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
370
371  // Only ARMv6 has BSWAP.
372  if (!Subtarget->hasV6Ops())
373    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
374
375  // These are expanded into libcalls.
376  if (!Subtarget->hasDivide()) {
377    // v7M has a hardware divider
378    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
379    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
380  }
381  setOperationAction(ISD::SREM,  MVT::i32, Expand);
382  setOperationAction(ISD::UREM,  MVT::i32, Expand);
383  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
384  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
385
386  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
387  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
388  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
389  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
390  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
391
392  setOperationAction(ISD::TRAP, MVT::Other, Legal);
393
394  // Use the default implementation.
395  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
396  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
397  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
398  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
399  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
400  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
401  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
402  // FIXME: Shouldn't need this, since no register is used, but the legalizer
403  // doesn't yet know how to not do that for SjLj.
404  setExceptionSelectorRegister(ARM::R0);
405  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
406  setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);
407
408  // If the subtarget does not have extract instructions, sign_extend_inreg
409  // needs to be expanded. Extract is available in ARM mode on v6 and up,
410  // and on most Thumb2 implementations.
411  if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
412      || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
413    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
414    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
415  }
416  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
417
418  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
419    // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
420    // iff the target supports VFP2.
421    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
422
423  // We want to custom lower some of our intrinsics.
424  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
425
426  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
427  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
428  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
429  setOperationAction(ISD::SELECT,    MVT::i32, Expand);
430  setOperationAction(ISD::SELECT,    MVT::f32, Expand);
431  setOperationAction(ISD::SELECT,    MVT::f64, Expand);
432  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
433  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
434  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
435
436  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
437  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
438  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
439  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
440  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
441
442  // We don't support sin/cos/fmod/copysign/pow
443  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
444  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
445  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
446  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
447  setOperationAction(ISD::FREM,      MVT::f64, Expand);
448  setOperationAction(ISD::FREM,      MVT::f32, Expand);
449  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
450    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
451    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
452  }
453  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
454  setOperationAction(ISD::FPOW,      MVT::f32, Expand);
455
456  // Various VFP goodness
457  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
458    // int <-> fp conversions are custom expanded into bit_convert + ARMISD ops.
459    if (Subtarget->hasVFP2()) {
460      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
461      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
462      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
463      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
464    }
465    // Special handling for half-precision FP.
466    if (!Subtarget->hasFP16()) {
467      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
468      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
469    }
470  }
471
472  // We have target-specific dag combine patterns for the following nodes:
473  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
474  setTargetDAGCombine(ISD::ADD);
475  setTargetDAGCombine(ISD::SUB);
476  setTargetDAGCombine(ISD::MUL);
477
478  setStackPointerRegisterToSaveRestore(ARM::SP);
479
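  // Prefer register-pressure-oriented scheduling when VFP is unavailable or
  // unused; otherwise use the hybrid (latency + register pressure) scheduler.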
480  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
481    setSchedulingPreference(Sched::RegPressure);
482  else
483    setSchedulingPreference(Sched::Hybrid);
484
485  // FIXME: If-converter should use instruction latency to determine
486  // profitability rather than relying on fixed limits.
487  if (Subtarget->getCPUString() == "generic") {
488    // Generic (and overly aggressive) if-conversion limits.
489    setIfCvtBlockSizeLimit(10);
490    setIfCvtDupBlockSizeLimit(2);
491  } else if (Subtarget->hasV7Ops()) {
492    setIfCvtBlockSizeLimit(3);
493    setIfCvtDupBlockSizeLimit(1);
494  } else if (Subtarget->hasV6Ops()) {
495    setIfCvtBlockSizeLimit(2);
496    setIfCvtDupBlockSizeLimit(1);
497  } else {
498    setIfCvtBlockSizeLimit(3);
499    setIfCvtDupBlockSizeLimit(2);
500  }
501
502  maxStoresPerMemcpy = 1;   // Temporary - rewrite interface to use type info.
503  // Do not enable CodePlacementOpt for now: it currently runs after the
504  // ARMConstantIslandPass and messes up branch relaxation and placement
505  // of constant islands.
506  // benefitFromCodePlacementOpt = true;
507}
508
509const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
510  switch (Opcode) {
511  default: return 0;
512  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
513  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
514  case ARMISD::CALL:          return "ARMISD::CALL";
515  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
516  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
517  case ARMISD::tCALL:         return "ARMISD::tCALL";
518  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
519  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
520  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
521  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
522  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
523  case ARMISD::CMP:           return "ARMISD::CMP";
524  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
525  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
526  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
527  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
528  case ARMISD::CMOV:          return "ARMISD::CMOV";
529  case ARMISD::CNEG:          return "ARMISD::CNEG";
530
531  case ARMISD::RBIT:          return "ARMISD::RBIT";
532
533  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
534  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
535  case ARMISD::SITOF:         return "ARMISD::SITOF";
536  case ARMISD::UITOF:         return "ARMISD::UITOF";
537
538  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
539  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
540  case ARMISD::RRX:           return "ARMISD::RRX";
541
542  case ARMISD::VMOVRRD:         return "ARMISD::VMOVRRD";
543  case ARMISD::VMOVDRR:         return "ARMISD::VMOVDRR";
544
545  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
546  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
547
548  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
549
550  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
551
552  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
553
554  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
555  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";
556
557  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
558  case ARMISD::VCGE:          return "ARMISD::VCGE";
559  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
560  case ARMISD::VCGT:          return "ARMISD::VCGT";
561  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
562  case ARMISD::VTST:          return "ARMISD::VTST";
563
564  case ARMISD::VSHL:          return "ARMISD::VSHL";
565  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
566  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
567  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
568  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
569  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
570  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
571  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
572  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
573  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
574  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
575  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
576  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
577  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
578  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
579  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
580  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
581  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
582  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
583  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
584  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
585  case ARMISD::VDUP:          return "ARMISD::VDUP";
586  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
587  case ARMISD::VEXT:          return "ARMISD::VEXT";
588  case ARMISD::VREV64:        return "ARMISD::VREV64";
589  case ARMISD::VREV32:        return "ARMISD::VREV32";
590  case ARMISD::VREV16:        return "ARMISD::VREV16";
591  case ARMISD::VZIP:          return "ARMISD::VZIP";
592  case ARMISD::VUZP:          return "ARMISD::VUZP";
593  case ARMISD::VTRN:          return "ARMISD::VTRN";
594  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
595  case ARMISD::FMAX:          return "ARMISD::FMAX";
596  case ARMISD::FMIN:          return "ARMISD::FMIN";
597  }
598}
599
600/// getRegClassFor - Return the register class that should be used for the
601/// specified value type.
602TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
603  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
604  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
605  // load / store 4 to 8 consecutive D registers.
606  if (Subtarget->hasNEON()) {
607    if (VT == MVT::v4i64)
608      return ARM::QQPRRegisterClass;
609    else if (VT == MVT::v8i64)
610      return ARM::QQQQPRRegisterClass;
611  }
612  return TargetLowering::getRegClassFor(VT);
613}
614
615/// getFunctionAlignment - Return the Log2 alignment of this function.
616unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
617  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
618}
619
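/// getSchedulingPreference - Schedule nodes that produce floating-point or
/// vector values, loads, and long-latency instructions for latency; schedule
/// everything else for register pressure.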
620Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
621  unsigned NumVals = N->getNumValues();
622  if (!NumVals)
623    return Sched::RegPressure;
624
625  for (unsigned i = 0; i != NumVals; ++i) {
626    EVT VT = N->getValueType(i);
627    if (VT.isFloatingPoint() || VT.isVector())
628      return Sched::Latency;
629  }
630
631  if (!N->isMachineOpcode())
632    return Sched::RegPressure;
633
634  // Loads are scheduled for latency even if the instruction itinerary
635  // is not available.
636  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
637  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
638  if (TID.mayLoad())
639    return Sched::Latency;
640
641  const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
642  if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
643    return Sched::Latency;
644  return Sched::RegPressure;
645}
646
647//===----------------------------------------------------------------------===//
648// Lowering Code
649//===----------------------------------------------------------------------===//
650
651/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
652static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
653  switch (CC) {
654  default: llvm_unreachable("Unknown condition code!");
655  case ISD::SETNE:  return ARMCC::NE;
656  case ISD::SETEQ:  return ARMCC::EQ;
657  case ISD::SETGT:  return ARMCC::GT;
658  case ISD::SETGE:  return ARMCC::GE;
659  case ISD::SETLT:  return ARMCC::LT;
660  case ISD::SETLE:  return ARMCC::LE;
661  case ISD::SETUGT: return ARMCC::HI;
662  case ISD::SETUGE: return ARMCC::HS;
663  case ISD::SETULT: return ARMCC::LO;
664  case ISD::SETULE: return ARMCC::LS;
665  }
666}
667
668/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
669static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
670                        ARMCC::CondCodes &CondCode2) {
671  CondCode2 = ARMCC::AL;
672  switch (CC) {
673  default: llvm_unreachable("Unknown FP condition!");
674  case ISD::SETEQ:
675  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
676  case ISD::SETGT:
677  case ISD::SETOGT: CondCode = ARMCC::GT; break;
678  case ISD::SETGE:
679  case ISD::SETOGE: CondCode = ARMCC::GE; break;
680  case ISD::SETOLT: CondCode = ARMCC::MI; break;
681  case ISD::SETOLE: CondCode = ARMCC::LS; break;
682  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
683  case ISD::SETO:   CondCode = ARMCC::VC; break;
684  case ISD::SETUO:  CondCode = ARMCC::VS; break;
685  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
686  case ISD::SETUGT: CondCode = ARMCC::HI; break;
687  case ISD::SETUGE: CondCode = ARMCC::PL; break;
688  case ISD::SETLT:
689  case ISD::SETULT: CondCode = ARMCC::LT; break;
690  case ISD::SETLE:
691  case ISD::SETULE: CondCode = ARMCC::LE; break;
692  case ISD::SETNE:
693  case ISD::SETUNE: CondCode = ARMCC::NE; break;
694  }
695}
696
697//===----------------------------------------------------------------------===//
698//                      Calling Convention Implementation
699//===----------------------------------------------------------------------===//
700
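// Pull in the TableGen-generated calling convention implementations
// (CC_ARM_APCS, CC_ARM_AAPCS, CC_ARM_AAPCS_VFP, and the RetCC_* variants).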
701#include "ARMGenCallingConv.inc"
702
703// APCS f64 is in register pairs, possibly split to stack
704static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
705                          CCValAssign::LocInfo &LocInfo,
706                          CCState &State, bool CanFail) {
707  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
708
709  // Try to get the first register.
710  if (unsigned Reg = State.AllocateReg(RegList, 4))
711    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
712  else {
713    // For the 2nd half of a v2f64, do not fail.
714    if (CanFail)
715      return false;
716
717    // Put the whole thing on the stack.
718    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
719                                           State.AllocateStack(8, 4),
720                                           LocVT, LocInfo));
721    return true;
722  }
723
724  // Try to get the second register.
725  if (unsigned Reg = State.AllocateReg(RegList, 4))
726    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
727  else
728    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
729                                           State.AllocateStack(4, 4),
730                                           LocVT, LocInfo));
731  return true;
732}
733
734static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
735                                   CCValAssign::LocInfo &LocInfo,
736                                   ISD::ArgFlagsTy &ArgFlags,
737                                   CCState &State) {
738  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
739    return false;
740  if (LocVT == MVT::v2f64 &&
741      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
742    return false;
743  return true;  // we handled it
744}
745
746// AAPCS f64 is in aligned register pairs
747static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
748                           CCValAssign::LocInfo &LocInfo,
749                           CCState &State, bool CanFail) {
750  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
751  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
752
753  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
754  if (Reg == 0) {
755    // For the 2nd half of a v2f64, do not just fail.
756    if (CanFail)
757      return false;
758
759    // Put the whole thing on the stack.
760    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
761                                           State.AllocateStack(8, 8),
762                                           LocVT, LocInfo));
763    return true;
764  }
765
766  unsigned i;
767  for (i = 0; i < 2; ++i)
768    if (HiRegList[i] == Reg)
769      break;
770
771  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
772  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
773                                         LocVT, LocInfo));
774  return true;
775}
776
777static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
778                                    CCValAssign::LocInfo &LocInfo,
779                                    ISD::ArgFlagsTy &ArgFlags,
780                                    CCState &State) {
781  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
782    return false;
783  if (LocVT == MVT::v2f64 &&
784      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
785    return false;
786  return true;  // we handled it
787}
788
789static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
790                         CCValAssign::LocInfo &LocInfo, CCState &State) {
791  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
792  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
793
794  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
795  if (Reg == 0)
796    return false; // we didn't handle it
797
798  unsigned i;
799  for (i = 0; i < 2; ++i)
800    if (HiRegList[i] == Reg)
801      break;
802
803  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
804  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
805                                         LocVT, LocInfo));
806  return true;
807}
808
809static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
810                                      CCValAssign::LocInfo &LocInfo,
811                                      ISD::ArgFlagsTy &ArgFlags,
812                                      CCState &State) {
813  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
814    return false;
815  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
816    return false;
817  return true;  // we handled it
818}
819
820static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
821                                       CCValAssign::LocInfo &LocInfo,
822                                       ISD::ArgFlagsTy &ArgFlags,
823                                       CCState &State) {
824  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
825                                   State);
826}
827
828/// CCAssignFnForNode - Selects the correct CCAssignFn for the
829/// given calling convention value.
830CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
831                                                 bool Return,
832                                                 bool isVarArg) const {
833  switch (CC) {
834  default:
835    llvm_unreachable("Unsupported calling convention");
836  case CallingConv::C:
837  case CallingConv::Fast:
838    // Use target triple & subtarget features to do actual dispatch.
839    if (Subtarget->isAAPCS_ABI()) {
840      if (Subtarget->hasVFP2() &&
841          FloatABIType == FloatABI::Hard && !isVarArg)
842        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
843      else
844        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
845    } else
846        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
847  case CallingConv::ARM_AAPCS_VFP:
848    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
849  case CallingConv::ARM_AAPCS:
850    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
851  case CallingConv::ARM_APCS:
852    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
853  }
854}
855
856/// LowerCallResult - Lower the result values of a call into the
857/// appropriate copies out of appropriate physical registers.
858SDValue
859ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
860                                   CallingConv::ID CallConv, bool isVarArg,
861                                   const SmallVectorImpl<ISD::InputArg> &Ins,
862                                   DebugLoc dl, SelectionDAG &DAG,
863                                   SmallVectorImpl<SDValue> &InVals) const {
864
865  // Assign locations to each value returned by this call.
866  SmallVector<CCValAssign, 16> RVLocs;
867  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
868                 RVLocs, *DAG.getContext());
869  CCInfo.AnalyzeCallResult(Ins,
870                           CCAssignFnForNode(CallConv, /* Return*/ true,
871                                             isVarArg));
872
873  // Copy all of the result registers out of their specified physreg.
874  for (unsigned i = 0; i != RVLocs.size(); ++i) {
875    CCValAssign VA = RVLocs[i];
876
877    SDValue Val;
878    if (VA.needsCustom()) {
879      // Handle f64 or half of a v2f64.
880      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
881                                      InFlag);
882      Chain = Lo.getValue(1);
883      InFlag = Lo.getValue(2);
884      VA = RVLocs[++i]; // skip ahead to next loc
885      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
886                                      InFlag);
887      Chain = Hi.getValue(1);
888      InFlag = Hi.getValue(2);
889      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
890
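      // A v2f64 result occupies two register pairs; reassemble each f64 half
      // and insert it into an initially undefined v2f64 vector.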
891      if (VA.getLocVT() == MVT::v2f64) {
892        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
893        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
894                          DAG.getConstant(0, MVT::i32));
895
896        VA = RVLocs[++i]; // skip ahead to next loc
897        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
898        Chain = Lo.getValue(1);
899        InFlag = Lo.getValue(2);
900        VA = RVLocs[++i]; // skip ahead to next loc
901        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
902        Chain = Hi.getValue(1);
903        InFlag = Hi.getValue(2);
904        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
905        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
906                          DAG.getConstant(1, MVT::i32));
907      }
908    } else {
909      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
910                               InFlag);
911      Chain = Val.getValue(1);
912      InFlag = Val.getValue(2);
913    }
914
915    switch (VA.getLocInfo()) {
916    default: llvm_unreachable("Unknown loc info!");
917    case CCValAssign::Full: break;
918    case CCValAssign::BCvt:
919      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
920      break;
921    }
922
923    InVals.push_back(Val);
924  }
925
926  return Chain;
927}
928
929/// CreateCopyOfByValArgument - Make a copy of the aggregate at the address
930/// specified by "Src" to the address "Dst".  The size and alignment of the
931/// copy are taken from the byval parameter attribute.  The copy will be passed
932/// as a byval function parameter.
933/// Sometimes what we are copying is the end of a larger object, the part that
934/// does not fit in registers.
935static SDValue
936CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
937                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
938                          DebugLoc dl) {
939  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
940  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
941                       /*isVolatile=*/false, /*AlwaysInline=*/false,
942                       NULL, 0, NULL, 0);
943}
944
945/// LowerMemOpCallTo - Store the argument to the stack.
946SDValue
947ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
948                                    SDValue StackPtr, SDValue Arg,
949                                    DebugLoc dl, SelectionDAG &DAG,
950                                    const CCValAssign &VA,
951                                    ISD::ArgFlagsTy Flags) const {
952  unsigned LocMemOffset = VA.getLocMemOffset();
953  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
954  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
955  if (Flags.isByVal()) {
956    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
957  }
958  return DAG.getStore(Chain, dl, Arg, PtrOff,
959                      PseudoSourceValue::getStack(), LocMemOffset,
960                      false, false, 0);
961}
962
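/// PassF64ArgInRegs - Split an f64 (or half of a v2f64) argument into a pair
/// of i32 values with VMOVRRD and pass them in the assigned registers, storing
/// the second word to the stack if it was assigned a memory location.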
963void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
964                                         SDValue Chain, SDValue &Arg,
965                                         RegsToPassVector &RegsToPass,
966                                         CCValAssign &VA, CCValAssign &NextVA,
967                                         SDValue &StackPtr,
968                                         SmallVector<SDValue, 8> &MemOpChains,
969                                         ISD::ArgFlagsTy Flags) const {
970
971  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
972                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
973  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
974
975  if (NextVA.isRegLoc())
976    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
977  else {
978    assert(NextVA.isMemLoc());
979    if (StackPtr.getNode() == 0)
980      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
981
982    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
983                                           dl, DAG, NextVA,
984                                           Flags));
985  }
986}
987
988/// LowerCall - Lower a call into a callseq_start <-
989/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
990/// nodes.
991SDValue
992ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
993                             CallingConv::ID CallConv, bool isVarArg,
994                             bool &isTailCall,
995                             const SmallVectorImpl<ISD::OutputArg> &Outs,
996                             const SmallVectorImpl<ISD::InputArg> &Ins,
997                             DebugLoc dl, SelectionDAG &DAG,
998                             SmallVectorImpl<SDValue> &InVals) const {
999  MachineFunction &MF = DAG.getMachineFunction();
1000  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1001  bool IsSibCall = false;
1002  // Temporarily disable tail calls so things don't break.
1003  if (!EnableARMTailCalls)
1004    isTailCall = false;
1005  if (isTailCall) {
1006    // Check if it's really possible to do a tail call.
1007    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1008                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1009                                                   Outs, Ins, DAG);
1010    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1011    // detected sibcalls.
1012    if (isTailCall) {
1013      ++NumTailCalls;
1014      IsSibCall = true;
1015    }
1016  }
1017
1018  // Analyze operands of the call, assigning locations to each operand.
1019  SmallVector<CCValAssign, 16> ArgLocs;
1020  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1021                 *DAG.getContext());
1022  CCInfo.AnalyzeCallOperands(Outs,
1023                             CCAssignFnForNode(CallConv, /* Return*/ false,
1024                                               isVarArg));
1025
1026  // Get a count of how many bytes are to be pushed on the stack.
1027  unsigned NumBytes = CCInfo.getNextStackOffset();
1028
1029  // For tail calls, memory operands are available in our caller's stack.
1030  if (IsSibCall)
1031    NumBytes = 0;
1032
1033  // Adjust the stack pointer for the new arguments...
1034  // These operations are automatically eliminated by the prolog/epilog pass
1035  if (!IsSibCall)
1036    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1037
1038  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1039
1040  RegsToPassVector RegsToPass;
1041  SmallVector<SDValue, 8> MemOpChains;
1042
1043  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1044  // of tail call optimization, arguments are handled later.
1045  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1046       i != e;
1047       ++i, ++realArgIdx) {
1048    CCValAssign &VA = ArgLocs[i];
1049    SDValue Arg = Outs[realArgIdx].Val;
1050    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1051
1052    // Promote the value if needed.
1053    switch (VA.getLocInfo()) {
1054    default: llvm_unreachable("Unknown loc info!");
1055    case CCValAssign::Full: break;
1056    case CCValAssign::SExt:
1057      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1058      break;
1059    case CCValAssign::ZExt:
1060      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1061      break;
1062    case CCValAssign::AExt:
1063      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1064      break;
1065    case CCValAssign::BCvt:
1066      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1067      break;
1068    }
1069
1070    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1071    if (VA.needsCustom()) {
1072      if (VA.getLocVT() == MVT::v2f64) {
1073        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1074                                  DAG.getConstant(0, MVT::i32));
1075        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1076                                  DAG.getConstant(1, MVT::i32));
1077
1078        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1079                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1080
1081        VA = ArgLocs[++i]; // skip ahead to next loc
1082        if (VA.isRegLoc()) {
1083          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1084                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1085        } else {
1086          assert(VA.isMemLoc());
1087
1088          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1089                                                 dl, DAG, VA, Flags));
1090        }
1091      } else {
1092        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1093                         StackPtr, MemOpChains, Flags);
1094      }
1095    } else if (VA.isRegLoc()) {
1096      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1097    } else {
1098      assert(VA.isMemLoc());
1099
1100      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1101                                             dl, DAG, VA, Flags));
1102    }
1103  }
1104
1105  if (!MemOpChains.empty())
1106    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1107                        &MemOpChains[0], MemOpChains.size());
1108
1109  // Build a sequence of copy-to-reg nodes chained together with token chain
1110  // and flag operands which copy the outgoing args into the appropriate regs.
1111  SDValue InFlag;
1112  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1113    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1114                             RegsToPass[i].second, InFlag);
1115    InFlag = Chain.getValue(1);
1116  }
1117
1118  // For tail calls, lower the arguments to the 'real' stack slots.
1119  if (isTailCall) {
1120    // Force all the incoming stack arguments to be loaded from the stack
1121    // before any new outgoing arguments are stored to the stack, because the
1122    // outgoing stack slots may alias the incoming argument stack slots, and
1123    // the alias isn't otherwise explicit. This is slightly more conservative
1124    // than necessary, because it means that each store effectively depends
1125    // on every argument instead of just those arguments it would clobber.
1126
1127    // Do not flag the preceding CopyToReg nodes together with the following ones.
1128    InFlag = SDValue();
1129    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1130      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1131                               RegsToPass[i].second, InFlag);
1132      InFlag = Chain.getValue(1);
1133    }
1134    InFlag = SDValue();
1135  }
1136
1137  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1138  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
1139  // node so that legalize doesn't hack it.
1140  bool isDirect = false;
1141  bool isARMFunc = false;
1142  bool isLocalARMFunc = false;
1143  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1144
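  // With -arm-long-calls, materialize the callee address from a constant pool
  // entry and call indirectly through a register.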
1145  if (EnableARMLongCalls) {
1146    assert (getTargetMachine().getRelocationModel() == Reloc::Static
1147            && "long-calls with non-static relocation model!");
1148    // Handle a global address or an external symbol. If it's not one of
1149    // those, the target's already in a register, so we don't need to do
1150    // anything extra.
1151    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1152      const GlobalValue *GV = G->getGlobal();
1153      // Create a constant pool entry for the callee address
1154      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1155      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1156                                                           ARMPCLabelIndex,
1157                                                           ARMCP::CPValue, 0);
1158      // Get the address of the callee into a register
1159      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1160      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1161      Callee = DAG.getLoad(getPointerTy(), dl,
1162                           DAG.getEntryNode(), CPAddr,
1163                           PseudoSourceValue::getConstantPool(), 0,
1164                           false, false, 0);
1165    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1166      const char *Sym = S->getSymbol();
1167
1168      // Create a constant pool entry for the callee address
1169      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1170      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1171                                                       Sym, ARMPCLabelIndex, 0);
1172      // Get the address of the callee into a register
1173      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1174      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1175      Callee = DAG.getLoad(getPointerTy(), dl,
1176                           DAG.getEntryNode(), CPAddr,
1177                           PseudoSourceValue::getConstantPool(), 0,
1178                           false, false, 0);
1179    }
1180  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1181    const GlobalValue *GV = G->getGlobal();
1182    isDirect = true;
1183    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1184    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1185                   getTargetMachine().getRelocationModel() != Reloc::Static;
1186    isARMFunc = !Subtarget->isThumb() || isStub;
1187    // ARM call to a local ARM function is predicable.
1188    isLocalARMFunc = !Subtarget->isThumb() && !isExt;
1189    // tBX takes a register source operand.
1190    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1191      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1192      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1193                                                           ARMPCLabelIndex,
1194                                                           ARMCP::CPValue, 4);
1195      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1196      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1197      Callee = DAG.getLoad(getPointerTy(), dl,
1198                           DAG.getEntryNode(), CPAddr,
1199                           PseudoSourceValue::getConstantPool(), 0,
1200                           false, false, 0);
1201      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1202      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1203                           getPointerTy(), Callee, PICLabel);
1204    } else
1205      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
1206  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1207    isDirect = true;
1208    bool isStub = Subtarget->isTargetDarwin() &&
1209                  getTargetMachine().getRelocationModel() != Reloc::Static;
1210    isARMFunc = !Subtarget->isThumb() || isStub;
1211    // tBX takes a register source operand.
1212    const char *Sym = S->getSymbol();
1213    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1214      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1215      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1216                                                       Sym, ARMPCLabelIndex, 4);
1217      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1218      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1219      Callee = DAG.getLoad(getPointerTy(), dl,
1220                           DAG.getEntryNode(), CPAddr,
1221                           PseudoSourceValue::getConstantPool(), 0,
1222                           false, false, 0);
1223      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1224      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1225                           getPointerTy(), Callee, PICLabel);
1226    } else
1227      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
1228  }
1229
1230  // FIXME: handle tail calls differently.
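  // Thumb targets without v5T support cannot use BLX, so indirect calls and
  // calls to ARM functions use CALL_NOLINK there; direct ARM-mode calls to
  // local ARM functions use the predicable CALL_PRED.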
1231  unsigned CallOpc;
1232  if (Subtarget->isThumb()) {
1233    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1234      CallOpc = ARMISD::CALL_NOLINK;
1235    else
1236      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1237  } else {
1238    CallOpc = (isDirect || Subtarget->hasV5TOps())
1239      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1240      : ARMISD::CALL_NOLINK;
1241  }
1242  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
1243    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
1244    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
1245    InFlag = Chain.getValue(1);
1246  }
1247
1248  std::vector<SDValue> Ops;
1249  Ops.push_back(Chain);
1250  Ops.push_back(Callee);
1251
1252  // Add argument registers to the end of the list so that they are known live
1253  // into the call.
1254  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1255    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1256                                  RegsToPass[i].second.getValueType()));
1257
1258  if (InFlag.getNode())
1259    Ops.push_back(InFlag);
1260
1261  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1262  if (isTailCall)
1263    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1264
1265  // Returns a chain and a flag for retval copy to use.
1266  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1267  InFlag = Chain.getValue(1);
1268
1269  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1270                             DAG.getIntPtrConstant(0, true), InFlag);
1271  if (!Ins.empty())
1272    InFlag = Chain.getValue(1);
1273
1274  // Handle result values, copying them out of physregs into vregs that we
1275  // return.
1276  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1277                         dl, DAG, InVals);
1278}
1279
1280/// MatchingStackOffset - Return true if the given stack call argument is
1281 /// already available in the same relative position in the caller's
1282 /// incoming argument stack.
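/// For example, if an outgoing argument at offset 4 with size 4 is simply a
/// reload of the caller's own fixed stack object at offset 4 with size 4,
/// the value is already in place and does not need to be stored again.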
1283static
1284bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1285                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1286                         const ARMInstrInfo *TII) {
1287  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1288  int FI = INT_MAX;
1289  if (Arg.getOpcode() == ISD::CopyFromReg) {
1290    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1291    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
1292      return false;
1293    MachineInstr *Def = MRI->getVRegDef(VR);
1294    if (!Def)
1295      return false;
1296    if (!Flags.isByVal()) {
1297      if (!TII->isLoadFromStackSlot(Def, FI))
1298        return false;
1299    } else {
1300//      unsigned Opcode = Def->getOpcode();
1301//      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
1302//          Def->getOperand(1).isFI()) {
1303//        FI = Def->getOperand(1).getIndex();
1304//        Bytes = Flags.getByValSize();
1305//      } else
1306        return false;
1307    }
1308  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1309    if (Flags.isByVal())
1310      // ByVal argument is passed in as a pointer but it's now being
1311      // dereferenced. e.g.
1312      // define @foo(%struct.X* %A) {
1313      //   tail call @bar(%struct.X* byval %A)
1314      // }
1315      return false;
1316    SDValue Ptr = Ld->getBasePtr();
1317    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1318    if (!FINode)
1319      return false;
1320    FI = FINode->getIndex();
1321  } else
1322    return false;
1323
1324  assert(FI != INT_MAX);
1325  if (!MFI->isFixedObjectIndex(FI))
1326    return false;
1327  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1328}
1329
1330/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1331/// for tail call optimization. Targets which want to do tail call
1332/// optimization should implement this function.
1333bool
1334ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1335                                                     CallingConv::ID CalleeCC,
1336                                                     bool isVarArg,
1337                                                     bool isCalleeStructRet,
1338                                                     bool isCallerStructRet,
1339                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1340                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1341                                                     SelectionDAG& DAG) const {
1342
1343  const Function *CallerF = DAG.getMachineFunction().getFunction();
1344  CallingConv::ID CallerCC = CallerF->getCallingConv();
1345  bool CCMatch = CallerCC == CalleeCC;
1346
1347  // Look for obvious safe cases to perform tail call optimization that do not
1348  // require ABI changes. This is what gcc calls sibcall.
1349
1350  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
1351  // emit a special epilogue.
1352  // Not sure yet if this is true on ARM.
1353//??  if (RegInfo->needsStackRealignment(MF))
1354//??    return false;
1355
1356  // Do not sibcall-optimize vararg calls unless the call site passes no
1357  // arguments.
1358  if (isVarArg && !Outs.empty())
1359    return false;
1360
1361  // Also avoid sibcall optimization if either caller or callee uses struct
1362  // return semantics.
1363  if (isCalleeStructRet || isCallerStructRet)
1364    return false;
1365
1366  // If the calling conventions do not match, then we'd better make sure the
1367  // results are returned in the same way the caller expects.
1368  if (!CCMatch) {
1369    SmallVector<CCValAssign, 16> RVLocs1;
1370    CCState CCInfo1(CalleeCC, false, getTargetMachine(),
1371                    RVLocs1, *DAG.getContext());
1372    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1373
1374    SmallVector<CCValAssign, 16> RVLocs2;
1375    CCState CCInfo2(CallerCC, false, getTargetMachine(),
1376                    RVLocs2, *DAG.getContext());
1377    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1378
1379    if (RVLocs1.size() != RVLocs2.size())
1380      return false;
1381    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1382      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1383        return false;
1384      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1385        return false;
1386      if (RVLocs1[i].isRegLoc()) {
1387        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1388          return false;
1389      } else {
1390        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1391          return false;
1392      }
1393    }
1394  }
1395
1396  // If the callee takes no arguments then go on to check the results of the
1397  // call.
1398  if (!Outs.empty()) {
1399    // Check if stack adjustment is needed. For now, do not do this if any
1400    // argument is passed on the stack.
1401    SmallVector<CCValAssign, 16> ArgLocs;
1402    CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
1403                   ArgLocs, *DAG.getContext());
1404    CCInfo.AnalyzeCallOperands(Outs,
1405                               CCAssignFnForNode(CalleeCC, false, isVarArg));
1406    if (CCInfo.getNextStackOffset()) {
1407      MachineFunction &MF = DAG.getMachineFunction();
1408
1409      // Check if the arguments are already laid out the same way as
1410      // the caller's fixed stack objects.
1411      MachineFrameInfo *MFI = MF.getFrameInfo();
1412      const MachineRegisterInfo *MRI = &MF.getRegInfo();
1413      const ARMInstrInfo *TII =
1414        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
1415      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1416           i != e;
1417           ++i, ++realArgIdx) {
1418        CCValAssign &VA = ArgLocs[i];
1419        EVT RegVT = VA.getLocVT();
1420        SDValue Arg = Outs[realArgIdx].Val;
1421        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1422        if (VA.getLocInfo() == CCValAssign::Indirect)
1423          return false;
1424        if (VA.needsCustom()) {
1425          // f64 and vector types are split into multiple registers or
1426          // register/stack-slot combinations.  The types will not match
1427          // the registers; give up on memory f64 refs until we figure
1428          // out what to do about this.
1429          if (!VA.isRegLoc())
1430            return false;
1431          if (!ArgLocs[++i].isRegLoc())
1432            return false;
1433          if (RegVT == MVT::v2f64) {
1434            if (!ArgLocs[++i].isRegLoc())
1435              return false;
1436            if (!ArgLocs[++i].isRegLoc())
1437              return false;
1438          }
1439        } else if (!VA.isRegLoc()) {
1440          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1441                                   MFI, MRI, TII))
1442            return false;
1443        }
1444      }
1445    }
1446  }
1447
1448  return true;
1449}
1450
1451SDValue
1452ARMTargetLowering::LowerReturn(SDValue Chain,
1453                               CallingConv::ID CallConv, bool isVarArg,
1454                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1455                               DebugLoc dl, SelectionDAG &DAG) const {
1456
1457  // CCValAssign - represents the assignment of the return value to a location.
1458  SmallVector<CCValAssign, 16> RVLocs;
1459
1460  // CCState - Info about the registers and stack slots.
1461  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1462                 *DAG.getContext());
1463
1464  // Analyze outgoing return values.
1465  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1466                                               isVarArg));
1467
1468  // If this is the first return lowered for this function, add
1469  // the regs to the liveout set for the function.
1470  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1471    for (unsigned i = 0; i != RVLocs.size(); ++i)
1472      if (RVLocs[i].isRegLoc())
1473        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1474  }
1475
1476  SDValue Flag;
1477
1478  // Copy the result values into the output registers.
1479  for (unsigned i = 0, realRVLocIdx = 0;
1480       i != RVLocs.size();
1481       ++i, ++realRVLocIdx) {
1482    CCValAssign &VA = RVLocs[i];
1483    assert(VA.isRegLoc() && "Can only return in registers!");
1484
1485    SDValue Arg = Outs[realRVLocIdx].Val;
1486
1487    switch (VA.getLocInfo()) {
1488    default: llvm_unreachable("Unknown loc info!");
1489    case CCValAssign::Full: break;
1490    case CCValAssign::BCvt:
1491      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1492      break;
1493    }
1494
1495    if (VA.needsCustom()) {
1496      if (VA.getLocVT() == MVT::v2f64) {
1497        // Extract the first half and return it in two registers.
1498        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1499                                   DAG.getConstant(0, MVT::i32));
1500        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1501                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
1502
1503        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1504        Flag = Chain.getValue(1);
1505        VA = RVLocs[++i]; // skip ahead to next loc
1506        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1507                                 HalfGPRs.getValue(1), Flag);
1508        Flag = Chain.getValue(1);
1509        VA = RVLocs[++i]; // skip ahead to next loc
1510
1511        // Extract the 2nd half and fall through to handle it as an f64 value.
1512        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1513                          DAG.getConstant(1, MVT::i32));
1514      }
1515      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1516      // available.
1517      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1518                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1519      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1520      Flag = Chain.getValue(1);
1521      VA = RVLocs[++i]; // skip ahead to next loc
1522      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1523                               Flag);
1524    } else
1525      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1526
1527    // Guarantee that all emitted copies are stuck together so that
1528    // nothing can be scheduled between them.
1529    Flag = Chain.getValue(1);
1530  }
1531
1532  SDValue result;
1533  if (Flag.getNode())
1534    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1535  else // Return Void
1536    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1537
1538  return result;
1539}
1540
1541// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1542// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1543 // one of the above-mentioned nodes. It has to be wrapped because otherwise
1544 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1545 // be used to form addressing modes. These wrapped nodes will be selected
1546// into MOVi.
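// For example, lowering a ConstantPool node below produces
//   (ARMISD::Wrapper (TargetConstantPool <cp>))
// which the instruction selector can then match directly.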
1547static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1548  EVT PtrVT = Op.getValueType();
1549  // FIXME there is no actual debug info here
1550  DebugLoc dl = Op.getDebugLoc();
1551  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1552  SDValue Res;
1553  if (CP->isMachineConstantPoolEntry())
1554    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1555                                    CP->getAlignment());
1556  else
1557    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1558                                    CP->getAlignment());
1559  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1560}
1561
1562SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1563                                             SelectionDAG &DAG) const {
1564  MachineFunction &MF = DAG.getMachineFunction();
1565  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1566  unsigned ARMPCLabelIndex = 0;
1567  DebugLoc DL = Op.getDebugLoc();
1568  EVT PtrVT = getPointerTy();
1569  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1570  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1571  SDValue CPAddr;
1572  if (RelocM == Reloc::Static) {
1573    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1574  } else {
1575    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1576    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1577    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
1578                                                         ARMCP::CPBlockAddress,
1579                                                         PCAdj);
1580    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1581  }
1582  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1583  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1584                               PseudoSourceValue::getConstantPool(), 0,
1585                               false, false, 0);
1586  if (RelocM == Reloc::Static)
1587    return Result;
1588  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1589  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1590}
1591
1592// Lower ISD::GlobalTLSAddress using the "general dynamic" model
1593SDValue
1594ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1595                                                 SelectionDAG &DAG) const {
1596  DebugLoc dl = GA->getDebugLoc();
1597  EVT PtrVT = getPointerTy();
1598  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1599  MachineFunction &MF = DAG.getMachineFunction();
1600  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1601  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1602  ARMConstantPoolValue *CPV =
1603    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1604                             ARMCP::CPValue, PCAdj, "tlsgd", true);
1605  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1606  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1607  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1608                         PseudoSourceValue::getConstantPool(), 0,
1609                         false, false, 0);
1610  SDValue Chain = Argument.getValue(1);
1611
1612  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1613  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1614
1615  // call __tls_get_addr.
1616  ArgListTy Args;
1617  ArgListEntry Entry;
1618  Entry.Node = Argument;
1619  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
1620  Args.push_back(Entry);
1621  // FIXME: is there useful debug info available here?
1622  std::pair<SDValue, SDValue> CallResult =
1623    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
1624                false, false, false, false,
1625                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1626                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1627  return CallResult.first;
1628}
1629
1630// Lower ISD::GlobalTLSAddress using the "initial exec" or
1631// "local exec" model.
1632SDValue
1633ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1634                                        SelectionDAG &DAG) const {
1635  const GlobalValue *GV = GA->getGlobal();
1636  DebugLoc dl = GA->getDebugLoc();
1637  SDValue Offset;
1638  SDValue Chain = DAG.getEntryNode();
1639  EVT PtrVT = getPointerTy();
1640  // Get the Thread Pointer
1641  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1642
1643  if (GV->isDeclaration()) {
1644    MachineFunction &MF = DAG.getMachineFunction();
1645    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1646    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1647    // Initial exec model.
1648    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1649    ARMConstantPoolValue *CPV =
1650      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1651                               ARMCP::CPValue, PCAdj, "gottpoff", true);
1652    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1653    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1654    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1655                         PseudoSourceValue::getConstantPool(), 0,
1656                         false, false, 0);
1657    Chain = Offset.getValue(1);
1658
1659    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1660    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1661
1662    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1663                         PseudoSourceValue::getConstantPool(), 0,
1664                         false, false, 0);
1665  } else {
1666    // local exec model
1667    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
1668    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1669    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1670    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1671                         PseudoSourceValue::getConstantPool(), 0,
1672                         false, false, 0);
1673  }
1674
1675  // The address of the thread-local variable is the sum of the thread
1676  // pointer and the offset of the variable.
1677  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1678}
1679
1680SDValue
1681ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
1682  // TODO: implement the "local dynamic" model
1683  assert(Subtarget->isTargetELF() &&
1684         "TLS not implemented for non-ELF targets");
1685  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1686  // If the relocation model is PIC, use the "general dynamic" TLS model;
1687  // otherwise use the "initial exec" or "local exec" model.
1688  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1689    return LowerToTLSGeneralDynamicModel(GA, DAG);
1690  else
1691    return LowerToTLSExecModels(GA, DAG);
1692}
1693
1694SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1695                                                 SelectionDAG &DAG) const {
1696  EVT PtrVT = getPointerTy();
1697  DebugLoc dl = Op.getDebugLoc();
1698  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1699  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1700  if (RelocM == Reloc::PIC_) {
1701    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
1702    ARMConstantPoolValue *CPV =
1703      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
1704    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1705    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1706    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1707                                 CPAddr,
1708                                 PseudoSourceValue::getConstantPool(), 0,
1709                                 false, false, 0);
1710    SDValue Chain = Result.getValue(1);
1711    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1712    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1713    if (!UseGOTOFF)
1714      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1715                           PseudoSourceValue::getGOT(), 0,
1716                           false, false, 0);
1717    return Result;
1718  } else {
1719    // If we have T2 ops, we can materialize the address directly via a
1720    // movt/movw pair. This is always cheaper.
1721    if (Subtarget->useMovt()) {
1722      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
1723                         DAG.getTargetGlobalAddress(GV, PtrVT));
1724    } else {
1725      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1726      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1727      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1728                         PseudoSourceValue::getConstantPool(), 0,
1729                         false, false, 0);
1730    }
1731  }
1732}
1733
1734SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1735                                                    SelectionDAG &DAG) const {
1736  MachineFunction &MF = DAG.getMachineFunction();
1737  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1738  unsigned ARMPCLabelIndex = 0;
1739  EVT PtrVT = getPointerTy();
1740  DebugLoc dl = Op.getDebugLoc();
1741  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1742  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1743  SDValue CPAddr;
1744  if (RelocM == Reloc::Static)
1745    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1746  else {
1747    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1748    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
1749    ARMConstantPoolValue *CPV =
1750      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
1751    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1752  }
1753  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1754
1755  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1756                               PseudoSourceValue::getConstantPool(), 0,
1757                               false, false, 0);
1758  SDValue Chain = Result.getValue(1);
1759
1760  if (RelocM == Reloc::PIC_) {
1761    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1762    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1763  }
1764
1765  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
1766    Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1767                         PseudoSourceValue::getGOT(), 0,
1768                         false, false, 0);
1769
1770  return Result;
1771}
1772
1773SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1774                                                    SelectionDAG &DAG) const {
1775  assert(Subtarget->isTargetELF() &&
1776         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1777  MachineFunction &MF = DAG.getMachineFunction();
1778  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1779  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1780  EVT PtrVT = getPointerTy();
1781  DebugLoc dl = Op.getDebugLoc();
1782  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1783  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1784                                                       "_GLOBAL_OFFSET_TABLE_",
1785                                                       ARMPCLabelIndex, PCAdj);
1786  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1787  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1788  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1789                               PseudoSourceValue::getConstantPool(), 0,
1790                               false, false, 0);
1791  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1792  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1793}
1794
1795SDValue
1796ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
1797  DebugLoc dl = Op.getDebugLoc();
1798  SDValue Val = DAG.getConstant(0, MVT::i32);
1799  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
1800                     Op.getOperand(1), Val);
1801}
1802
1803SDValue
1804ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
1805  DebugLoc dl = Op.getDebugLoc();
1806  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
1807                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
1808}
1809
1810SDValue
1811ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
1812                                           const ARMSubtarget *Subtarget)
1813                                             const {
1814  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1815  DebugLoc dl = Op.getDebugLoc();
1816  switch (IntNo) {
1817  default: return SDValue();    // Don't custom lower most intrinsics.
1818  case Intrinsic::arm_thread_pointer: {
1819    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1820    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1821  }
1822  case Intrinsic::eh_sjlj_lsda: {
1823    MachineFunction &MF = DAG.getMachineFunction();
1824    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1825    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1826    EVT PtrVT = getPointerTy();
1827    DebugLoc dl = Op.getDebugLoc();
1828    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1829    SDValue CPAddr;
1830    unsigned PCAdj = (RelocM != Reloc::PIC_)
1831      ? 0 : (Subtarget->isThumb() ? 4 : 8);
1832    ARMConstantPoolValue *CPV =
1833      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
1834                               ARMCP::CPLSDA, PCAdj);
1835    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1836    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1837    SDValue Result =
1838      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1839                  PseudoSourceValue::getConstantPool(), 0,
1840                  false, false, 0);
1841    SDValue Chain = Result.getValue(1);
1842
1843    if (RelocM == Reloc::PIC_) {
1844      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1845      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1846    }
1847    return Result;
1848  }
1849  }
1850}
1851
1852static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
1853                          const ARMSubtarget *Subtarget) {
1854  DebugLoc dl = Op.getDebugLoc();
1855  SDValue Op5 = Op.getOperand(5);
1856  SDValue Res;
1857  unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
1858  if (isDeviceBarrier) {
1859    if (Subtarget->hasV7Ops())
1860      Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
1861    else
1862      Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
1863                        DAG.getConstant(0, MVT::i32));
1864  } else {
1865    if (Subtarget->hasV7Ops())
1866      Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
1867    else
1868      Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
1869                        DAG.getConstant(0, MVT::i32));
1870  }
1871  return Res;
1872}
1873
1874static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
1875  MachineFunction &MF = DAG.getMachineFunction();
1876  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
1877
1878  // vastart just stores the address of the VarArgsFrameIndex slot into the
1879  // memory location argument.
1880  DebugLoc dl = Op.getDebugLoc();
1881  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1882  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1883  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1884  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
1885                      false, false, 0);
1886}
1887
1888SDValue
1889ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1890                                           SelectionDAG &DAG) const {
1891  SDNode *Node = Op.getNode();
1892  DebugLoc dl = Node->getDebugLoc();
1893  EVT VT = Node->getValueType(0);
1894  SDValue Chain = Op.getOperand(0);
1895  SDValue Size  = Op.getOperand(1);
1896  SDValue Align = Op.getOperand(2);
1897
1898  // Chain the dynamic stack allocation so that it doesn't modify the stack
1899  // pointer when other instructions are using the stack.
1900  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
1901
1902  unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
1903  unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
1904  if (AlignVal > StackAlign)
1905    // Do this now since the selection pass cannot introduce new
1906    // target-independent nodes.
1907    Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
1908
1909  // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, so we will end
1910  // up using an "add r, sp, r" instead. Negate the size now so we don't have
1911  // to do an even more horrible hack later.
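  // For example, a dynamic allocation of 1024 bytes becomes Size = -1024 here,
  // so the later expansion can form the new stack pointer with an add.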
1912  MachineFunction &MF = DAG.getMachineFunction();
1913  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1914  if (AFI->isThumb1OnlyFunction()) {
1915    bool Negate = true;
1916    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
1917    if (C) {
1918      uint32_t Val = C->getZExtValue();
1919      if (Val <= 508 && ((Val & 3) == 0))
1920        Negate = false;
1921    }
1922    if (Negate)
1923      Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
1924  }
1925
1926  SDVTList VTList = DAG.getVTList(VT, MVT::Other);
1927  SDValue Ops1[] = { Chain, Size, Align };
1928  SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
1929  Chain = Res.getValue(1);
1930  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
1931                             DAG.getIntPtrConstant(0, true), SDValue());
1932  SDValue Ops2[] = { Res, Chain };
1933  return DAG.getMergeValues(Ops2, 2, dl);
1934}
1935
1936SDValue
1937ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
1938                                        SDValue &Root, SelectionDAG &DAG,
1939                                        DebugLoc dl) const {
1940  MachineFunction &MF = DAG.getMachineFunction();
1941  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1942
1943  TargetRegisterClass *RC;
1944  if (AFI->isThumb1OnlyFunction())
1945    RC = ARM::tGPRRegisterClass;
1946  else
1947    RC = ARM::GPRRegisterClass;
1948
1949  // Transform the arguments stored in physical registers into virtual ones.
1950  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1951  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1952
1953  SDValue ArgValue2;
1954  if (NextVA.isMemLoc()) {
1955    MachineFrameInfo *MFI = MF.getFrameInfo();
1956    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
1957
1958    // Create load node to retrieve arguments from the stack.
1959    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1960    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
1961                            PseudoSourceValue::getFixedStack(FI), 0,
1962                            false, false, 0);
1963  } else {
1964    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1965    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1966  }
1967
1968  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
1969}
1970
1971SDValue
1972ARMTargetLowering::LowerFormalArguments(SDValue Chain,
1973                                        CallingConv::ID CallConv, bool isVarArg,
1974                                        const SmallVectorImpl<ISD::InputArg>
1975                                          &Ins,
1976                                        DebugLoc dl, SelectionDAG &DAG,
1977                                        SmallVectorImpl<SDValue> &InVals)
1978                                          const {
1979
1980  MachineFunction &MF = DAG.getMachineFunction();
1981  MachineFrameInfo *MFI = MF.getFrameInfo();
1982
1983  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1984
1985  // Assign locations to all of the incoming arguments.
1986  SmallVector<CCValAssign, 16> ArgLocs;
1987  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1988                 *DAG.getContext());
1989  CCInfo.AnalyzeFormalArguments(Ins,
1990                                CCAssignFnForNode(CallConv, /* Return*/ false,
1991                                                  isVarArg));
1992
1993  SmallVector<SDValue, 16> ArgValues;
1994
1995  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1996    CCValAssign &VA = ArgLocs[i];
1997
1998    // Arguments stored in registers.
1999    if (VA.isRegLoc()) {
2000      EVT RegVT = VA.getLocVT();
2001
2002      SDValue ArgValue;
2003      if (VA.needsCustom()) {
2004        // f64 and vector types are split up into multiple registers or
2005        // combinations of registers and stack slots.
2006        if (VA.getLocVT() == MVT::v2f64) {
2007          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2008                                                   Chain, DAG, dl);
2009          VA = ArgLocs[++i]; // skip ahead to next loc
2010          SDValue ArgValue2;
2011          if (VA.isMemLoc()) {
2012            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
2013                                            true, false);
2014            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2015            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2016                                    PseudoSourceValue::getFixedStack(FI), 0,
2017                                    false, false, 0);
2018          } else {
2019            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2020                                             Chain, DAG, dl);
2021          }
2022          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2023          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2024                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2025          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2026                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2027        } else
2028          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2029
2030      } else {
2031        TargetRegisterClass *RC;
2032
2033        if (RegVT == MVT::f32)
2034          RC = ARM::SPRRegisterClass;
2035        else if (RegVT == MVT::f64)
2036          RC = ARM::DPRRegisterClass;
2037        else if (RegVT == MVT::v2f64)
2038          RC = ARM::QPRRegisterClass;
2039        else if (RegVT == MVT::i32)
2040          RC = (AFI->isThumb1OnlyFunction() ?
2041                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2042        else
2043          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2044
2045        // Transform the arguments in physical registers into virtual ones.
2046        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2047        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2048      }
2049
2050      // If this is an 8 or 16-bit value, it is really passed promoted
2051      // to 32 bits.  Insert an assert[sz]ext to capture this, then
2052      // truncate to the right size.
2053      switch (VA.getLocInfo()) {
2054      default: llvm_unreachable("Unknown loc info!");
2055      case CCValAssign::Full: break;
2056      case CCValAssign::BCvt:
2057        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
2058        break;
2059      case CCValAssign::SExt:
2060        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2061                               DAG.getValueType(VA.getValVT()));
2062        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2063        break;
2064      case CCValAssign::ZExt:
2065        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2066                               DAG.getValueType(VA.getValVT()));
2067        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2068        break;
2069      }
2070
2071      InVals.push_back(ArgValue);
2072
2073    } else { // VA.isRegLoc()
2074
2075      // sanity check
2076      assert(VA.isMemLoc());
2077      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2078
2079      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
2080      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2081                                      true, false);
2082
2083      // Create load nodes to retrieve arguments from the stack.
2084      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2085      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2086                                   PseudoSourceValue::getFixedStack(FI), 0,
2087                                   false, false, 0));
2088    }
2089  }
2090
2091  // varargs
2092  if (isVarArg) {
2093    static const unsigned GPRArgRegs[] = {
2094      ARM::R0, ARM::R1, ARM::R2, ARM::R3
2095    };
2096
2097    unsigned NumGPRs = CCInfo.getFirstUnallocated
2098      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2099
2100    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
2101    unsigned VARegSize = (4 - NumGPRs) * 4;
2102    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2103    unsigned ArgOffset = CCInfo.getNextStackOffset();
2104    if (VARegSaveSize) {
2105      // If this function is vararg, store any remaining integer argument regs
2106      // to their spots on the stack so that they may be loaded by dereferencing
2107      // the result of va_next.
2108      AFI->setVarArgsRegSaveSize(VARegSaveSize);
2109      AFI->setVarArgsFrameIndex(
2110        MFI->CreateFixedObject(VARegSaveSize,
2111                               ArgOffset + VARegSaveSize - VARegSize,
2112                               true, false));
2113      SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2114                                      getPointerTy());
2115
2116      SmallVector<SDValue, 4> MemOps;
2117      for (; NumGPRs < 4; ++NumGPRs) {
2118        TargetRegisterClass *RC;
2119        if (AFI->isThumb1OnlyFunction())
2120          RC = ARM::tGPRRegisterClass;
2121        else
2122          RC = ARM::GPRRegisterClass;
2123
2124        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
2125        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2126        SDValue Store =
2127          DAG.getStore(Val.getValue(1), dl, Val, FIN,
2128               PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
2129               0, false, false, 0);
2130        MemOps.push_back(Store);
2131        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2132                          DAG.getConstant(4, getPointerTy()));
2133      }
2134      if (!MemOps.empty())
2135        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2136                            &MemOps[0], MemOps.size());
2137    } else
2138      // This will point to the next argument passed via stack.
2139      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
2140                                                       true, false));
2141  }
2142
2143  return Chain;
2144}
2145
2146/// isFloatingPointZero - Return true if this is +0.0.
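/// Only positive zero matches; -0.0 returns false. A +0.0 that has already
/// been moved into the constant pool (loaded via an ARMISD::Wrapper address)
/// is also recognized.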
2147static bool isFloatingPointZero(SDValue Op) {
2148  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2149    return CFP->getValueAPF().isPosZero();
2150  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2151    // Maybe this has already been legalized into the constant pool?
2152    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2153      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2154      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2155        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2156          return CFP->getValueAPF().isPosZero();
2157    }
2158  }
2159  return false;
2160}
2161
2162 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition
2163 /// code for the given operands.
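/// For example, if the immediate in (x < 0x101) cannot be encoded directly,
/// the comparison is rewritten as (x <= 0x100), whose constant is encodable,
/// before the CMP node is created.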
2164SDValue
2165ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2166                             SDValue &ARMCC, SelectionDAG &DAG,
2167                             DebugLoc dl) const {
2168  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2169    unsigned C = RHSC->getZExtValue();
2170    if (!isLegalICmpImmediate(C)) {
2171      // Constant does not fit; try adjusting it by one.
2172      switch (CC) {
2173      default: break;
2174      case ISD::SETLT:
2175      case ISD::SETGE:
2176        if (isLegalICmpImmediate(C-1)) {
2177          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2178          RHS = DAG.getConstant(C-1, MVT::i32);
2179        }
2180        break;
2181      case ISD::SETULT:
2182      case ISD::SETUGE:
2183        if (C > 0 && isLegalICmpImmediate(C-1)) {
2184          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2185          RHS = DAG.getConstant(C-1, MVT::i32);
2186        }
2187        break;
2188      case ISD::SETLE:
2189      case ISD::SETGT:
2190        if (isLegalICmpImmediate(C+1)) {
2191          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2192          RHS = DAG.getConstant(C+1, MVT::i32);
2193        }
2194        break;
2195      case ISD::SETULE:
2196      case ISD::SETUGT:
2197        if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
2198          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2199          RHS = DAG.getConstant(C+1, MVT::i32);
2200        }
2201        break;
2202      }
2203    }
2204  }
2205
2206  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2207  ARMISD::NodeType CompareType;
2208  switch (CondCode) {
2209  default:
2210    CompareType = ARMISD::CMP;
2211    break;
2212  case ARMCC::EQ:
2213  case ARMCC::NE:
2214    // Uses only Z Flag
2215    CompareType = ARMISD::CMPZ;
2216    break;
2217  }
2218  ARMCC = DAG.getConstant(CondCode, MVT::i32);
2219  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
2220}
2221
2222 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2223static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2224                         DebugLoc dl) {
2225  SDValue Cmp;
2226  if (!isFloatingPointZero(RHS))
2227    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
2228  else
2229    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
2230  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
2231}
2232
2233SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2234  EVT VT = Op.getValueType();
2235  SDValue LHS = Op.getOperand(0);
2236  SDValue RHS = Op.getOperand(1);
2237  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2238  SDValue TrueVal = Op.getOperand(2);
2239  SDValue FalseVal = Op.getOperand(3);
2240  DebugLoc dl = Op.getDebugLoc();
2241
2242  if (LHS.getValueType() == MVT::i32) {
2243    SDValue ARMCC;
2244    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2245    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2246    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
2247  }
2248
2249  ARMCC::CondCodes CondCode, CondCode2;
2250  FPCCToARMCC(CC, CondCode, CondCode2);
2251
2252  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2253  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2254  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2255  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2256                                 ARMCC, CCR, Cmp);
2257  if (CondCode2 != ARMCC::AL) {
2258    SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
2259    // FIXME: Needs another CMP because flag can have but one use.
2260    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2261    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2262                         Result, TrueVal, ARMCC2, CCR, Cmp2);
2263  }
2264  return Result;
2265}
2266
2267SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2268  SDValue  Chain = Op.getOperand(0);
2269  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2270  SDValue    LHS = Op.getOperand(2);
2271  SDValue    RHS = Op.getOperand(3);
2272  SDValue   Dest = Op.getOperand(4);
2273  DebugLoc dl = Op.getDebugLoc();
2274
2275  if (LHS.getValueType() == MVT::i32) {
2276    SDValue ARMCC;
2277    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2278    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2279    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2280                       Chain, Dest, ARMCC, CCR,Cmp);
2281  }
2282
2283  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2284  ARMCC::CondCodes CondCode, CondCode2;
2285  FPCCToARMCC(CC, CondCode, CondCode2);
2286
2287  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2288  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2289  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2290  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2291  SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
2292  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2293  if (CondCode2 != ARMCC::AL) {
2294    ARMCC = DAG.getConstant(CondCode2, MVT::i32);
2295    SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
2296    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2297  }
2298  return Res;
2299}
2300
2301SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2302  SDValue Chain = Op.getOperand(0);
2303  SDValue Table = Op.getOperand(1);
2304  SDValue Index = Op.getOperand(2);
2305  DebugLoc dl = Op.getDebugLoc();
2306
2307  EVT PTy = getPointerTy();
2308  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2309  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2310  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2311  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2312  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2313  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2314  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2315  if (Subtarget->isThumb2()) {
2316    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2317    // which does another jump to the destination. This also makes it easier
2318    // to translate it to TBB / TBH later.
2319    // FIXME: This might not work if the function is extremely large.
2320    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2321                       Addr, Op.getOperand(2), JTI, UId);
2322  }
2323  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2324    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2325                       PseudoSourceValue::getJumpTable(), 0,
2326                       false, false, 0);
2327    Chain = Addr.getValue(1);
2328    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2329    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2330  } else {
2331    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2332                       PseudoSourceValue::getJumpTable(), 0, false, false, 0);
2333    Chain = Addr.getValue(1);
2334    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2335  }
2336}
2337
2338static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2339  DebugLoc dl = Op.getDebugLoc();
2340  unsigned Opc;
2341
2342  switch (Op.getOpcode()) {
2343  default:
2344    assert(0 && "Invalid opcode!");
2345  case ISD::FP_TO_SINT:
2346    Opc = ARMISD::FTOSI;
2347    break;
2348  case ISD::FP_TO_UINT:
2349    Opc = ARMISD::FTOUI;
2350    break;
2351  }
2352  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2353  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
2354}
2355
2356static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2357  EVT VT = Op.getValueType();
2358  DebugLoc dl = Op.getDebugLoc();
2359  unsigned Opc;
2360
2361  switch (Op.getOpcode()) {
2362  default:
2363    assert(0 && "Invalid opcode!");
2364  case ISD::SINT_TO_FP:
2365    Opc = ARMISD::SITOF;
2366    break;
2367  case ISD::UINT_TO_FP:
2368    Opc = ARMISD::UITOF;
2369    break;
2370  }
2371
2372  Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
2373  return DAG.getNode(Opc, dl, VT, Op);
2374}
2375
2376static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
2377  // Implement fcopysign with a fabs and a conditional fneg.
2378  SDValue Tmp0 = Op.getOperand(0);
2379  SDValue Tmp1 = Op.getOperand(1);
2380  DebugLoc dl = Op.getDebugLoc();
2381  EVT VT = Op.getValueType();
2382  EVT SrcVT = Tmp1.getValueType();
2383  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2384  SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
2385  SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
2386  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2387  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
2388}
2389
2390SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
2391  MachineFunction &MF = DAG.getMachineFunction();
2392  MachineFrameInfo *MFI = MF.getFrameInfo();
2393  MFI->setReturnAddressIsTaken(true);
2394
2395  EVT VT = Op.getValueType();
2396  DebugLoc dl = Op.getDebugLoc();
2397  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2398  if (Depth) {
2399    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2400    SDValue Offset = DAG.getConstant(4, MVT::i32);
2401    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2402                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2403                       NULL, 0, false, false, 0);
2404  }
2405
2406  // Return LR, which contains the return address. Mark it an implicit live-in.
2407  unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
2408  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
2409}
2410
2411SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2412  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2413  MFI->setFrameAddressIsTaken(true);
2414
2415  EVT VT = Op.getValueType();
2416  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
2417  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2418  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2419    ? ARM::R7 : ARM::R11;
2420  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2421  while (Depth--)
2422    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
2423                            false, false, 0);
2424  return FrameAddr;
2425}
2426
2427/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
2428/// expand a bit convert where either the source or destination type is i64 to
2429/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
2430/// operand type is illegal (e.g., v2f32 for a target that doesn't support
2431/// vectors), since the legalizer won't know what to do with that.
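/// For example, (f64 (bit_convert (i64 X))) becomes
///   (VMOVDRR (extract_element X, 0), (extract_element X, 1)),
/// and (i64 (bit_convert (f64 X))) becomes a BUILD_PAIR of the two i32
/// results of (VMOVRRD X).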
2432static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
2433  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2434  DebugLoc dl = N->getDebugLoc();
2435  SDValue Op = N->getOperand(0);
2436
2437  // This function is only supposed to be called for i64 types, either as the
2438  // source or destination of the bit convert.
2439  EVT SrcVT = Op.getValueType();
2440  EVT DstVT = N->getValueType(0);
2441  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2442         "ExpandBIT_CONVERT called for non-i64 type");
2443
2444  // Turn i64->f64 into VMOVDRR.
2445  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2446    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2447                             DAG.getConstant(0, MVT::i32));
2448    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2449                             DAG.getConstant(1, MVT::i32));
2450    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2451  }
2452
2453  // Turn f64->i64 into VMOVRRD.
2454  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2455    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2456                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2457    // Merge the pieces into a single i64 value.
2458    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2459  }
2460
2461  return SDValue();
2462}
2463
2464/// getZeroVector - Returns a vector of specified type with all zero elements.
2465///
2466static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2467  assert(VT.isVector() && "Expected a vector type");
2468
2469  // Zero vectors are used to represent vector negation and in those cases
2470  // will be implemented with the NEON VNEG instruction.  However, VNEG does
2471  // not support i64 elements, so sometimes the zero vectors will need to be
2472  // explicitly constructed.  For those cases, and potentially other uses in
2473  // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
2474  // to their dest type.  This ensures they get CSE'd.
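  // For example, a v2i32 zero vector is emitted as
  //   (v2i32 (bit_convert (v8i8 (build_vector 0, ..., 0))))
  // so that all 64-bit zero vectors share the same node.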
2475  SDValue Vec;
2476  SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
2477  SmallVector<SDValue, 8> Ops;
2478  MVT TVT;
2479
2480  if (VT.getSizeInBits() == 64) {
2481    Ops.assign(8, Cst); TVT = MVT::v8i8;
2482  } else {
2483    Ops.assign(16, Cst); TVT = MVT::v16i8;
2484  }
2485  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2486
2487  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2488}
2489
2490/// getOnesVector - Returns a vector of specified type with all bits set.
2491///
2492static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2493  assert(VT.isVector() && "Expected a vector type");
2494
2495  // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
2496  // dest type. This ensures they get CSE'd.
2497  SDValue Vec;
2498  SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
2499  SmallVector<SDValue, 8> Ops;
2500  MVT TVT;
2501
2502  if (VT.getSizeInBits() == 64) {
2503    Ops.assign(8, Cst); TVT = MVT::v8i8;
2504  } else {
2505    Ops.assign(16, Cst); TVT = MVT::v16i8;
2506  }
2507  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2508
2509  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2510}
2511
2512 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
2513 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
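/// For shift amounts below 32 the low result is (Lo >> amt) | (Hi << (32 - amt));
/// for amounts of 32 and above it is Hi shifted by (amt - 32), selected with a
/// CMOV on the comparison (amt - 32) >= 0.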
2514SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2515                                                SelectionDAG &DAG) const {
2516  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2517  EVT VT = Op.getValueType();
2518  unsigned VTBits = VT.getSizeInBits();
2519  DebugLoc dl = Op.getDebugLoc();
2520  SDValue ShOpLo = Op.getOperand(0);
2521  SDValue ShOpHi = Op.getOperand(1);
2522  SDValue ShAmt  = Op.getOperand(2);
2523  SDValue ARMCC;
2524  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2525
2526  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2527
2528  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2529                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2530  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2531  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2532                                   DAG.getConstant(VTBits, MVT::i32));
2533  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2534  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2535  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2536
2537  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2538  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2539                          ARMCC, DAG, dl);
2540  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2541  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
2542                           CCR, Cmp);
2543
2544  SDValue Ops[2] = { Lo, Hi };
2545  return DAG.getMergeValues(Ops, 2, dl);
2546}
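
// Worked example (illustration, not from the original source): for a 64-bit
// arithmetic shift right by ShAmt, the code above computes
//   Lo = (ShOpLo >> ShAmt) | (ShOpHi << (32 - ShAmt))  when ShAmt - 32 < 0
//   Lo = ShOpHi >> (ShAmt - 32)                        otherwise
//   Hi = ShOpHi >> ShAmt  (arithmetic or logical per the opcode)
// with the choice between the two Lo values made by the ARMISD::CMOV on the
// sign of ExtraShAmt = ShAmt - 32.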
2547
2548/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2549/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
2550SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2551                                               SelectionDAG &DAG) const {
2552  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2553  EVT VT = Op.getValueType();
2554  unsigned VTBits = VT.getSizeInBits();
2555  DebugLoc dl = Op.getDebugLoc();
2556  SDValue ShOpLo = Op.getOperand(0);
2557  SDValue ShOpHi = Op.getOperand(1);
2558  SDValue ShAmt  = Op.getOperand(2);
2559  SDValue ARMCC;
2560
2561  assert(Op.getOpcode() == ISD::SHL_PARTS);
2562  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2563                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2564  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2565  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2566                                   DAG.getConstant(VTBits, MVT::i32));
2567  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2568  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2569
2570  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2571  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2572  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2573                          ARMCC, DAG, dl);
2574  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2575  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
2576                           CCR, Cmp);
2577
2578  SDValue Ops[2] = { Lo, Hi };
2579  return DAG.getMergeValues(Ops, 2, dl);
2580}
2581
2582static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2583                         const ARMSubtarget *ST) {
2584  EVT VT = N->getValueType(0);
2585  DebugLoc dl = N->getDebugLoc();
2586
2587  if (!ST->hasV6T2Ops())
2588    return SDValue();
2589
2590  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2591  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2592}
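
// Illustrative note: this relies on the identity cttz(x) == ctlz(rbit(x)),
// so on ARMv6T2 and later a count-trailing-zeros is expected to select to an
// RBIT followed by a CLZ instruction.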
2593
2594static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2595                          const ARMSubtarget *ST) {
2596  EVT VT = N->getValueType(0);
2597  DebugLoc dl = N->getDebugLoc();
2598
2599  // Lower vector shifts on NEON to use VSHL.
2600  if (VT.isVector()) {
2601    assert(ST->hasNEON() && "unexpected vector shift");
2602
2603    // Left shifts translate directly to the vshiftu intrinsic.
2604    if (N->getOpcode() == ISD::SHL)
2605      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2606                         DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2607                         N->getOperand(0), N->getOperand(1));
2608
2609    assert((N->getOpcode() == ISD::SRA ||
2610            N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2611
2612    // NEON uses the same intrinsics for both left and right shifts.  For
2613    // right shifts, the shift amounts are negative, so negate the vector of
2614    // shift amounts.
2615    EVT ShiftVT = N->getOperand(1).getValueType();
2616    SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2617                                       getZeroVector(ShiftVT, DAG, dl),
2618                                       N->getOperand(1));
2619    Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2620                               Intrinsic::arm_neon_vshifts :
2621                               Intrinsic::arm_neon_vshiftu);
2622    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2623                       DAG.getConstant(vshiftInt, MVT::i32),
2624                       N->getOperand(0), NegatedCount);
2625  }
2626
2627  // We can get here for a node like i32 = ISD::SHL i32, i64
2628  if (VT != MVT::i64)
2629    return SDValue();
2630
2631  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2632         "Unknown shift to lower!");
2633
2634  // We only lower SRA, SRL of 1 here, all others use generic lowering.
2635  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2636      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2637    return SDValue();
2638
2639  // If we are in thumb mode, we don't have RRX.
2640  if (ST->isThumb1Only()) return SDValue();
2641
2642  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
2643  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2644                           DAG.getConstant(0, MVT::i32));
2645  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2646                           DAG.getConstant(1, MVT::i32));
2647
2648  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2649  // captures the result into a carry flag.
2650  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2651  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2652
2653  // The low part is an ARMISD::RRX operand, which shifts the carry in.
2654  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2655
2656  // Merge the pieces into a single i64 value.
2657  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2658}
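
// Illustrative note (assumed final code, not taken from the source): a
// 64-bit logical shift right by one lowered this way should correspond
// roughly to
//   lsrs  hi, hi, #1   @ shift the high word, shifted-out bit goes to carry
//   rrx   lo, lo       @ rotate the carry into the top of the low word
// with an arithmetic shift in place of LSR for the SRA case.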
2659
2660static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2661  SDValue TmpOp0, TmpOp1;
2662  bool Invert = false;
2663  bool Swap = false;
2664  unsigned Opc = 0;
2665
2666  SDValue Op0 = Op.getOperand(0);
2667  SDValue Op1 = Op.getOperand(1);
2668  SDValue CC = Op.getOperand(2);
2669  EVT VT = Op.getValueType();
2670  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2671  DebugLoc dl = Op.getDebugLoc();
2672
2673  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
2674    switch (SetCCOpcode) {
2675    default: llvm_unreachable("Illegal FP comparison"); break;
2676    case ISD::SETUNE:
2677    case ISD::SETNE:  Invert = true; // Fallthrough
2678    case ISD::SETOEQ:
2679    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2680    case ISD::SETOLT:
2681    case ISD::SETLT: Swap = true; // Fallthrough
2682    case ISD::SETOGT:
2683    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2684    case ISD::SETOLE:
2685    case ISD::SETLE:  Swap = true; // Fallthrough
2686    case ISD::SETOGE:
2687    case ISD::SETGE: Opc = ARMISD::VCGE; break;
2688    case ISD::SETUGE: Swap = true; // Fallthrough
2689    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2690    case ISD::SETUGT: Swap = true; // Fallthrough
2691    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2692    case ISD::SETUEQ: Invert = true; // Fallthrough
2693    case ISD::SETONE:
2694      // Expand this to (OLT | OGT).
2695      TmpOp0 = Op0;
2696      TmpOp1 = Op1;
2697      Opc = ISD::OR;
2698      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2699      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2700      break;
2701    case ISD::SETUO: Invert = true; // Fallthrough
2702    case ISD::SETO:
2703      // Expand this to (OLT | OGE).
2704      TmpOp0 = Op0;
2705      TmpOp1 = Op1;
2706      Opc = ISD::OR;
2707      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2708      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
2709      break;
2710    }
2711  } else {
2712    // Integer comparisons.
2713    switch (SetCCOpcode) {
2714    default: llvm_unreachable("Illegal integer comparison"); break;
2715    case ISD::SETNE:  Invert = true; // Fallthrough
2716    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2717    case ISD::SETLT:  Swap = true; // Fallthrough
2718    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2719    case ISD::SETLE:  Swap = true; // Fallthrough
2720    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
2721    case ISD::SETULT: Swap = true; // Fallthrough
2722    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
2723    case ISD::SETULE: Swap = true; // Fallthrough
2724    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
2725    }
2726
2727    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2728    if (Opc == ARMISD::VCEQ) {
2729
2730      SDValue AndOp;
2731      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
2732        AndOp = Op0;
2733      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
2734        AndOp = Op1;
2735
2736      // Ignore bitconvert.
2737      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
2738        AndOp = AndOp.getOperand(0);
2739
2740      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
2741        Opc = ARMISD::VTST;
2742        Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
2743        Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
2744        Invert = !Invert;
2745      }
2746    }
2747  }
2748
2749  if (Swap)
2750    std::swap(Op0, Op1);
2751
2752  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
2753
2754  if (Invert)
2755    Result = DAG.getNOT(dl, Result, VT);
2756
2757  return Result;
2758}
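
// Examples (for illustration only): with the mappings above, a floating-point
// SETONE compare expands to (VCGT b, a) | (VCGT a, b), and an integer SETNE
// becomes the bitwise NOT of a VCEQ.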
2759
2760/// isVMOVSplat - Check if the specified splat value corresponds to an immediate
2761/// VMOV instruction, and if so, return the constant being splatted.
2762static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
2763                           unsigned SplatBitSize, SelectionDAG &DAG) {
2764  switch (SplatBitSize) {
2765  case 8:
2766    // Any 1-byte value is OK.
2767    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
2768    return DAG.getTargetConstant(SplatBits, MVT::i8);
2769
2770  case 16:
2771    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
2772    if ((SplatBits & ~0xff) == 0 ||
2773        (SplatBits & ~0xff00) == 0)
2774      return DAG.getTargetConstant(SplatBits, MVT::i16);
2775    break;
2776
2777  case 32:
2778    // NEON's 32-bit VMOV supports splat values where:
2779    // * only one byte is nonzero, or
2780    // * the least significant byte is 0xff and the second byte is nonzero, or
2781    // * the least significant 2 bytes are 0xff and the third is nonzero.
2782    if ((SplatBits & ~0xff) == 0 ||
2783        (SplatBits & ~0xff00) == 0 ||
2784        (SplatBits & ~0xff0000) == 0 ||
2785        (SplatBits & ~0xff000000) == 0)
2786      return DAG.getTargetConstant(SplatBits, MVT::i32);
2787
2788    if ((SplatBits & ~0xffff) == 0 &&
2789        ((SplatBits | SplatUndef) & 0xff) == 0xff)
2790      return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
2791
2792    if ((SplatBits & ~0xffffff) == 0 &&
2793        ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
2794      return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
2795
2796    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
2797    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
2798    // VMOV.I32.  A (very) minor optimization would be to replicate the value
2799    // and fall through here to test for a valid 64-bit splat.  But, then the
2800    // caller would also need to check and handle the change in size.
2801    break;
2802
2803  case 64: {
2804    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
2805    uint64_t BitMask = 0xff;
2806    uint64_t Val = 0;
2807    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
2808      if (((SplatBits | SplatUndef) & BitMask) == BitMask)
2809        Val |= BitMask;
2810      else if ((SplatBits & BitMask) != 0)
2811        return SDValue();
2812      BitMask <<= 8;
2813    }
2814    return DAG.getTargetConstant(Val, MVT::i64);
2815  }
2816
2817  default:
2818    llvm_unreachable("unexpected size for isVMOVSplat");
2819    break;
2820  }
2821
2822  return SDValue();
2823}
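
// Examples (illustration only): for 32-bit elements, a splat of 0x0000ab00 is
// representable directly (only one byte is nonzero), and a splat of
// 0x0000abff matches the "low byte all ones" form, so both can be encoded as
// immediate VMOVs; a value such as 0x00ab00cd has two unrelated nonzero bytes
// and falls through to return an empty SDValue.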
2824
2825/// getVMOVImm - If this is a build_vector of constants which can be
2826/// formed by using a VMOV instruction of the specified element size,
2827/// return the constant being splatted.  The ByteSize field indicates the
2828/// number of bytes of each element (1, 2, 4, or 8).
2829SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2830  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
2831  APInt SplatBits, SplatUndef;
2832  unsigned SplatBitSize;
2833  bool HasAnyUndefs;
2834  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
2835                                      HasAnyUndefs, ByteSize * 8))
2836    return SDValue();
2837
2838  if (SplatBitSize > ByteSize * 8)
2839    return SDValue();
2840
2841  return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
2842                     SplatBitSize, DAG);
2843}
2844
2845static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
2846                       bool &ReverseVEXT, unsigned &Imm) {
2847  unsigned NumElts = VT.getVectorNumElements();
2848  ReverseVEXT = false;
2849  Imm = M[0];
2850
2851  // If this is a VEXT shuffle, the immediate value is the index of the first
2852  // element.  The other shuffle indices must be the successive elements after
2853  // the first one.
2854  unsigned ExpectedElt = Imm;
2855  for (unsigned i = 1; i < NumElts; ++i) {
2856    // Increment the expected index.  If it wraps around, it may still be
2857    // a VEXT but the source vectors must be swapped.
2858    ExpectedElt += 1;
2859    if (ExpectedElt == NumElts * 2) {
2860      ExpectedElt = 0;
2861      ReverseVEXT = true;
2862    }
2863
2864    if (ExpectedElt != static_cast<unsigned>(M[i]))
2865      return false;
2866  }
2867
2868  // Adjust the index value if the source operands will be swapped.
2869  if (ReverseVEXT)
2870    Imm -= NumElts;
2871
2872  return true;
2873}
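
// Example (illustration only): for a v8i8 shuffle, the mask
// <3, 4, 5, 6, 7, 8, 9, 10> is a VEXT with Imm == 3; the mask
// <12, 13, 14, 15, 0, 1, 2, 3> also matches, but with the two source
// operands swapped (ReverseVEXT) and Imm adjusted to 4.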
2874
2875/// isVREVMask - Check if a vector shuffle corresponds to a VREV
2876/// instruction with the specified blocksize.  (The order of the elements
2877/// within each block of the vector is reversed.)
2878static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
2879                       unsigned BlockSize) {
2880  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
2881         "Only possible block sizes for VREV are: 16, 32, 64");
2882
2883  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2884  if (EltSz == 64)
2885    return false;
2886
2887  unsigned NumElts = VT.getVectorNumElements();
2888  unsigned BlockElts = M[0] + 1;
2889
2890  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
2891    return false;
2892
2893  for (unsigned i = 0; i < NumElts; ++i) {
2894    if ((unsigned) M[i] !=
2895        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
2896      return false;
2897  }
2898
2899  return true;
2900}
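
// Example (illustration only): for v8i8 with BlockSize == 32, BlockElts is 4,
// so the mask <3, 2, 1, 0, 7, 6, 5, 4> is recognized as a VREV32.8 (each
// 32-bit block has its byte order reversed).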
2901
2902static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
2903                       unsigned &WhichResult) {
2904  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2905  if (EltSz == 64)
2906    return false;
2907
2908  unsigned NumElts = VT.getVectorNumElements();
2909  WhichResult = (M[0] == 0 ? 0 : 1);
2910  for (unsigned i = 0; i < NumElts; i += 2) {
2911    if ((unsigned) M[i] != i + WhichResult ||
2912        (unsigned) M[i+1] != i + NumElts + WhichResult)
2913      return false;
2914  }
2915  return true;
2916}
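
// Example (illustration only): for v4i32, the mask <0, 4, 2, 6> selects the
// first (WhichResult == 0) result of a VTRN.32, and <1, 5, 3, 7> selects the
// second.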
2917
2918/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
2919/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2920/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
2921static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2922                                unsigned &WhichResult) {
2923  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2924  if (EltSz == 64)
2925    return false;
2926
2927  unsigned NumElts = VT.getVectorNumElements();
2928  WhichResult = (M[0] == 0 ? 0 : 1);
2929  for (unsigned i = 0; i < NumElts; i += 2) {
2930    if ((unsigned) M[i] != i + WhichResult ||
2931        (unsigned) M[i+1] != i + WhichResult)
2932      return false;
2933  }
2934  return true;
2935}
2936
2937static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
2938                       unsigned &WhichResult) {
2939  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2940  if (EltSz == 64)
2941    return false;
2942
2943  unsigned NumElts = VT.getVectorNumElements();
2944  WhichResult = (M[0] == 0 ? 0 : 1);
2945  for (unsigned i = 0; i != NumElts; ++i) {
2946    if ((unsigned) M[i] != 2 * i + WhichResult)
2947      return false;
2948  }
2949
2950  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2951  if (VT.is64BitVector() && EltSz == 32)
2952    return false;
2953
2954  return true;
2955}
2956
2957/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
2958/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2959/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
2960static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2961                                unsigned &WhichResult) {
2962  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2963  if (EltSz == 64)
2964    return false;
2965
2966  unsigned Half = VT.getVectorNumElements() / 2;
2967  WhichResult = (M[0] == 0 ? 0 : 1);
2968  for (unsigned j = 0; j != 2; ++j) {
2969    unsigned Idx = WhichResult;
2970    for (unsigned i = 0; i != Half; ++i) {
2971      if ((unsigned) M[i + j * Half] != Idx)
2972        return false;
2973      Idx += 2;
2974    }
2975  }
2976
2977  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2978  if (VT.is64BitVector() && EltSz == 32)
2979    return false;
2980
2981  return true;
2982}
2983
2984static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
2985                       unsigned &WhichResult) {
2986  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2987  if (EltSz == 64)
2988    return false;
2989
2990  unsigned NumElts = VT.getVectorNumElements();
2991  WhichResult = (M[0] == 0 ? 0 : 1);
2992  unsigned Idx = WhichResult * NumElts / 2;
2993  for (unsigned i = 0; i != NumElts; i += 2) {
2994    if ((unsigned) M[i] != Idx ||
2995        (unsigned) M[i+1] != Idx + NumElts)
2996      return false;
2997    Idx += 1;
2998  }
2999
3000  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3001  if (VT.is64BitVector() && EltSz == 32)
3002    return false;
3003
3004  return true;
3005}
3006
3007/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
3008/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3009/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
3010static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3011                                unsigned &WhichResult) {
3012  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3013  if (EltSz == 64)
3014    return false;
3015
3016  unsigned NumElts = VT.getVectorNumElements();
3017  WhichResult = (M[0] == 0 ? 0 : 1);
3018  unsigned Idx = WhichResult * NumElts / 2;
3019  for (unsigned i = 0; i != NumElts; i += 2) {
3020    if ((unsigned) M[i] != Idx ||
3021        (unsigned) M[i+1] != Idx)
3022      return false;
3023    Idx += 1;
3024  }
3025
3026  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3027  if (VT.is64BitVector() && EltSz == 32)
3028    return false;
3029
3030  return true;
3031}
3032
3033
3034static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
3035  // Canonicalize all-zeros and all-ones vectors.
3036  ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
3037  if (ConstVal->isNullValue())
3038    return getZeroVector(VT, DAG, dl);
3039  if (ConstVal->isAllOnesValue())
3040    return getOnesVector(VT, DAG, dl);
3041
3042  EVT CanonicalVT;
3043  if (VT.is64BitVector()) {
3044    switch (Val.getValueType().getSizeInBits()) {
3045    case 8:  CanonicalVT = MVT::v8i8; break;
3046    case 16: CanonicalVT = MVT::v4i16; break;
3047    case 32: CanonicalVT = MVT::v2i32; break;
3048    case 64: CanonicalVT = MVT::v1i64; break;
3049    default: llvm_unreachable("unexpected splat element type"); break;
3050    }
3051  } else {
3052    assert(VT.is128BitVector() && "unknown splat vector size");
3053    switch (Val.getValueType().getSizeInBits()) {
3054    case 8:  CanonicalVT = MVT::v16i8; break;
3055    case 16: CanonicalVT = MVT::v8i16; break;
3056    case 32: CanonicalVT = MVT::v4i32; break;
3057    case 64: CanonicalVT = MVT::v2i64; break;
3058    default: llvm_unreachable("unexpected splat element type"); break;
3059    }
3060  }
3061
3062  // Build a canonical splat for this value.
3063  SmallVector<SDValue, 8> Ops;
3064  Ops.assign(CanonicalVT.getVectorNumElements(), Val);
3065  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
3066                            Ops.size());
3067  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
3068}
3069
3070// If this is a case we can't handle, return null and let the default
3071// expansion code take care of it.
3072static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
3073  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3074  DebugLoc dl = Op.getDebugLoc();
3075  EVT VT = Op.getValueType();
3076
3077  APInt SplatBits, SplatUndef;
3078  unsigned SplatBitSize;
3079  bool HasAnyUndefs;
3080  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3081    if (SplatBitSize <= 64) {
3082      SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
3083                                SplatUndef.getZExtValue(), SplatBitSize, DAG);
3084      if (Val.getNode())
3085        return BuildSplat(Val, VT, DAG, dl);
3086    }
3087  }
3088
3089  // Scan through the operands to see if only one value is used.
3090  unsigned NumElts = VT.getVectorNumElements();
3091  bool isOnlyLowElement = true;
3092  bool usesOnlyOneValue = true;
3093  bool isConstant = true;
3094  SDValue Value;
3095  for (unsigned i = 0; i < NumElts; ++i) {
3096    SDValue V = Op.getOperand(i);
3097    if (V.getOpcode() == ISD::UNDEF)
3098      continue;
3099    if (i > 0)
3100      isOnlyLowElement = false;
3101    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3102      isConstant = false;
3103
3104    if (!Value.getNode())
3105      Value = V;
3106    else if (V != Value)
3107      usesOnlyOneValue = false;
3108  }
3109
3110  if (!Value.getNode())
3111    return DAG.getUNDEF(VT);
3112
3113  if (isOnlyLowElement)
3114    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
3115
3116  // If all elements are constants, fall back to the default expansion, which
3117  // will generate a load from the constant pool.
3118  if (isConstant)
3119    return SDValue();
3120
3121  // Use VDUP for non-constant splats.
3122  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3123  if (usesOnlyOneValue && EltSize <= 32)
3124    return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3125
3126  // Vectors with 32- or 64-bit elements can be built by directly assigning
3127  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
3128  // will be legalized.
3129  if (EltSize >= 32) {
3130    // Do the expansion with floating-point types, since that is what the VFP
3131    // registers are defined to use, and since i64 is not legal.
3132    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3133    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3134    SmallVector<SDValue, 8> Ops;
3135    for (unsigned i = 0; i < NumElts; ++i)
3136      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
3137    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3138    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3139  }
3140
3141  return SDValue();
3142}
3143
3144/// isShuffleMaskLegal - Targets can use this to indicate that they only
3145/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3146/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3147/// are assumed to be legal.
3148bool
3149ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
3150                                      EVT VT) const {
3151  if (VT.getVectorNumElements() == 4 &&
3152      (VT.is128BitVector() || VT.is64BitVector())) {
3153    unsigned PFIndexes[4];
3154    for (unsigned i = 0; i != 4; ++i) {
3155      if (M[i] < 0)
3156        PFIndexes[i] = 8;
3157      else
3158        PFIndexes[i] = M[i];
3159    }
3160
3161    // Compute the index in the perfect shuffle table.
3162    unsigned PFTableIndex =
3163      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3164    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3165    unsigned Cost = (PFEntry >> 30);
3166
3167    if (Cost <= 4)
3168      return true;
3169  }
3170
3171  bool ReverseVEXT;
3172  unsigned Imm, WhichResult;
3173
3174  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
3175          isVREVMask(M, VT, 64) ||
3176          isVREVMask(M, VT, 32) ||
3177          isVREVMask(M, VT, 16) ||
3178          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
3179          isVTRNMask(M, VT, WhichResult) ||
3180          isVUZPMask(M, VT, WhichResult) ||
3181          isVZIPMask(M, VT, WhichResult) ||
3182          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
3183          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
3184          isVZIP_v_undef_Mask(M, VT, WhichResult));
3185}
3186
3187/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3188/// the specified operations to build the shuffle.
3189static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3190                                      SDValue RHS, SelectionDAG &DAG,
3191                                      DebugLoc dl) {
3192  unsigned OpNum = (PFEntry >> 26) & 0x0F;
3193  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3194  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
3195
3196  enum {
3197    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3198    OP_VREV,
3199    OP_VDUP0,
3200    OP_VDUP1,
3201    OP_VDUP2,
3202    OP_VDUP3,
3203    OP_VEXT1,
3204    OP_VEXT2,
3205    OP_VEXT3,
3206    OP_VUZPL, // VUZP, left result
3207    OP_VUZPR, // VUZP, right result
3208    OP_VZIPL, // VZIP, left result
3209    OP_VZIPR, // VZIP, right result
3210    OP_VTRNL, // VTRN, left result
3211    OP_VTRNR  // VTRN, right result
3212  };
3213
3214  if (OpNum == OP_COPY) {
3215    if (LHSID == (1*9+2)*9+3) return LHS;
3216    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3217    return RHS;
3218  }
3219
3220  SDValue OpLHS, OpRHS;
3221  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3222  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3223  EVT VT = OpLHS.getValueType();
3224
3225  switch (OpNum) {
3226  default: llvm_unreachable("Unknown shuffle opcode!");
3227  case OP_VREV:
3228    return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
3229  case OP_VDUP0:
3230  case OP_VDUP1:
3231  case OP_VDUP2:
3232  case OP_VDUP3:
3233    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
3234                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
3235  case OP_VEXT1:
3236  case OP_VEXT2:
3237  case OP_VEXT3:
3238    return DAG.getNode(ARMISD::VEXT, dl, VT,
3239                       OpLHS, OpRHS,
3240                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3241  case OP_VUZPL:
3242  case OP_VUZPR:
3243    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3244                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3245  case OP_VZIPL:
3246  case OP_VZIPR:
3247    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3248                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3249  case OP_VTRNL:
3250  case OP_VTRNR:
3251    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3252                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3253  }
3254}
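
// Note (descriptive, inferred from the decoding above): each
// PerfectShuffleTable entry packs a cost in its top two bits, an opcode in
// the next four bits, and two 13-bit operand IDs; an operand ID encodes the
// four shuffle indices of that operand in base 9, with 8 standing for an
// undefined element, which is why negative mask entries are mapped to 8
// before computing the table index.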
3255
3256static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3257  SDValue V1 = Op.getOperand(0);
3258  SDValue V2 = Op.getOperand(1);
3259  DebugLoc dl = Op.getDebugLoc();
3260  EVT VT = Op.getValueType();
3261  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3262  SmallVector<int, 8> ShuffleMask;
3263
3264  // Convert shuffles that are directly supported on NEON to target-specific
3265  // DAG nodes, instead of keeping them as shuffles and matching them again
3266  // during code selection.  This is more efficient and avoids the possibility
3267  // of inconsistencies between legalization and selection.
3268  // FIXME: floating-point vectors should be canonicalized to integer vectors
3269  // of the same type so that they get CSE'd properly.
3270  SVN->getMask(ShuffleMask);
3271
3272  if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3273    int Lane = SVN->getSplatIndex();
3274    // If this is undef splat, generate it via "just" vdup, if possible.
3275    if (Lane == -1) Lane = 0;
3276
3277    if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3278      return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3279    }
3280    return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3281                       DAG.getConstant(Lane, MVT::i32));
3282  }
3283
3284  bool ReverseVEXT;
3285  unsigned Imm;
3286  if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3287    if (ReverseVEXT)
3288      std::swap(V1, V2);
3289    return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3290                       DAG.getConstant(Imm, MVT::i32));
3291  }
3292
3293  if (isVREVMask(ShuffleMask, VT, 64))
3294    return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3295  if (isVREVMask(ShuffleMask, VT, 32))
3296    return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3297  if (isVREVMask(ShuffleMask, VT, 16))
3298    return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3299
3300  // Check for Neon shuffles that modify both input vectors in place.
3301  // If both results are used, i.e., if there are two shuffles with the same
3302  // source operands and with masks corresponding to both results of one of
3303  // these operations, DAG memoization will ensure that a single node is
3304  // used for both shuffles.
3305  unsigned WhichResult;
3306  if (isVTRNMask(ShuffleMask, VT, WhichResult))
3307    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3308                       V1, V2).getValue(WhichResult);
3309  if (isVUZPMask(ShuffleMask, VT, WhichResult))
3310    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3311                       V1, V2).getValue(WhichResult);
3312  if (isVZIPMask(ShuffleMask, VT, WhichResult))
3313    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3314                       V1, V2).getValue(WhichResult);
3315
3316  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3317    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3318                       V1, V1).getValue(WhichResult);
3319  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3320    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3321                       V1, V1).getValue(WhichResult);
3322  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3323    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3324                       V1, V1).getValue(WhichResult);
3325
3326  // If the shuffle is not directly supported and it has 4 elements, use
3327  // the PerfectShuffle-generated table to synthesize it from other shuffles.
3328  unsigned NumElts = VT.getVectorNumElements();
3329  if (NumElts == 4) {
3330    unsigned PFIndexes[4];
3331    for (unsigned i = 0; i != 4; ++i) {
3332      if (ShuffleMask[i] < 0)
3333        PFIndexes[i] = 8;
3334      else
3335        PFIndexes[i] = ShuffleMask[i];
3336    }
3337
3338    // Compute the index in the perfect shuffle table.
3339    unsigned PFTableIndex =
3340      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3341    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3342    unsigned Cost = (PFEntry >> 30);
3343
3344    if (Cost <= 4)
3345      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3346  }
3347
3348  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
3349  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3350  if (EltSize >= 32) {
3351    // Do the expansion with floating-point types, since that is what the VFP
3352    // registers are defined to use, and since i64 is not legal.
3353    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3354    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3355    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
3356    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
3357    SmallVector<SDValue, 8> Ops;
3358    for (unsigned i = 0; i < NumElts; ++i) {
3359      if (ShuffleMask[i] < 0)
3360        Ops.push_back(DAG.getUNDEF(EltVT));
3361      else
3362        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
3363                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
3364                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
3365                                                  MVT::i32)));
3366    }
3367    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3368    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3369  }
3370
3371  return SDValue();
3372}
3373
3374static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3375  EVT VT = Op.getValueType();
3376  DebugLoc dl = Op.getDebugLoc();
3377  SDValue Vec = Op.getOperand(0);
3378  SDValue Lane = Op.getOperand(1);
3379  assert(VT == MVT::i32 &&
3380         Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
3381         "unexpected type for custom-lowering vector extract");
3382  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3383}
3384
3385static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3386  // The only time a CONCAT_VECTORS operation can have legal types is when
3387  // two 64-bit vectors are concatenated to a 128-bit vector.
3388  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3389         "unexpected CONCAT_VECTORS");
3390  DebugLoc dl = Op.getDebugLoc();
3391  SDValue Val = DAG.getUNDEF(MVT::v2f64);
3392  SDValue Op0 = Op.getOperand(0);
3393  SDValue Op1 = Op.getOperand(1);
3394  if (Op0.getOpcode() != ISD::UNDEF)
3395    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3396                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
3397                      DAG.getIntPtrConstant(0));
3398  if (Op1.getOpcode() != ISD::UNDEF)
3399    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3400                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
3401                      DAG.getIntPtrConstant(1));
3402  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
3403}
3404
3405SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3406  switch (Op.getOpcode()) {
3407  default: llvm_unreachable("Don't know how to custom lower this!");
3408  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
3409  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
3410  case ISD::GlobalAddress:
3411    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
3412      LowerGlobalAddressELF(Op, DAG);
3413  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
3414  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
3415  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
3416  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
3417  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3418  case ISD::VASTART:       return LowerVASTART(Op, DAG);
3419  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
3420  case ISD::SINT_TO_FP:
3421  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
3422  case ISD::FP_TO_SINT:
3423  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
3424  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
3425  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
3426  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
3427  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3428  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
3429  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
3430  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
3431                                                               Subtarget);
3432  case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
3433  case ISD::SHL:
3434  case ISD::SRL:
3435  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
3436  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
3437  case ISD::SRL_PARTS:
3438  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
3439  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
3440  case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
3441  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG);
3442  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3443  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3444  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3445  }
3446  return SDValue();
3447}
3448
3449/// ReplaceNodeResults - Replace the results of node with an illegal result
3450/// type with new values built out of custom code.
3451void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
3452                                           SmallVectorImpl<SDValue>&Results,
3453                                           SelectionDAG &DAG) const {
3454  SDValue Res;
3455  switch (N->getOpcode()) {
3456  default:
3457    llvm_unreachable("Don't know how to custom expand this!");
3458    break;
3459  case ISD::BIT_CONVERT:
3460    Res = ExpandBIT_CONVERT(N, DAG);
3461    break;
3462  case ISD::SRL:
3463  case ISD::SRA:
3464    Res = LowerShift(N, DAG, Subtarget);
3465    break;
3466  }
3467  if (Res.getNode())
3468    Results.push_back(Res);
3469}
3470
3471//===----------------------------------------------------------------------===//
3472//                           ARM Scheduler Hooks
3473//===----------------------------------------------------------------------===//
3474
3475MachineBasicBlock *
3476ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
3477                                     MachineBasicBlock *BB,
3478                                     unsigned Size) const {
3479  unsigned dest    = MI->getOperand(0).getReg();
3480  unsigned ptr     = MI->getOperand(1).getReg();
3481  unsigned oldval  = MI->getOperand(2).getReg();
3482  unsigned newval  = MI->getOperand(3).getReg();
3483  unsigned scratch = BB->getParent()->getRegInfo()
3484    .createVirtualRegister(ARM::GPRRegisterClass);
3485  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3486  DebugLoc dl = MI->getDebugLoc();
3487  bool isThumb2 = Subtarget->isThumb2();
3488
3489  unsigned ldrOpc, strOpc;
3490  switch (Size) {
3491  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3492  case 1:
3493    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3494    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3495    break;
3496  case 2:
3497    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3498    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3499    break;
3500  case 4:
3501    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3502    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3503    break;
3504  }
3505
3506  MachineFunction *MF = BB->getParent();
3507  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3508  MachineFunction::iterator It = BB;
3509  ++It; // insert the new blocks after the current block
3510
3511  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3512  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3513  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3514  MF->insert(It, loop1MBB);
3515  MF->insert(It, loop2MBB);
3516  MF->insert(It, exitMBB);
3517  exitMBB->transferSuccessors(BB);
3518
3519  //  thisMBB:
3520  //   ...
3521  //   fallthrough --> loop1MBB
3522  BB->addSuccessor(loop1MBB);
3523
3524  // loop1MBB:
3525  //   ldrex dest, [ptr]
3526  //   cmp dest, oldval
3527  //   bne exitMBB
3528  BB = loop1MBB;
3529  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3530  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3531                 .addReg(dest).addReg(oldval));
3532  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3533    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3534  BB->addSuccessor(loop2MBB);
3535  BB->addSuccessor(exitMBB);
3536
3537  // loop2MBB:
3538  //   strex scratch, newval, [ptr]
3539  //   cmp scratch, #0
3540  //   bne loop1MBB
3541  BB = loop2MBB;
3542  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3543                 .addReg(ptr));
3544  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3545                 .addReg(scratch).addImm(0));
3546  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3547    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3548  BB->addSuccessor(loop1MBB);
3549  BB->addSuccessor(exitMBB);
3550
3551  //  exitMBB:
3552  //   ...
3553  BB = exitMBB;
3554
3555  MF->DeleteMachineInstr(MI);   // The instruction is gone now.
3556
3557  return BB;
3558}
3559
3560MachineBasicBlock *
3561ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3562                                    unsigned Size, unsigned BinOpcode) const {
3563  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3564  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3565
3566  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3567  MachineFunction *MF = BB->getParent();
3568  MachineFunction::iterator It = BB;
3569  ++It;
3570
3571  unsigned dest = MI->getOperand(0).getReg();
3572  unsigned ptr = MI->getOperand(1).getReg();
3573  unsigned incr = MI->getOperand(2).getReg();
3574  DebugLoc dl = MI->getDebugLoc();
3575
3576  bool isThumb2 = Subtarget->isThumb2();
3577  unsigned ldrOpc, strOpc;
3578  switch (Size) {
3579  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3580  case 1:
3581    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3582    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3583    break;
3584  case 2:
3585    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3586    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3587    break;
3588  case 4:
3589    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3590    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3591    break;
3592  }
3593
3594  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3595  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3596  MF->insert(It, loopMBB);
3597  MF->insert(It, exitMBB);
3598  exitMBB->transferSuccessors(BB);
3599
3600  MachineRegisterInfo &RegInfo = MF->getRegInfo();
3601  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3602  unsigned scratch2 = (!BinOpcode) ? incr :
3603    RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3604
3605  //  thisMBB:
3606  //   ...
3607  //   fallthrough --> loopMBB
3608  BB->addSuccessor(loopMBB);
3609
3610  //  loopMBB:
3611  //   ldrex dest, ptr
3612  //   <binop> scratch2, dest, incr
3613  //   strex scratch, scratch2, ptr
3614  //   cmp scratch, #0
3615  //   bne- loopMBB
3616  //   fallthrough --> exitMBB
3617  BB = loopMBB;
3618  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3619  if (BinOpcode) {
3620    // Operand order needs to go the other way for NAND.
3621    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
3622      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3623                     addReg(incr).addReg(dest)).addReg(0);
3624    else
3625      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3626                     addReg(dest).addReg(incr)).addReg(0);
3627  }
3628
3629  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
3630                 .addReg(ptr));
3631  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3632                 .addReg(scratch).addImm(0));
3633  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3634    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3635
3636  BB->addSuccessor(loopMBB);
3637  BB->addSuccessor(exitMBB);
3638
3639  //  exitMBB:
3640  //   ...
3641  BB = exitMBB;
3642
3643  MF->DeleteMachineInstr(MI);   // The instruction is gone now.
3644
3645  return BB;
3646}
3647
3648MachineBasicBlock *
3649ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
3650                                               MachineBasicBlock *BB) const {
3651  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3652  DebugLoc dl = MI->getDebugLoc();
3653  bool isThumb2 = Subtarget->isThumb2();
3654  switch (MI->getOpcode()) {
3655  default:
3656    MI->dump();
3657    llvm_unreachable("Unexpected instr type to insert");
3658
3659  case ARM::ATOMIC_LOAD_ADD_I8:
3660     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3661  case ARM::ATOMIC_LOAD_ADD_I16:
3662     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3663  case ARM::ATOMIC_LOAD_ADD_I32:
3664     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3665
3666  case ARM::ATOMIC_LOAD_AND_I8:
3667     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3668  case ARM::ATOMIC_LOAD_AND_I16:
3669     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3670  case ARM::ATOMIC_LOAD_AND_I32:
3671     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3672
3673  case ARM::ATOMIC_LOAD_OR_I8:
3674     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3675  case ARM::ATOMIC_LOAD_OR_I16:
3676     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3677  case ARM::ATOMIC_LOAD_OR_I32:
3678     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3679
3680  case ARM::ATOMIC_LOAD_XOR_I8:
3681     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3682  case ARM::ATOMIC_LOAD_XOR_I16:
3683     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3684  case ARM::ATOMIC_LOAD_XOR_I32:
3685     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3686
3687  case ARM::ATOMIC_LOAD_NAND_I8:
3688     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3689  case ARM::ATOMIC_LOAD_NAND_I16:
3690     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3691  case ARM::ATOMIC_LOAD_NAND_I32:
3692     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3693
3694  case ARM::ATOMIC_LOAD_SUB_I8:
3695     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3696  case ARM::ATOMIC_LOAD_SUB_I16:
3697     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3698  case ARM::ATOMIC_LOAD_SUB_I32:
3699     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3700
3701  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
3702  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
3703  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
3704
3705  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
3706  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
3707  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
3708
3709  case ARM::tMOVCCr_pseudo: {
3710    // To "insert" a SELECT_CC instruction, we actually have to insert the
3711    // diamond control-flow pattern.  The incoming instruction knows the
3712    // destination vreg to set, the condition code register to branch on, the
3713    // true/false values to select between, and a branch opcode to use.
3714    const BasicBlock *LLVM_BB = BB->getBasicBlock();
3715    MachineFunction::iterator It = BB;
3716    ++It;
3717
3718    //  thisMBB:
3719    //  ...
3720    //   TrueVal = ...
3721    //   cmpTY ccX, r1, r2
3722    //   bCC copy1MBB
3723    //   fallthrough --> copy0MBB
3724    MachineBasicBlock *thisMBB  = BB;
3725    MachineFunction *F = BB->getParent();
3726    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
3727    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
3728    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
3729      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
3730    F->insert(It, copy0MBB);
3731    F->insert(It, sinkMBB);
3732    // Update machine-CFG edges by first adding all successors of the current
3733    // block to the new block which will contain the Phi node for the select.
3734    for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
3735           E = BB->succ_end(); I != E; ++I)
3736      sinkMBB->addSuccessor(*I);
3737    // Next, remove all successors of the current block, and add the true
3738    // and fallthrough blocks as its successors.
3739    while (!BB->succ_empty())
3740      BB->removeSuccessor(BB->succ_begin());
3741    BB->addSuccessor(copy0MBB);
3742    BB->addSuccessor(sinkMBB);
3743
3744    //  copy0MBB:
3745    //   %FalseValue = ...
3746    //   # fallthrough to sinkMBB
3747    BB = copy0MBB;
3748
3749    // Update machine-CFG edges
3750    BB->addSuccessor(sinkMBB);
3751
3752    //  sinkMBB:
3753    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
3754    //  ...
3755    BB = sinkMBB;
3756    BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
3757      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
3758      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
3759
3760    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
3761    return BB;
3762  }
3763
3764  case ARM::tANDsp:
3765  case ARM::tADDspr_:
3766  case ARM::tSUBspi_:
3767  case ARM::t2SUBrSPi_:
3768  case ARM::t2SUBrSPi12_:
3769  case ARM::t2SUBrSPs_: {
3770    MachineFunction *MF = BB->getParent();
3771    unsigned DstReg = MI->getOperand(0).getReg();
3772    unsigned SrcReg = MI->getOperand(1).getReg();
3773    bool DstIsDead = MI->getOperand(0).isDead();
3774    bool SrcIsKill = MI->getOperand(1).isKill();
3775
3776    if (SrcReg != ARM::SP) {
3777      // Copy the source to SP from virtual register.
3778      const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
3779      unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3780        ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
3781      BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
3782        .addReg(SrcReg, getKillRegState(SrcIsKill));
3783    }
3784
3785    unsigned OpOpc = 0;
3786    bool NeedPred = false, NeedCC = false, NeedOp3 = false;
3787    switch (MI->getOpcode()) {
3788    default:
3789      llvm_unreachable("Unexpected pseudo instruction!");
3790    case ARM::tANDsp:
3791      OpOpc = ARM::tAND;
3792      NeedPred = true;
3793      break;
3794    case ARM::tADDspr_:
3795      OpOpc = ARM::tADDspr;
3796      break;
3797    case ARM::tSUBspi_:
3798      OpOpc = ARM::tSUBspi;
3799      break;
3800    case ARM::t2SUBrSPi_:
3801      OpOpc = ARM::t2SUBrSPi;
3802      NeedPred = true; NeedCC = true;
3803      break;
3804    case ARM::t2SUBrSPi12_:
3805      OpOpc = ARM::t2SUBrSPi12;
3806      NeedPred = true;
3807      break;
3808    case ARM::t2SUBrSPs_:
3809      OpOpc = ARM::t2SUBrSPs;
3810      NeedPred = true; NeedCC = true; NeedOp3 = true;
3811      break;
3812    }
3813    MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
3814    if (OpOpc == ARM::tAND)
3815      AddDefaultT1CC(MIB);
3816    MIB.addReg(ARM::SP);
3817    MIB.addOperand(MI->getOperand(2));
3818    if (NeedOp3)
3819      MIB.addOperand(MI->getOperand(3));
3820    if (NeedPred)
3821      AddDefaultPred(MIB);
3822    if (NeedCC)
3823      AddDefaultCC(MIB);
3824
3825    // Copy the result from SP to virtual register.
3826    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
3827    unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3828      ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
3829    BuildMI(BB, dl, TII->get(CopyOpc))
3830      .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
3831      .addReg(ARM::SP);
3832    MF->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
3833    return BB;
3834  }
3835  }
3836}
3837
3838//===----------------------------------------------------------------------===//
3839//                           ARM Optimization Hooks
3840//===----------------------------------------------------------------------===//
3841
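/// combineSelectAndUse - Fold an operation whose operand Slct is a select
/// (or select_cc) with a zero arm, e.g. (add x, (select cc, 0, c)), into a
/// select of the operation, e.g. (select cc, x, (add x, c)).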
3842static
3843SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
3844                            TargetLowering::DAGCombinerInfo &DCI) {
3845  SelectionDAG &DAG = DCI.DAG;
3846  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3847  EVT VT = N->getValueType(0);
3848  unsigned Opc = N->getOpcode();
3849  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
3850  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
3851  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
3852  ISD::CondCode CC = ISD::SETCC_INVALID;
3853
3854  if (isSlctCC) {
3855    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
3856  } else {
3857    SDValue CCOp = Slct.getOperand(0);
3858    if (CCOp.getOpcode() == ISD::SETCC)
3859      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
3860  }
3861
3862  bool DoXform = false;
3863  bool InvCC = false;
3864  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
3865          "Bad input!");
3866
3867  if (LHS.getOpcode() == ISD::Constant &&
3868      cast<ConstantSDNode>(LHS)->isNullValue()) {
3869    DoXform = true;
3870  } else if (CC != ISD::SETCC_INVALID &&
3871             RHS.getOpcode() == ISD::Constant &&
3872             cast<ConstantSDNode>(RHS)->isNullValue()) {
3873    std::swap(LHS, RHS);
3874    SDValue Op0 = Slct.getOperand(0);
3875    EVT OpVT = isSlctCC ? Op0.getValueType() :
3876                          Op0.getOperand(0).getValueType();
3877    bool isInt = OpVT.isInteger();
3878    CC = ISD::getSetCCInverse(CC, isInt);
3879
3880    if (!TLI.isCondCodeLegal(CC, OpVT))
3881      return SDValue();         // Inverse operator isn't legal.
3882
3883    DoXform = true;
3884    InvCC = true;
3885  }
3886
3887  if (DoXform) {
3888    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
3889    if (isSlctCC)
3890      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
3891                             Slct.getOperand(0), Slct.getOperand(1), CC);
3892    SDValue CCOp = Slct.getOperand(0);
3893    if (InvCC)
3894      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
3895                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
3896    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
3897                       CCOp, OtherOp, Result);
3898  }
3899  return SDValue();
3900}
3901
3902/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3903static SDValue PerformADDCombine(SDNode *N,
3904                                 TargetLowering::DAGCombinerInfo &DCI) {
3905  // Added by Evan in r37685 with no testcase.
3906  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3907
3908  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
3909  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
3910    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
3911    if (Result.getNode()) return Result;
3912  }
3913  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
3914    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
3915    if (Result.getNode()) return Result;
3916  }
3917
3918  return SDValue();
3919}
3920
3921/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
3922static SDValue PerformSUBCombine(SDNode *N,
3923                                 TargetLowering::DAGCombinerInfo &DCI) {
3924  // Added by Evan in r37685 with no testcase.
3925  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3926
3927  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
3928  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
3929    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
3930    if (Result.getNode()) return Result;
3931  }
3932
3933  return SDValue();
3934}
3935
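/// PerformMULCombine - Target-specific DAG combining for ISD::MUL: rewrite a
/// multiply by a constant of the form (2^N +/- 1) << M into shifts and an
/// add or sub.  As a worked example, (mul x, 20) has MulAmt = 20 =
/// (2^2 + 1) << 2 and becomes (shl (add x, (shl x, 2)), 2).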
3936static SDValue PerformMULCombine(SDNode *N,
3937                                 TargetLowering::DAGCombinerInfo &DCI,
3938                                 const ARMSubtarget *Subtarget) {
3939  SelectionDAG &DAG = DCI.DAG;
3940
3941  if (Subtarget->isThumb1Only())
3942    return SDValue();
3943
3944  if (DAG.getMachineFunction().
3945      getFunction()->hasFnAttr(Attribute::OptimizeForSize))
3946    return SDValue();
3947
3948  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
3949    return SDValue();
3950
3951  EVT VT = N->getValueType(0);
3952  if (VT != MVT::i32)
3953    return SDValue();
3954
3955  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3956  if (!C)
3957    return SDValue();
3958
3959  uint64_t MulAmt = C->getZExtValue();
3960  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
3961  ShiftAmt = ShiftAmt & (32 - 1);
3962  SDValue V = N->getOperand(0);
3963  DebugLoc DL = N->getDebugLoc();
3964
3965  SDValue Res;
3966  MulAmt >>= ShiftAmt;
3967  if (isPowerOf2_32(MulAmt - 1)) {
3968    // (mul x, 2^N + 1) => (add (shl x, N), x)
3969    Res = DAG.getNode(ISD::ADD, DL, VT,
3970                      V, DAG.getNode(ISD::SHL, DL, VT,
3971                                     V, DAG.getConstant(Log2_32(MulAmt-1),
3972                                                        MVT::i32)));
3973  } else if (isPowerOf2_32(MulAmt + 1)) {
3974    // (mul x, 2^N - 1) => (sub (shl x, N), x)
3975    Res = DAG.getNode(ISD::SUB, DL, VT,
3976                      DAG.getNode(ISD::SHL, DL, VT,
3977                                  V, DAG.getConstant(Log2_32(MulAmt+1),
3978                                                     MVT::i32)),
3979                      V);
3980  } else
3981    return SDValue();
3982
3983  if (ShiftAmt != 0)
3984    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
3985                      DAG.getConstant(ShiftAmt, MVT::i32));
3986
3987  // Do not add new nodes to DAG combiner worklist.
3988  DCI.CombineTo(N, Res, false);
3989  return SDValue();
3990}
3991
3992/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
3993/// ARMISD::VMOVRRD.
3994static SDValue PerformVMOVRRDCombine(SDNode *N,
3995                                   TargetLowering::DAGCombinerInfo &DCI) {
3996  // vmovrrd(vmovdrr x, y) -> x,y
3997  SDValue InDouble = N->getOperand(0);
3998  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
3999    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
4000  return SDValue();
4001}
4002
4003/// getVShiftImm - Check if this is a valid build_vector for the immediate
4004/// operand of a vector shift operation, where all the elements of the
4005/// build_vector must have the same constant integer value.
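/// For example, a v4i32 BUILD_VECTOR whose elements are all the constant 3
/// (possibly behind bit_converts) yields Cnt = 3; a vector whose elements
/// differ is rejected.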
4006static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
4007  // Ignore bit_converts.
4008  while (Op.getOpcode() == ISD::BIT_CONVERT)
4009    Op = Op.getOperand(0);
4010  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
4011  APInt SplatBits, SplatUndef;
4012  unsigned SplatBitSize;
4013  bool HasAnyUndefs;
4014  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
4015                                      HasAnyUndefs, ElementBits) ||
4016      SplatBitSize > ElementBits)
4017    return false;
4018  Cnt = SplatBits.getSExtValue();
4019  return true;
4020}
4021
4022/// isVShiftLImm - Check if this is a valid build_vector for the immediate
4023/// operand of a vector shift left operation.  That value must be in the range:
4024///   0 <= Value < ElementBits for a left shift; or
4025///   0 <= Value <= ElementBits for a long left shift.
4026static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
4027  assert(VT.isVector() && "vector shift count is not a vector type");
4028  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4029  if (! getVShiftImm(Op, ElementBits, Cnt))
4030    return false;
4031  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
4032}
4033
4034/// isVShiftRImm - Check if this is a valid build_vector for the immediate
4035/// operand of a vector shift right operation.  For a shift opcode, the value
4036/// is positive, but for an intrinsic the shift count must be negative. The
4037/// absolute value must be in the range:
4038///   1 <= |Value| <= ElementBits for a right shift; or
4039///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
4040static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
4041                         int64_t &Cnt) {
4042  assert(VT.isVector() && "vector shift count is not a vector type");
4043  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4044  if (! getVShiftImm(Op, ElementBits, Cnt))
4045    return false;
4046  if (isIntrinsic)
4047    Cnt = -Cnt;
4048  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
4049}
4050
4051/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
4052static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
4053  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4054  switch (IntNo) {
4055  default:
4056    // Don't do anything for most intrinsics.
4057    break;
4058
4059  // Vector shifts: check for immediate versions and lower them.
4060  // Note: This is done during DAG combining instead of DAG legalizing because
4061  // the build_vectors for 64-bit vector element shift counts are generally
4062  // not legal, and it is hard to see their values after they get legalized to
4063  // loads from a constant pool.
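  // For example, a call to arm_neon_vshifts whose second operand is a
  // constant splat of 3 becomes (ARMISD::VSHL x, 3), while a splat of -3 is
  // an immediate right shift and becomes (ARMISD::VSHRs x, 3).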
4064  case Intrinsic::arm_neon_vshifts:
4065  case Intrinsic::arm_neon_vshiftu:
4066  case Intrinsic::arm_neon_vshiftls:
4067  case Intrinsic::arm_neon_vshiftlu:
4068  case Intrinsic::arm_neon_vshiftn:
4069  case Intrinsic::arm_neon_vrshifts:
4070  case Intrinsic::arm_neon_vrshiftu:
4071  case Intrinsic::arm_neon_vrshiftn:
4072  case Intrinsic::arm_neon_vqshifts:
4073  case Intrinsic::arm_neon_vqshiftu:
4074  case Intrinsic::arm_neon_vqshiftsu:
4075  case Intrinsic::arm_neon_vqshiftns:
4076  case Intrinsic::arm_neon_vqshiftnu:
4077  case Intrinsic::arm_neon_vqshiftnsu:
4078  case Intrinsic::arm_neon_vqrshiftns:
4079  case Intrinsic::arm_neon_vqrshiftnu:
4080  case Intrinsic::arm_neon_vqrshiftnsu: {
4081    EVT VT = N->getOperand(1).getValueType();
4082    int64_t Cnt;
4083    unsigned VShiftOpc = 0;
4084
4085    switch (IntNo) {
4086    case Intrinsic::arm_neon_vshifts:
4087    case Intrinsic::arm_neon_vshiftu:
4088      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
4089        VShiftOpc = ARMISD::VSHL;
4090        break;
4091      }
4092      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
4093        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
4094                     ARMISD::VSHRs : ARMISD::VSHRu);
4095        break;
4096      }
4097      return SDValue();
4098
4099    case Intrinsic::arm_neon_vshiftls:
4100    case Intrinsic::arm_neon_vshiftlu:
4101      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
4102        break;
4103      llvm_unreachable("invalid shift count for vshll intrinsic");
4104
4105    case Intrinsic::arm_neon_vrshifts:
4106    case Intrinsic::arm_neon_vrshiftu:
4107      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
4108        break;
4109      return SDValue();
4110
4111    case Intrinsic::arm_neon_vqshifts:
4112    case Intrinsic::arm_neon_vqshiftu:
4113      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4114        break;
4115      return SDValue();
4116
4117    case Intrinsic::arm_neon_vqshiftsu:
4118      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4119        break;
4120      llvm_unreachable("invalid shift count for vqshlu intrinsic");
4121
4122    case Intrinsic::arm_neon_vshiftn:
4123    case Intrinsic::arm_neon_vrshiftn:
4124    case Intrinsic::arm_neon_vqshiftns:
4125    case Intrinsic::arm_neon_vqshiftnu:
4126    case Intrinsic::arm_neon_vqshiftnsu:
4127    case Intrinsic::arm_neon_vqrshiftns:
4128    case Intrinsic::arm_neon_vqrshiftnu:
4129    case Intrinsic::arm_neon_vqrshiftnsu:
4130      // Narrowing shifts require an immediate right shift.
4131      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
4132        break;
4133      llvm_unreachable("invalid shift count for narrowing vector shift "
4134                       "intrinsic");
4135
4136    default:
4137      llvm_unreachable("unhandled vector shift");
4138    }
4139
4140    switch (IntNo) {
4141    case Intrinsic::arm_neon_vshifts:
4142    case Intrinsic::arm_neon_vshiftu:
4143      // Opcode already set above.
4144      break;
4145    case Intrinsic::arm_neon_vshiftls:
4146    case Intrinsic::arm_neon_vshiftlu:
4147      if (Cnt == VT.getVectorElementType().getSizeInBits())
4148        VShiftOpc = ARMISD::VSHLLi;
4149      else
4150        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
4151                     ARMISD::VSHLLs : ARMISD::VSHLLu);
4152      break;
4153    case Intrinsic::arm_neon_vshiftn:
4154      VShiftOpc = ARMISD::VSHRN; break;
4155    case Intrinsic::arm_neon_vrshifts:
4156      VShiftOpc = ARMISD::VRSHRs; break;
4157    case Intrinsic::arm_neon_vrshiftu:
4158      VShiftOpc = ARMISD::VRSHRu; break;
4159    case Intrinsic::arm_neon_vrshiftn:
4160      VShiftOpc = ARMISD::VRSHRN; break;
4161    case Intrinsic::arm_neon_vqshifts:
4162      VShiftOpc = ARMISD::VQSHLs; break;
4163    case Intrinsic::arm_neon_vqshiftu:
4164      VShiftOpc = ARMISD::VQSHLu; break;
4165    case Intrinsic::arm_neon_vqshiftsu:
4166      VShiftOpc = ARMISD::VQSHLsu; break;
4167    case Intrinsic::arm_neon_vqshiftns:
4168      VShiftOpc = ARMISD::VQSHRNs; break;
4169    case Intrinsic::arm_neon_vqshiftnu:
4170      VShiftOpc = ARMISD::VQSHRNu; break;
4171    case Intrinsic::arm_neon_vqshiftnsu:
4172      VShiftOpc = ARMISD::VQSHRNsu; break;
4173    case Intrinsic::arm_neon_vqrshiftns:
4174      VShiftOpc = ARMISD::VQRSHRNs; break;
4175    case Intrinsic::arm_neon_vqrshiftnu:
4176      VShiftOpc = ARMISD::VQRSHRNu; break;
4177    case Intrinsic::arm_neon_vqrshiftnsu:
4178      VShiftOpc = ARMISD::VQRSHRNsu; break;
4179    }
4180
4181    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4182                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
4183  }
4184
4185  case Intrinsic::arm_neon_vshiftins: {
4186    EVT VT = N->getOperand(1).getValueType();
4187    int64_t Cnt;
4188    unsigned VShiftOpc = 0;
4189
4190    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
4191      VShiftOpc = ARMISD::VSLI;
4192    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
4193      VShiftOpc = ARMISD::VSRI;
4194    else {
4195      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
4196    }
4197
4198    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4199                       N->getOperand(1), N->getOperand(2),
4200                       DAG.getConstant(Cnt, MVT::i32));
4201  }
4202
4203  case Intrinsic::arm_neon_vqrshifts:
4204  case Intrinsic::arm_neon_vqrshiftu:
4205    // No immediate versions of these to check for.
4206    break;
4207  }
4208
4209  return SDValue();
4210}
4211
4212/// PerformShiftCombine - Checks for immediate versions of vector shifts and
4213/// lowers them.  As with the vector shift intrinsics, this is done during DAG
4214/// combining instead of DAG legalizing because the build_vectors for 64-bit
4215/// vector element shift counts are generally not legal, and it is hard to see
4216/// their values after they get legalized to loads from a constant pool.
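/// For example, (shl v4i32:x, (build_vector 4, 4, 4, 4)) is rewritten to
/// (ARMISD::VSHL x, 4), and (sra x, <splat 4>) becomes (ARMISD::VSHRs x, 4).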
4217static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
4218                                   const ARMSubtarget *ST) {
4219  EVT VT = N->getValueType(0);
4220
4221  // Nothing to be done for scalar shifts.
4222  if (! VT.isVector())
4223    return SDValue();
4224
4225  assert(ST->hasNEON() && "unexpected vector shift");
4226  int64_t Cnt;
4227
4228  switch (N->getOpcode()) {
4229  default: llvm_unreachable("unexpected shift opcode");
4230
4231  case ISD::SHL:
4232    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
4233      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
4234                         DAG.getConstant(Cnt, MVT::i32));
4235    break;
4236
4237  case ISD::SRA:
4238  case ISD::SRL:
4239    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
4240      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
4241                            ARMISD::VSHRs : ARMISD::VSHRu);
4242      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
4243                         DAG.getConstant(Cnt, MVT::i32));
4244    }
4245  }
4246  return SDValue();
4247}
4248
4249/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
4250/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
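/// For example, an i32 (sign_extend (extract_vector_elt v8i16:x, 3)) is
/// rewritten to (ARMISD::VGETLANEs x, 3); zero- and any-extends use
/// ARMISD::VGETLANEu instead.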
4251static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
4252                                    const ARMSubtarget *ST) {
4253  SDValue N0 = N->getOperand(0);
4254
4255  // Check for sign- and zero-extensions of vector extract operations of 8-
4256  // and 16-bit vector elements.  NEON supports these directly.  They are
4257  // handled during DAG combining because type legalization will promote them
4258  // to 32-bit types and it is messy to recognize the operations after that.
4259  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4260    SDValue Vec = N0.getOperand(0);
4261    SDValue Lane = N0.getOperand(1);
4262    EVT VT = N->getValueType(0);
4263    EVT EltVT = N0.getValueType();
4264    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4265
4266    if (VT == MVT::i32 &&
4267        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
4268        TLI.isTypeLegal(Vec.getValueType())) {
4269
4270      unsigned Opc = 0;
4271      switch (N->getOpcode()) {
4272      default: llvm_unreachable("unexpected opcode");
4273      case ISD::SIGN_EXTEND:
4274        Opc = ARMISD::VGETLANEs;
4275        break;
4276      case ISD::ZERO_EXTEND:
4277      case ISD::ANY_EXTEND:
4278        Opc = ARMISD::VGETLANEu;
4279        break;
4280      }
4281      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
4282    }
4283  }
4284
4285  return SDValue();
4286}
4287
4288/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
4289/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
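/// For example, the f32 select "x < y ? x : y", i.e.
/// (select_cc setolt x, y, x, y), is turned into (ARMISD::FMIN x, y)
/// provided x is known not to be a NaN.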
4290static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
4291                                       const ARMSubtarget *ST) {
4292  // If the target supports NEON, try to use vmax/vmin instructions for f32
4293  // selects like "x < y ? x : y".  Unless the FiniteOnlyFPMath option is set,
4294  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
4295  // a NaN; only do the transformation when it matches that behavior.
4296
4297  // For now only do this when using NEON for FP operations; if using VFP, it
4298  // is not obvious that the benefit outweighs the cost of switching to the
4299  // NEON pipeline.
4300  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
4301      N->getValueType(0) != MVT::f32)
4302    return SDValue();
4303
4304  SDValue CondLHS = N->getOperand(0);
4305  SDValue CondRHS = N->getOperand(1);
4306  SDValue LHS = N->getOperand(2);
4307  SDValue RHS = N->getOperand(3);
4308  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4309
4310  unsigned Opcode = 0;
4311  bool IsReversed;
4312  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
4313    IsReversed = false; // x CC y ? x : y
4314  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
4315    IsReversed = true; // x CC y ? y : x
4316  } else {
4317    return SDValue();
4318  }
4319
4320  bool IsUnordered;
4321  switch (CC) {
4322  default: break;
4323  case ISD::SETOLT:
4324  case ISD::SETOLE:
4325  case ISD::SETLT:
4326  case ISD::SETLE:
4327  case ISD::SETULT:
4328  case ISD::SETULE:
4329    // If LHS is NaN, an ordered comparison will be false and the result will
4330    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
4331    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4332    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
4333    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4334      break;
4335    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
4336    // will return -0, so vmin can only be used for unsafe math or if one of
4337    // the operands is known to be nonzero.
4338    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
4339        !UnsafeFPMath &&
4340        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4341      break;
4342    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
4343    break;
4344
4345  case ISD::SETOGT:
4346  case ISD::SETOGE:
4347  case ISD::SETGT:
4348  case ISD::SETGE:
4349  case ISD::SETUGT:
4350  case ISD::SETUGE:
4351    // If LHS is NaN, an ordered comparison will be false and the result will
4352    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
4353    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4354    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
4355    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4356      break;
4357    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
4358    // will return +0, so vmax can only be used for unsafe math or if one of
4359    // the operands is known to be nonzero.
4360    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
4361        !UnsafeFPMath &&
4362        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4363      break;
4364    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
4365    break;
4366  }
4367
4368  if (!Opcode)
4369    return SDValue();
4370  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
4371}
4372
4373SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
4374                                             DAGCombinerInfo &DCI) const {
4375  switch (N->getOpcode()) {
4376  default: break;
4377  case ISD::ADD:        return PerformADDCombine(N, DCI);
4378  case ISD::SUB:        return PerformSUBCombine(N, DCI);
4379  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
4380  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
4381  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
4382  case ISD::SHL:
4383  case ISD::SRA:
4384  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
4385  case ISD::SIGN_EXTEND:
4386  case ISD::ZERO_EXTEND:
4387  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
4388  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
4389  }
4390  return SDValue();
4391}
4392
4393bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
4394  if (!Subtarget->hasV6Ops())
4395    // Pre-v6 does not support unaligned mem access.
4396    return false;
4397  else {
4398    // v6+ may or may not support unaligned mem access depending on the system
4399    // configuration.
4400    // FIXME: This is pretty conservative. Should we provide cmdline option to
4401    // control the behaviour?
4402    if (!Subtarget->isTargetDarwin())
4403      return false;
4404  }
4405
4406  switch (VT.getSimpleVT().SimpleTy) {
4407  default:
4408    return false;
4409  case MVT::i8:
4410  case MVT::i16:
4411  case MVT::i32:
4412    return true;
4413  // FIXME: VLD1 etc with standard alignment is legal.
4414  }
4415}
4416
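/// isLegalT1AddressImmediate - Return true if the offset V is legal for a
/// Thumb1 load / store of type VT.  The offset is an unsigned 5-bit field
/// scaled by the access size; for example, i32 accesses accept offsets that
/// are multiples of 4 in the range [0, 124].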
4417static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
4418  if (V < 0)
4419    return false;
4420
4421  unsigned Scale = 1;
4422  switch (VT.getSimpleVT().SimpleTy) {
4423  default: return false;
4424  case MVT::i1:
4425  case MVT::i8:
4426    // Scale == 1;
4427    break;
4428  case MVT::i16:
4429    // Scale == 2;
4430    Scale = 2;
4431    break;
4432  case MVT::i32:
4433    // Scale == 4;
4434    Scale = 4;
4435    break;
4436  }
4437
4438  if ((V & (Scale - 1)) != 0)
4439    return false;
4440  V /= Scale;
4441  return V == (V & ((1LL << 5) - 1));
4442}
4443
4444static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
4445                                      const ARMSubtarget *Subtarget) {
4446  bool isNeg = false;
4447  if (V < 0) {
4448    isNeg = true;
4449    V = - V;
4450  }
4451
4452  switch (VT.getSimpleVT().SimpleTy) {
4453  default: return false;
4454  case MVT::i1:
4455  case MVT::i8:
4456  case MVT::i16:
4457  case MVT::i32:
4458    // + imm12 or - imm8
4459    if (isNeg)
4460      return V == (V & ((1LL << 8) - 1));
4461    return V == (V & ((1LL << 12) - 1));
4462  case MVT::f32:
4463  case MVT::f64:
4464    // Same as ARM mode. FIXME: NEON?
4465    if (!Subtarget->hasVFP2())
4466      return false;
4467    if ((V & 3) != 0)
4468      return false;
4469    V >>= 2;
4470    return V == (V & ((1LL << 8) - 1));
4471  }
4472}
4473
4474/// isLegalAddressImmediate - Return true if the integer value can be used
4475/// as the offset of the target addressing mode for load / store of the
4476/// given type.
4477static bool isLegalAddressImmediate(int64_t V, EVT VT,
4478                                    const ARMSubtarget *Subtarget) {
4479  if (V == 0)
4480    return true;
4481
4482  if (!VT.isSimple())
4483    return false;
4484
4485  if (Subtarget->isThumb1Only())
4486    return isLegalT1AddressImmediate(V, VT);
4487  else if (Subtarget->isThumb2())
4488    return isLegalT2AddressImmediate(V, VT, Subtarget);
4489
4490  // ARM mode.
4491  if (V < 0)
4492    V = - V;
4493  switch (VT.getSimpleVT().SimpleTy) {
4494  default: return false;
4495  case MVT::i1:
4496  case MVT::i8:
4497  case MVT::i32:
4498    // +- imm12
4499    return V == (V & ((1LL << 12) - 1));
4500  case MVT::i16:
4501    // +- imm8
4502    return V == (V & ((1LL << 8) - 1));
4503  case MVT::f32:
4504  case MVT::f64:
4505    if (!Subtarget->hasVFP2()) // FIXME: NEON?
4506      return false;
4507    if ((V & 3) != 0)
4508      return false;
4509    V >>= 2;
4510    return V == (V & ((1LL << 8) - 1));
4511  }
4512}
4513
4514bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
4515                                                      EVT VT) const {
4516  int Scale = AM.Scale;
4517  if (Scale < 0)
4518    return false;
4519
4520  switch (VT.getSimpleVT().SimpleTy) {
4521  default: return false;
4522  case MVT::i1:
4523  case MVT::i8:
4524  case MVT::i16:
4525  case MVT::i32:
4526    if (Scale == 1)
4527      return true;
4528    // r + r << imm
4529    Scale = Scale & ~1;
4530    return Scale == 2 || Scale == 4 || Scale == 8;
4531  case MVT::i64:
4532    // r + r
4533    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4534      return true;
4535    return false;
4536  case MVT::isVoid:
4537    // Note, we allow "void" uses (basically, uses that aren't loads or
4538    // stores), because ARM allows folding a scale into many arithmetic
4539    // operations.  This should be made more precise and revisited later.
4540
4541    // Allow r << imm, but the imm has to be a multiple of two.
4542    if (Scale & 1) return false;
4543    return isPowerOf2_32(Scale);
4544  }
4545}
4546
4547/// isLegalAddressingMode - Return true if the addressing mode represented
4548/// by AM is legal for this target, for a load/store of the specified type.
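/// For example, for an i32 load in ARM mode an address of the form
/// "reg + reg << 2" (AM.Scale == 4, no base offset) is legal, while any
/// "reg + reg * scale + offset" form is rejected because ARM has no such
/// addressing mode.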
4549bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
4550                                              const Type *Ty) const {
4551  EVT VT = getValueType(Ty, true);
4552  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
4553    return false;
4554
4555  // Can never fold addr of global into load/store.
4556  if (AM.BaseGV)
4557    return false;
4558
4559  switch (AM.Scale) {
4560  case 0:  // no scale reg, must be "r+i" or "r", or "i".
4561    break;
4562  case 1:
4563    if (Subtarget->isThumb1Only())
4564      return false;
4565    // FALL THROUGH.
4566  default:
4567    // ARM doesn't support any R+R*scale+imm addr modes.
4568    if (AM.BaseOffs)
4569      return false;
4570
4571    if (!VT.isSimple())
4572      return false;
4573
4574    if (Subtarget->isThumb2())
4575      return isLegalT2ScaledAddressingMode(AM, VT);
4576
4577    int Scale = AM.Scale;
4578    switch (VT.getSimpleVT().SimpleTy) {
4579    default: return false;
4580    case MVT::i1:
4581    case MVT::i8:
4582    case MVT::i32:
4583      if (Scale < 0) Scale = -Scale;
4584      if (Scale == 1)
4585        return true;
4586      // r + r << imm
4587      return isPowerOf2_32(Scale & ~1);
4588    case MVT::i16:
4589    case MVT::i64:
4590      // r + r
4591      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4592        return true;
4593      return false;
4594
4595    case MVT::isVoid:
4596      // Note, we allow "void" uses (basically, uses that aren't loads or
4597      // stores), because ARM allows folding a scale into many arithmetic
4598      // operations.  This should be made more precise and revisited later.
4599
4600      // Allow r << imm, but the imm has to be a multiple of two.
4601      if (Scale & 1) return false;
4602      return isPowerOf2_32(Scale);
4603    }
4604    break;
4605  }
4606  return true;
4607}
4608
4609/// isLegalICmpImmediate - Return true if the specified immediate is a legal
4610/// icmp immediate, that is, the target has icmp instructions which can compare
4611/// a register against the immediate without having to materialize the
4612/// immediate into a register.
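/// For example, ARM mode accepts any so_imm (an 8-bit value rotated right by
/// an even amount) such as 0xFF00, Thumb2 accepts any t2_so_imm, and Thumb1
/// only accepts immediates in the range 0-255.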
4613bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4614  if (!Subtarget->isThumb())
4615    return ARM_AM::getSOImmVal(Imm) != -1;
4616  if (Subtarget->isThumb2())
4617    return ARM_AM::getT2SOImmVal(Imm) != -1;
4618  return Imm >= 0 && Imm <= 255;
4619}
4620
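/// getARMIndexedAddressParts - Split an (add/sub base, offset) address into
/// base and offset so the caller can form a pre- or post-indexed load /
/// store.  For example, a load from (add base, 4) yields Base = base,
/// Offset = 4 and isInc = true, i.e. an access that also updates the base
/// pointer by +4.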
4621static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
4622                                      bool isSEXTLoad, SDValue &Base,
4623                                      SDValue &Offset, bool &isInc,
4624                                      SelectionDAG &DAG) {
4625  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4626    return false;
4627
4628  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
4629    // AddressingMode 3
4630    Base = Ptr->getOperand(0);
4631    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4632      int RHSC = (int)RHS->getZExtValue();
4633      if (RHSC < 0 && RHSC > -256) {
4634        assert(Ptr->getOpcode() == ISD::ADD);
4635        isInc = false;
4636        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4637        return true;
4638      }
4639    }
4640    isInc = (Ptr->getOpcode() == ISD::ADD);
4641    Offset = Ptr->getOperand(1);
4642    return true;
4643  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
4644    // AddressingMode 2
4645    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4646      int RHSC = (int)RHS->getZExtValue();
4647      if (RHSC < 0 && RHSC > -0x1000) {
4648        assert(Ptr->getOpcode() == ISD::ADD);
4649        isInc = false;
4650        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4651        Base = Ptr->getOperand(0);
4652        return true;
4653      }
4654    }
4655
4656    if (Ptr->getOpcode() == ISD::ADD) {
4657      isInc = true;
4658      ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
4659      if (ShOpcVal != ARM_AM::no_shift) {
4660        Base = Ptr->getOperand(1);
4661        Offset = Ptr->getOperand(0);
4662      } else {
4663        Base = Ptr->getOperand(0);
4664        Offset = Ptr->getOperand(1);
4665      }
4666      return true;
4667    }
4668
4669    isInc = (Ptr->getOpcode() == ISD::ADD);
4670    Base = Ptr->getOperand(0);
4671    Offset = Ptr->getOperand(1);
4672    return true;
4673  }
4674
4675  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
4676  return false;
4677}
4678
4679static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
4680                                     bool isSEXTLoad, SDValue &Base,
4681                                     SDValue &Offset, bool &isInc,
4682                                     SelectionDAG &DAG) {
4683  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4684    return false;
4685
4686  Base = Ptr->getOperand(0);
4687  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4688    int RHSC = (int)RHS->getZExtValue();
4689    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
4690      assert(Ptr->getOpcode() == ISD::ADD);
4691      isInc = false;
4692      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4693      return true;
4694    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
4695      isInc = Ptr->getOpcode() == ISD::ADD;
4696      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
4697      return true;
4698    }
4699  }
4700
4701  return false;
4702}
4703
4704/// getPreIndexedAddressParts - returns true by value, base pointer and
4705/// offset pointer and addressing mode by reference if the node's address
4706/// can be legally represented as a pre-indexed load / store address.
4707bool
4708ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
4709                                             SDValue &Offset,
4710                                             ISD::MemIndexedMode &AM,
4711                                             SelectionDAG &DAG) const {
4712  if (Subtarget->isThumb1Only())
4713    return false;
4714
4715  EVT VT;
4716  SDValue Ptr;
4717  bool isSEXTLoad = false;
4718  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4719    Ptr = LD->getBasePtr();
4720    VT  = LD->getMemoryVT();
4721    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4722  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4723    Ptr = ST->getBasePtr();
4724    VT  = ST->getMemoryVT();
4725  } else
4726    return false;
4727
4728  bool isInc;
4729  bool isLegal = false;
4730  if (Subtarget->isThumb2())
4731    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4732                                       Offset, isInc, DAG);
4733  else
4734    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4735                                        Offset, isInc, DAG);
4736  if (!isLegal)
4737    return false;
4738
4739  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
4740  return true;
4741}
4742
4743/// getPostIndexedAddressParts - returns true by value, base pointer and
4744/// offset pointer and addressing mode by reference if this node can be
4745/// combined with a load / store to form a post-indexed load / store.
4746bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
4747                                                   SDValue &Base,
4748                                                   SDValue &Offset,
4749                                                   ISD::MemIndexedMode &AM,
4750                                                   SelectionDAG &DAG) const {
4751  if (Subtarget->isThumb1Only())
4752    return false;
4753
4754  EVT VT;
4755  SDValue Ptr;
4756  bool isSEXTLoad = false;
4757  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4758    VT  = LD->getMemoryVT();
4759    Ptr = LD->getBasePtr();
4760    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4761  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4762    VT  = ST->getMemoryVT();
4763    Ptr = ST->getBasePtr();
4764  } else
4765    return false;
4766
4767  bool isInc;
4768  bool isLegal = false;
4769  if (Subtarget->isThumb2())
4770    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4771                                       isInc, DAG);
4772  else
4773    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4774                                        isInc, DAG);
4775  if (!isLegal)
4776    return false;
4777
4778  if (Ptr != Base) {
4779    // Swap base ptr and offset to catch more post-index load / store when
4780    // it's legal. In Thumb2 mode, offset must be an immediate.
4781    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
4782        !Subtarget->isThumb2())
4783      std::swap(Base, Offset);
4784
4785    // Post-indexed load / store update the base pointer.
4786    if (Ptr != Base)
4787      return false;
4788  }
4789
4790  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
4791  return true;
4792}
4793
4794void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
4795                                                       const APInt &Mask,
4796                                                       APInt &KnownZero,
4797                                                       APInt &KnownOne,
4798                                                       const SelectionDAG &DAG,
4799                                                       unsigned Depth) const {
4800  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
4801  switch (Op.getOpcode()) {
4802  default: break;
4803  case ARMISD::CMOV: {
4804    // Bits are known zero/one if known on the LHS and RHS.
4805    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
4806    if (KnownZero == 0 && KnownOne == 0) return;
4807
4808    APInt KnownZeroRHS, KnownOneRHS;
4809    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
4810                          KnownZeroRHS, KnownOneRHS, Depth+1);
4811    KnownZero &= KnownZeroRHS;
4812    KnownOne  &= KnownOneRHS;
4813    return;
4814  }
4815  }
4816}
4817
4818//===----------------------------------------------------------------------===//
4819//                           ARM Inline Assembly Support
4820//===----------------------------------------------------------------------===//
4821
4822/// getConstraintType - Given a constraint letter, return the type of
4823/// constraint it is for this target.
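/// For example, in GCC-style inline assembly the "l" constraint requests a
/// low register (r0-r7 in Thumb mode) and "w" requests a VFP / NEON
/// register; both are classified as C_RegisterClass here.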
4824ARMTargetLowering::ConstraintType
4825ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
4826  if (Constraint.size() == 1) {
4827    switch (Constraint[0]) {
4828    default:  break;
4829    case 'l': return C_RegisterClass;
4830    case 'w': return C_RegisterClass;
4831    }
4832  }
4833  return TargetLowering::getConstraintType(Constraint);
4834}
4835
4836std::pair<unsigned, const TargetRegisterClass*>
4837ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4838                                                EVT VT) const {
4839  if (Constraint.size() == 1) {
4840    // GCC ARM Constraint Letters
4841    switch (Constraint[0]) {
4842    case 'l':
4843      if (Subtarget->isThumb())
4844        return std::make_pair(0U, ARM::tGPRRegisterClass);
4845      else
4846        return std::make_pair(0U, ARM::GPRRegisterClass);
4847    case 'r':
4848      return std::make_pair(0U, ARM::GPRRegisterClass);
4849    case 'w':
4850      if (VT == MVT::f32)
4851        return std::make_pair(0U, ARM::SPRRegisterClass);
4852      if (VT.getSizeInBits() == 64)
4853        return std::make_pair(0U, ARM::DPRRegisterClass);
4854      if (VT.getSizeInBits() == 128)
4855        return std::make_pair(0U, ARM::QPRRegisterClass);
4856      break;
4857    }
4858  }
4859  if (StringRef("{cc}").equals_lower(Constraint))
4860    return std::make_pair(0U, ARM::CCRRegisterClass);
4861
4862  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4863}
4864
4865std::vector<unsigned> ARMTargetLowering::
4866getRegClassForInlineAsmConstraint(const std::string &Constraint,
4867                                  EVT VT) const {
4868  if (Constraint.size() != 1)
4869    return std::vector<unsigned>();
4870
4871  switch (Constraint[0]) {      // GCC ARM Constraint Letters
4872  default: break;
4873  case 'l':
4874    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4875                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4876                                 0);
4877  case 'r':
4878    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4879                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4880                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
4881                                 ARM::R12, ARM::LR, 0);
4882  case 'w':
4883    if (VT == MVT::f32)
4884      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
4885                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
4886                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
4887                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
4888                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
4889                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
4890                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
4891                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
4892    if (VT.getSizeInBits() == 64)
4893      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
4894                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
4895                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
4896                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
4897    if (VT.getSizeInBits() == 128)
4898      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
4899                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
4900      break;
4901    break;
4902
4903  return std::vector<unsigned>();
4904}
4905
4906/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
4907/// vector.  If it is invalid, don't add anything to Ops.
4908void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
4909                                                     char Constraint,
4910                                                     bool hasMemory,
4911                                                     std::vector<SDValue>&Ops,
4912                                                     SelectionDAG &DAG) const {
4913  SDValue Result(0, 0);
4914
4915  switch (Constraint) {
4916  default: break;
4917  case 'I': case 'J': case 'K': case 'L':
4918  case 'M': case 'N': case 'O':
4919    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4920    if (!C)
4921      return;
4922
4923    int64_t CVal64 = C->getSExtValue();
4924    int CVal = (int) CVal64;
4925    // None of these constraints allow values larger than 32 bits.  Check
4926    // that the value fits in an int.
4927    if (CVal != CVal64)
4928      return;
4929
4930    switch (Constraint) {
4931      case 'I':
4932        if (Subtarget->isThumb1Only()) {
4933          // This must be a constant between 0 and 255, for ADD
4934          // immediates.
4935          if (CVal >= 0 && CVal <= 255)
4936            break;
4937        } else if (Subtarget->isThumb2()) {
4938          // A constant that can be used as an immediate value in a
4939          // data-processing instruction.
4940          if (ARM_AM::getT2SOImmVal(CVal) != -1)
4941            break;
4942        } else {
4943          // A constant that can be used as an immediate value in a
4944          // data-processing instruction.
4945          if (ARM_AM::getSOImmVal(CVal) != -1)
4946            break;
4947        }
4948        return;
4949
4950      case 'J':
4951        if (Subtarget->isThumb()) {  // FIXME thumb2
4952          // This must be a constant between -255 and -1, for negated ADD
4953          // immediates. This can be used in GCC with an "n" modifier that
4954          // prints the negated value, for use with SUB instructions. It is
4955          // not useful otherwise but is implemented for compatibility.
4956          if (CVal >= -255 && CVal <= -1)
4957            break;
4958        } else {
4959          // This must be a constant between -4095 and 4095. It is not clear
4960          // what this constraint is intended for. Implemented for
4961          // compatibility with GCC.
4962          if (CVal >= -4095 && CVal <= 4095)
4963            break;
4964        }
4965        return;
4966
4967      case 'K':
4968        if (Subtarget->isThumb1Only()) {
4969          // A 32-bit value where only one byte has a nonzero value. Exclude
4970          // zero to match GCC. This constraint is used by GCC internally for
4971          // constants that can be loaded with a move/shift combination.
4972          // It is not useful otherwise but is implemented for compatibility.
4973          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
4974            break;
4975        } else if (Subtarget->isThumb2()) {
4976          // A constant whose bitwise inverse can be used as an immediate
4977          // value in a data-processing instruction. This can be used in GCC
4978          // with a "B" modifier that prints the inverted value, for use with
4979          // BIC and MVN instructions. It is not useful otherwise but is
4980          // implemented for compatibility.
4981          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
4982            break;
4983        } else {
4984          // A constant whose bitwise inverse can be used as an immediate
4985          // value in a data-processing instruction. This can be used in GCC
4986          // with a "B" modifier that prints the inverted value, for use with
4987          // BIC and MVN instructions. It is not useful otherwise but is
4988          // implemented for compatibility.
4989          if (ARM_AM::getSOImmVal(~CVal) != -1)
4990            break;
4991        }
4992        return;
4993
4994      case 'L':
4995        if (Subtarget->isThumb1Only()) {
4996          // This must be a constant between -7 and 7,
4997          // for 3-operand ADD/SUB immediate instructions.
4998          if (CVal >= -7 && CVal < 7)
4999            break;
5000        } else if (Subtarget->isThumb2()) {
5001          // A constant whose negation can be used as an immediate value in a
5002          // data-processing instruction. This can be used in GCC with an "n"
5003          // modifier that prints the negated value, for use with SUB
5004          // instructions. It is not useful otherwise but is implemented for
5005          // compatibility.
5006          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
5007            break;
5008        } else {
5009          // A constant whose negation can be used as an immediate value in a
5010          // data-processing instruction. This can be used in GCC with an "n"
5011          // modifier that prints the negated value, for use with SUB
5012          // instructions. It is not useful otherwise but is implemented for
5013          // compatibility.
5014          if (ARM_AM::getSOImmVal(-CVal) != -1)
5015            break;
5016        }
5017        return;
5018
5019      case 'M':
5020        if (Subtarget->isThumb()) { // FIXME thumb2
5021          // This must be a multiple of 4 between 0 and 1020, for
5022          // ADD sp + immediate.
5023          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
5024            break;
5025        } else {
5026          // A power of two or a constant between 0 and 32.  This is used in
5027          // GCC for the shift amount on shifted register operands, but it is
5028          // useful in general for any shift amounts.
5029          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
5030            break;
5031        }
5032        return;
5033
5034      case 'N':
5035        if (Subtarget->isThumb()) {  // FIXME thumb2
5036          // This must be a constant between 0 and 31, for shift amounts.
5037          if (CVal >= 0 && CVal <= 31)
5038            break;
5039        }
5040        return;
5041
5042      case 'O':
5043        if (Subtarget->isThumb()) {  // FIXME thumb2
5044          // This must be a multiple of 4 between -508 and 508, for
5045          // ADD/SUB sp = sp + immediate.
5046          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
5047            break;
5048        }
5049        return;
5050    }
5051    Result = DAG.getTargetConstant(CVal, Op.getValueType());
5052    break;
5053  }
5054
5055  if (Result.getNode()) {
5056    Ops.push_back(Result);
5057    return;
5058  }
5059  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
5060                                                      Ops, DAG);
5061}
5062
5063bool
5064ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
5065  // The ARM target isn't yet aware of offsets.
5066  return false;
5067}
5068
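/// getVFPf32Imm - Encode an f32 immediate in the 8-bit form used by the
/// VFPv3 VMOV (immediate) instruction: one sign bit, 3 exponent bits and 4
/// mantissa bits, representing +/- (16 + efgh)/16 * 2^exp with exp in
/// [-3, 4].  For example, 1.0f encodes to 0x70 and 0.5f to 0x60; values
/// that need more precision return -1.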
5069int ARM::getVFPf32Imm(const APFloat &FPImm) {
5070  APInt Imm = FPImm.bitcastToAPInt();
5071  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
5072  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
5073  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
5074
5075  // We can handle 4 bits of mantissa.
5076  // mantissa = (16+UInt(e:f:g:h))/16.
5077  if (Mantissa & 0x7ffff)
5078    return -1;
5079  Mantissa >>= 19;
5080  if ((Mantissa & 0xf) != Mantissa)
5081    return -1;
5082
5083  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5084  if (Exp < -3 || Exp > 4)
5085    return -1;
5086  Exp = ((Exp+3) & 0x7) ^ 4;
5087
5088  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5089}
5090
5091int ARM::getVFPf64Imm(const APFloat &FPImm) {
5092  APInt Imm = FPImm.bitcastToAPInt();
5093  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
5094  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
5095  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
5096
5097  // We can handle 4 bits of mantissa.
5098  // mantissa = (16+UInt(e:f:g:h))/16.
5099  if (Mantissa & 0xffffffffffffLL)
5100    return -1;
5101  Mantissa >>= 48;
5102  if ((Mantissa & 0xf) != Mantissa)
5103    return -1;
5104
5105  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5106  if (Exp < -3 || Exp > 4)
5107    return -1;
5108  Exp = ((Exp+3) & 0x7) ^ 4;
5109
5110  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5111}
5112
5113/// isFPImmLegal - Returns true if the target can instruction select the
5114/// specified FP immediate natively. If false, the legalizer will
5115/// materialize the FP immediate as a load from a constant pool.
5116bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5117  if (!Subtarget->hasVFP3())
5118    return false;
5119  if (VT == MVT::f32)
5120    return ARM::getVFPf32Imm(Imm) != -1;
5121  if (VT == MVT::f64)
5122    return ARM::getVFPf64Imm(Imm) != -1;
5123  return false;
5124}
5125