ARMISelLowering.cpp revision df50d7e238c4802eb2de04646b8f7ff7327730a0
1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-isel"
16#include "ARM.h"
17#include "ARMAddressingModes.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMISelLowering.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetMachine.h"
25#include "ARMTargetObjectFile.h"
26#include "llvm/CallingConv.h"
27#include "llvm/Constants.h"
28#include "llvm/Function.h"
29#include "llvm/GlobalValue.h"
30#include "llvm/Instruction.h"
31#include "llvm/Intrinsics.h"
32#include "llvm/Type.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/PseudoSourceValue.h"
40#include "llvm/CodeGen/SelectionDAG.h"
41#include "llvm/MC/MCSectionMachO.h"
42#include "llvm/Target/TargetOptions.h"
43#include "llvm/ADT/VectorExtras.h"
44#include "llvm/ADT/Statistic.h"
45#include "llvm/Support/CommandLine.h"
46#include "llvm/Support/ErrorHandling.h"
47#include "llvm/Support/MathExtras.h"
48#include "llvm/Support/raw_ostream.h"
49#include <sstream>
50using namespace llvm;
51
52STATISTIC(NumTailCalls, "Number of tail calls");
53
54// This option should go away when tail calls fully work.
55static cl::opt<bool>
56EnableARMTailCalls("arm-tail-calls", cl::Hidden,
57  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
58  cl::init(false));
59
60static cl::opt<bool>
61EnableARMLongCalls("arm-long-calls", cl::Hidden,
62  cl::desc("Generate calls via indirect call instructions."),
63  cl::init(false));
64
65static cl::opt<bool>
66ARMInterworking("arm-interworking", cl::Hidden,
67  cl::desc("Enable / disable ARM interworking (for debugging only)"),
68  cl::init(true));
69
70static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
71                                   CCValAssign::LocInfo &LocInfo,
72                                   ISD::ArgFlagsTy &ArgFlags,
73                                   CCState &State);
74static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
75                                    CCValAssign::LocInfo &LocInfo,
76                                    ISD::ArgFlagsTy &ArgFlags,
77                                    CCState &State);
78static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
79                                      CCValAssign::LocInfo &LocInfo,
80                                      ISD::ArgFlagsTy &ArgFlags,
81                                      CCState &State);
82static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
83                                       CCValAssign::LocInfo &LocInfo,
84                                       ISD::ArgFlagsTy &ArgFlags,
85                                       CCState &State);
86
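// addTypeForNEON - Set the operation actions for a NEON vector type: promote
// loads, stores and bitwise ops to the given types, custom-lower the
// operations NEON handles directly, and expand the ones it cannot do natively.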
87void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
88                                       EVT PromotedBitwiseVT) {
89  if (VT != PromotedLdStVT) {
90    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
91    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
92                       PromotedLdStVT.getSimpleVT());
93
94    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
95    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
96                       PromotedLdStVT.getSimpleVT());
97  }
98
99  EVT ElemTy = VT.getVectorElementType();
100  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
101    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
102  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
103    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
104  if (ElemTy != MVT::i32) {
105    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
106    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
107    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
108    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
109  }
110  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
111  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
112  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
113  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
114  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
115  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
116  if (VT.isInteger()) {
117    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
118    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
119    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
120  }
121
122  // Promote all bit-wise operations.
123  if (VT.isInteger() && VT != PromotedBitwiseVT) {
124    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
125    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
126                       PromotedBitwiseVT.getSimpleVT());
127    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
128    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
129                       PromotedBitwiseVT.getSimpleVT());
130    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
131    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
132                       PromotedBitwiseVT.getSimpleVT());
133  }
134
135  // Neon does not support vector divide/remainder operations.
136  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
137  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
138  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
139  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
140  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
141  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
142}
143
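// addDRTypeForNEON - Make a 64-bit vector type legal in the D register class
// and set up its NEON operation actions.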
144void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
145  addRegisterClass(VT, ARM::DPRRegisterClass);
146  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
147}
148
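// addQRTypeForNEON - Make a 128-bit vector type legal in the Q register class
// and set up its NEON operation actions.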
149void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
150  addRegisterClass(VT, ARM::QPRRegisterClass);
151  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
152}
153
154static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
155  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
156    return new TargetLoweringObjectFileMachO();
157
158  return new ARMElfTargetObjectFile();
159}
160
161ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
162    : TargetLowering(TM, createTLOF(TM)) {
163  Subtarget = &TM.getSubtarget<ARMSubtarget>();
164
165  if (Subtarget->isTargetDarwin()) {
166    // Uses VFP for Thumb libfuncs if available.
167    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
168      // Single-precision floating-point arithmetic.
169      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
170      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
171      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
172      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
173
174      // Double-precision floating-point arithmetic.
175      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
176      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
177      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
178      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
179
180      // Single-precision comparisons.
181      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
182      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
183      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
184      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
185      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
186      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
187      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
188      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
189
190      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
191      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
192      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
193      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
194      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
195      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
196      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
197      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
198
199      // Double-precision comparisons.
200      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
201      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
202      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
203      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
204      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
205      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
206      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
207      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
208
209      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
210      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
211      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
212      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
213      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
214      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
215      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
216      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
217
218      // Floating-point to integer conversions.
219      // i64 conversions are done via library routines even when generating VFP
220      // instructions, so use the same ones.
221      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
222      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
223      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
224      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
225
226      // Conversions between floating types.
227      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
228      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
229
230      // Integer to floating-point conversions.
231      // i64 conversions are done via library routines even when generating VFP
232      // instructions, so use the same ones.
233      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
234      // e.g., __floatunsidf vs. __floatunssidfvfp.
235      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
236      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
237      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
238      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
239    }
240  }
241
242  // These libcalls are not available for 32-bit targets.
243  setLibcallName(RTLIB::SHL_I128, 0);
244  setLibcallName(RTLIB::SRL_I128, 0);
245  setLibcallName(RTLIB::SRA_I128, 0);
246
247  // Libcalls should use the AAPCS base standard ABI, even if hard float
248  // is in effect, as per the ARM RTABI specification, section 4.1.2.
249  if (Subtarget->isAAPCS_ABI()) {
250    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
251      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
252                            CallingConv::ARM_AAPCS);
253    }
254  }
255
256  if (Subtarget->isThumb1Only())
257    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
258  else
259    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
260  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
261    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
262    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
263
264    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
265  }
266
267  if (Subtarget->hasNEON()) {
268    addDRTypeForNEON(MVT::v2f32);
269    addDRTypeForNEON(MVT::v8i8);
270    addDRTypeForNEON(MVT::v4i16);
271    addDRTypeForNEON(MVT::v2i32);
272    addDRTypeForNEON(MVT::v1i64);
273
274    addQRTypeForNEON(MVT::v4f32);
275    addQRTypeForNEON(MVT::v2f64);
276    addQRTypeForNEON(MVT::v16i8);
277    addQRTypeForNEON(MVT::v8i16);
278    addQRTypeForNEON(MVT::v4i32);
279    addQRTypeForNEON(MVT::v2i64);
280
281    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
282    // neither Neon nor VFP supports any arithmetic operations on it.
283    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
284    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
285    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
286    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
287    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
288    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
289    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
290    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
291    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
292    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
293    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
294    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
295    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
296    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
297    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
298    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
299    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
300    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
301    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
302    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
303    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
304    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
305    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
306    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
307
308    // Neon does not support some operations on v1i64 and v2i64 types.
309    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
310    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
311    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
312    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
313
314    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
315    setTargetDAGCombine(ISD::SHL);
316    setTargetDAGCombine(ISD::SRL);
317    setTargetDAGCombine(ISD::SRA);
318    setTargetDAGCombine(ISD::SIGN_EXTEND);
319    setTargetDAGCombine(ISD::ZERO_EXTEND);
320    setTargetDAGCombine(ISD::ANY_EXTEND);
321    setTargetDAGCombine(ISD::SELECT_CC);
322  }
323
324  computeRegisterProperties();
325
326  // ARM does not have f32 extending load.
327  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
328
329  // ARM does not have i1 sign extending load.
330  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
331
332  // ARM supports all 4 flavors of integer indexed load / store.
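  // (pre-increment, pre-decrement, post-increment and post-decrement).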
333  if (!Subtarget->isThumb1Only()) {
334    for (unsigned im = (unsigned)ISD::PRE_INC;
335         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
336      setIndexedLoadAction(im,  MVT::i1,  Legal);
337      setIndexedLoadAction(im,  MVT::i8,  Legal);
338      setIndexedLoadAction(im,  MVT::i16, Legal);
339      setIndexedLoadAction(im,  MVT::i32, Legal);
340      setIndexedStoreAction(im, MVT::i1,  Legal);
341      setIndexedStoreAction(im, MVT::i8,  Legal);
342      setIndexedStoreAction(im, MVT::i16, Legal);
343      setIndexedStoreAction(im, MVT::i32, Legal);
344    }
345  }
346
347  // i64 operation support.
348  if (Subtarget->isThumb1Only()) {
349    setOperationAction(ISD::MUL,     MVT::i64, Expand);
350    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
351    setOperationAction(ISD::MULHS,   MVT::i32, Expand);
352    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
353    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
354  } else {
355    setOperationAction(ISD::MUL,     MVT::i64, Expand);
356    setOperationAction(ISD::MULHU,   MVT::i32, Expand);
357    if (!Subtarget->hasV6Ops())
358      setOperationAction(ISD::MULHS, MVT::i32, Expand);
359  }
360  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
361  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
362  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
363  setOperationAction(ISD::SRL,       MVT::i64, Custom);
364  setOperationAction(ISD::SRA,       MVT::i64, Custom);
365
366  // ARM does not have ROTL.
367  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
368  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
369  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
370  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
371    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
372
373  // Byte swapping (BSWAP) is only available on ARMv6 and later.
374  if (!Subtarget->hasV6Ops())
375    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
376
377  // These are expanded into libcalls.
378  if (!Subtarget->hasDivide()) {
379    // v7M has a hardware divider
380    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
381    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
382  }
383  setOperationAction(ISD::SREM,  MVT::i32, Expand);
384  setOperationAction(ISD::UREM,  MVT::i32, Expand);
385  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
386  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
387
388  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
389  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
390  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
391  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
392  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
393
394  setOperationAction(ISD::TRAP, MVT::Other, Legal);
395
396  // Use the default implementation.
397  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
398  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
399  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
400  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
401  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
402  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
403  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
404  // FIXME: Shouldn't need this, since no register is used, but the legalizer
405  // doesn't yet know how to avoid using one for SjLj.
406  setExceptionSelectorRegister(ARM::R0);
407  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
408  // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
409  // use the default expansion.
410  TargetLowering::LegalizeAction AtomicAction =
411    (Subtarget->hasV7Ops() ||
412      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) ? Custom : Expand;
413  setOperationAction(ISD::MEMBARRIER, MVT::Other, AtomicAction);
414
415  // If the subtarget does not have extract instructions, sign_extend_inreg
416  // needs to be expanded. Extract is available in ARM mode on v6 and up,
417  // and on most Thumb2 implementations.
418  if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
419      || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
420    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
421    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
422  }
423  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
424
425  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
426    // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
427    // iff the target supports VFP2.
428    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
429
430  // We want to custom lower some of our intrinsics.
431  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
432
433  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
434  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
435  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
436  setOperationAction(ISD::SELECT,    MVT::i32, Expand);
437  setOperationAction(ISD::SELECT,    MVT::f32, Expand);
438  setOperationAction(ISD::SELECT,    MVT::f64, Expand);
439  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
440  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
441  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
442
443  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
444  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
445  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
446  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
447  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
448
449  // We don't support sin/cos/fmod/copysign/pow
450  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
451  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
452  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
453  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
454  setOperationAction(ISD::FREM,      MVT::f64, Expand);
455  setOperationAction(ISD::FREM,      MVT::f32, Expand);
456  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
457    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
458    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
459  }
460  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
461  setOperationAction(ISD::FPOW,      MVT::f32, Expand);
462
463  // Various VFP goodness
464  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
465    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
466    if (Subtarget->hasVFP2()) {
467      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
468      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
469      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
470      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
471    }
472    // Special handling for half-precision FP.
473    if (!Subtarget->hasFP16()) {
474      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
475      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
476    }
477  }
478
479  // We have target-specific dag combine patterns for the following nodes:
480  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
481  setTargetDAGCombine(ISD::ADD);
482  setTargetDAGCombine(ISD::SUB);
483  setTargetDAGCombine(ISD::MUL);
484
485  setStackPointerRegisterToSaveRestore(ARM::SP);
486
487  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
488    setSchedulingPreference(Sched::RegPressure);
489  else
490    setSchedulingPreference(Sched::Hybrid);
491
492  // FIXME: If-converter should use instruction latency to determine
493  // profitability rather than relying on fixed limits.
494  if (Subtarget->getCPUString() == "generic") {
495    // Generic (and overly aggressive) if-conversion limits.
496    setIfCvtBlockSizeLimit(10);
497    setIfCvtDupBlockSizeLimit(2);
498  } else if (Subtarget->hasV7Ops()) {
499    setIfCvtBlockSizeLimit(3);
500    setIfCvtDupBlockSizeLimit(1);
501  } else if (Subtarget->hasV6Ops()) {
502    setIfCvtBlockSizeLimit(2);
503    setIfCvtDupBlockSizeLimit(1);
504  } else {
505    setIfCvtBlockSizeLimit(3);
506    setIfCvtDupBlockSizeLimit(2);
507  }
508
509  maxStoresPerMemcpy = 1;   // Temporary: rewrite the interface to use the type.
510  // Do not enable CodePlacementOpt for now: it currently runs after the
511  // ARMConstantIslandPass and messes up branch relaxation and placement
512  // of constant islands.
513  // benefitFromCodePlacementOpt = true;
514}
515
516const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
517  switch (Opcode) {
518  default: return 0;
519  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
520  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
521  case ARMISD::CALL:          return "ARMISD::CALL";
522  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
523  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
524  case ARMISD::tCALL:         return "ARMISD::tCALL";
525  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
526  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
527  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
528  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
529  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
530  case ARMISD::CMP:           return "ARMISD::CMP";
531  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
532  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
533  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
534  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
535  case ARMISD::CMOV:          return "ARMISD::CMOV";
536  case ARMISD::CNEG:          return "ARMISD::CNEG";
537
538  case ARMISD::RBIT:          return "ARMISD::RBIT";
539
540  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
541  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
542  case ARMISD::SITOF:         return "ARMISD::SITOF";
543  case ARMISD::UITOF:         return "ARMISD::UITOF";
544
545  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
546  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
547  case ARMISD::RRX:           return "ARMISD::RRX";
548
549  case ARMISD::VMOVRRD:         return "ARMISD::VMOVRRD";
550  case ARMISD::VMOVDRR:         return "ARMISD::VMOVDRR";
551
552  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
553  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
554
555  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
556
557  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
558
559  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
560
561  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
562  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";
563
564  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
565  case ARMISD::VCGE:          return "ARMISD::VCGE";
566  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
567  case ARMISD::VCGT:          return "ARMISD::VCGT";
568  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
569  case ARMISD::VTST:          return "ARMISD::VTST";
570
571  case ARMISD::VSHL:          return "ARMISD::VSHL";
572  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
573  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
574  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
575  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
576  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
577  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
578  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
579  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
580  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
581  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
582  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
583  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
584  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
585  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
586  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
587  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
588  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
589  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
590  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
591  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
592  case ARMISD::VDUP:          return "ARMISD::VDUP";
593  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
594  case ARMISD::VEXT:          return "ARMISD::VEXT";
595  case ARMISD::VREV64:        return "ARMISD::VREV64";
596  case ARMISD::VREV32:        return "ARMISD::VREV32";
597  case ARMISD::VREV16:        return "ARMISD::VREV16";
598  case ARMISD::VZIP:          return "ARMISD::VZIP";
599  case ARMISD::VUZP:          return "ARMISD::VUZP";
600  case ARMISD::VTRN:          return "ARMISD::VTRN";
601  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
602  case ARMISD::FMAX:          return "ARMISD::FMAX";
603  case ARMISD::FMIN:          return "ARMISD::FMIN";
604  }
605}
606
607/// getRegClassFor - Return the register class that should be used for the
608/// specified value type.
609TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
610  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
611  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
612  // load / store 4 to 8 consecutive D registers.
613  if (Subtarget->hasNEON()) {
614    if (VT == MVT::v4i64)
615      return ARM::QQPRRegisterClass;
616    else if (VT == MVT::v8i64)
617      return ARM::QQQQPRRegisterClass;
618  }
619  return TargetLowering::getRegClassFor(VT);
620}
621
622/// getFunctionAlignment - Return the Log2 alignment of this function.
623unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
624  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
625}
626
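/// getSchedulingPreference - Prefer latency scheduling for nodes that produce
/// floating-point or vector values, for loads, and for long-latency
/// instructions; otherwise schedule to reduce register pressure.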
627Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
628  unsigned NumVals = N->getNumValues();
629  if (!NumVals)
630    return Sched::RegPressure;
631
632  for (unsigned i = 0; i != NumVals; ++i) {
633    EVT VT = N->getValueType(i);
634    if (VT.isFloatingPoint() || VT.isVector())
635      return Sched::Latency;
636  }
637
638  if (!N->isMachineOpcode())
639    return Sched::RegPressure;
640
641  // Loads are scheduled for latency even if the instruction itinerary
642  // is not available.
643  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
644  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
645  if (TID.mayLoad())
646    return Sched::Latency;
647
648  const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
649  if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
650    return Sched::Latency;
651  return Sched::RegPressure;
652}
653
654//===----------------------------------------------------------------------===//
655// Lowering Code
656//===----------------------------------------------------------------------===//
657
658/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
659static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
660  switch (CC) {
661  default: llvm_unreachable("Unknown condition code!");
662  case ISD::SETNE:  return ARMCC::NE;
663  case ISD::SETEQ:  return ARMCC::EQ;
664  case ISD::SETGT:  return ARMCC::GT;
665  case ISD::SETGE:  return ARMCC::GE;
666  case ISD::SETLT:  return ARMCC::LT;
667  case ISD::SETLE:  return ARMCC::LE;
668  case ISD::SETUGT: return ARMCC::HI;
669  case ISD::SETUGE: return ARMCC::HS;
670  case ISD::SETULT: return ARMCC::LO;
671  case ISD::SETULE: return ARMCC::LS;
672  }
673}
674
675/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
676static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
677                        ARMCC::CondCodes &CondCode2) {
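  // Some IR predicates require two ARM conditions. When that happens,
  // CondCode2 holds the second (alternative) condition; ARMCC::AL means the
  // first condition alone is sufficient.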
678  CondCode2 = ARMCC::AL;
679  switch (CC) {
680  default: llvm_unreachable("Unknown FP condition!");
681  case ISD::SETEQ:
682  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
683  case ISD::SETGT:
684  case ISD::SETOGT: CondCode = ARMCC::GT; break;
685  case ISD::SETGE:
686  case ISD::SETOGE: CondCode = ARMCC::GE; break;
687  case ISD::SETOLT: CondCode = ARMCC::MI; break;
688  case ISD::SETOLE: CondCode = ARMCC::LS; break;
689  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
690  case ISD::SETO:   CondCode = ARMCC::VC; break;
691  case ISD::SETUO:  CondCode = ARMCC::VS; break;
692  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
693  case ISD::SETUGT: CondCode = ARMCC::HI; break;
694  case ISD::SETUGE: CondCode = ARMCC::PL; break;
695  case ISD::SETLT:
696  case ISD::SETULT: CondCode = ARMCC::LT; break;
697  case ISD::SETLE:
698  case ISD::SETULE: CondCode = ARMCC::LE; break;
699  case ISD::SETNE:
700  case ISD::SETUNE: CondCode = ARMCC::NE; break;
701  }
702}
703
704//===----------------------------------------------------------------------===//
705//                      Calling Convention Implementation
706//===----------------------------------------------------------------------===//
707
708#include "ARMGenCallingConv.inc"
709
710// APCS f64 is in register pairs, possibly split to stack
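// (The two halves take the next available registers from R0-R3; if only one
// register is left, one half uses it and the other goes on the stack, and if
// none are left the whole value goes on the stack.)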
711static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
712                          CCValAssign::LocInfo &LocInfo,
713                          CCState &State, bool CanFail) {
714  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
715
716  // Try to get the first register.
717  if (unsigned Reg = State.AllocateReg(RegList, 4))
718    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
719  else {
720    // For the 2nd half of a v2f64, do not fail.
721    if (CanFail)
722      return false;
723
724    // Put the whole thing on the stack.
725    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
726                                           State.AllocateStack(8, 4),
727                                           LocVT, LocInfo));
728    return true;
729  }
730
731  // Try to get the second register.
732  if (unsigned Reg = State.AllocateReg(RegList, 4))
733    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
734  else
735    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
736                                           State.AllocateStack(4, 4),
737                                           LocVT, LocInfo));
738  return true;
739}
740
741static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
742                                   CCValAssign::LocInfo &LocInfo,
743                                   ISD::ArgFlagsTy &ArgFlags,
744                                   CCState &State) {
745  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
746    return false;
747  if (LocVT == MVT::v2f64 &&
748      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
749    return false;
750  return true;  // we handled it
751}
752
753// AAPCS f64 is in aligned register pairs
754static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
755                           CCValAssign::LocInfo &LocInfo,
756                           CCState &State, bool CanFail) {
757  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
758  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
759
760  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
761  if (Reg == 0) {
762    // For the 2nd half of a v2f64, do not just fail.
763    if (CanFail)
764      return false;
765
766    // Put the whole thing on the stack.
767    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
768                                           State.AllocateStack(8, 8),
769                                           LocVT, LocInfo));
770    return true;
771  }
772
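  // Determine which hi register was allocated so that the matching lo
  // register can be paired with it.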
773  unsigned i;
774  for (i = 0; i < 2; ++i)
775    if (HiRegList[i] == Reg)
776      break;
777
778  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
779  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
780                                         LocVT, LocInfo));
781  return true;
782}
783
784static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
785                                    CCValAssign::LocInfo &LocInfo,
786                                    ISD::ArgFlagsTy &ArgFlags,
787                                    CCState &State) {
788  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
789    return false;
790  if (LocVT == MVT::v2f64 &&
791      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
792    return false;
793  return true;  // we handled it
794}
795
796static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
797                         CCValAssign::LocInfo &LocInfo, CCState &State) {
798  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
799  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
800
801  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
802  if (Reg == 0)
803    return false; // we didn't handle it
804
805  unsigned i;
806  for (i = 0; i < 2; ++i)
807    if (HiRegList[i] == Reg)
808      break;
809
810  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
811  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
812                                         LocVT, LocInfo));
813  return true;
814}
815
816static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
817                                      CCValAssign::LocInfo &LocInfo,
818                                      ISD::ArgFlagsTy &ArgFlags,
819                                      CCState &State) {
820  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
821    return false;
822  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
823    return false;
824  return true;  // we handled it
825}
826
827static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
828                                       CCValAssign::LocInfo &LocInfo,
829                                       ISD::ArgFlagsTy &ArgFlags,
830                                       CCState &State) {
831  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
832                                   State);
833}
834
835/// CCAssignFnForNode - Selects the correct CCAssignFn for the
836/// given calling convention value.
837CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
838                                                 bool Return,
839                                                 bool isVarArg) const {
840  switch (CC) {
841  default:
842    llvm_unreachable("Unsupported calling convention");
843  case CallingConv::C:
844  case CallingConv::Fast:
845    // Use target triple & subtarget features to do actual dispatch.
846    if (Subtarget->isAAPCS_ABI()) {
847      if (Subtarget->hasVFP2() &&
848          FloatABIType == FloatABI::Hard && !isVarArg)
849        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
850      else
851        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
852    } else
853        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
854  case CallingConv::ARM_AAPCS_VFP:
855    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
856  case CallingConv::ARM_AAPCS:
857    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
858  case CallingConv::ARM_APCS:
859    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
860  }
861}
862
863/// LowerCallResult - Lower the result values of a call into the
864/// appropriate copies out of appropriate physical registers.
865SDValue
866ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
867                                   CallingConv::ID CallConv, bool isVarArg,
868                                   const SmallVectorImpl<ISD::InputArg> &Ins,
869                                   DebugLoc dl, SelectionDAG &DAG,
870                                   SmallVectorImpl<SDValue> &InVals) const {
871
872  // Assign locations to each value returned by this call.
873  SmallVector<CCValAssign, 16> RVLocs;
874  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
875                 RVLocs, *DAG.getContext());
876  CCInfo.AnalyzeCallResult(Ins,
877                           CCAssignFnForNode(CallConv, /* Return*/ true,
878                                             isVarArg));
879
880  // Copy all of the result registers out of their specified physreg.
881  for (unsigned i = 0; i != RVLocs.size(); ++i) {
882    CCValAssign VA = RVLocs[i];
883
884    SDValue Val;
885    if (VA.needsCustom()) {
886      // Handle f64 or half of a v2f64.
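      // Each f64 is returned as two i32 halves in a register pair and is
      // reassembled with VMOVDRR; for v2f64 this is done twice and the two
      // f64 results are inserted into a vector.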
887      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
888                                      InFlag);
889      Chain = Lo.getValue(1);
890      InFlag = Lo.getValue(2);
891      VA = RVLocs[++i]; // skip ahead to next loc
892      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
893                                      InFlag);
894      Chain = Hi.getValue(1);
895      InFlag = Hi.getValue(2);
896      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
897
898      if (VA.getLocVT() == MVT::v2f64) {
899        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
900        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
901                          DAG.getConstant(0, MVT::i32));
902
903        VA = RVLocs[++i]; // skip ahead to next loc
904        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
905        Chain = Lo.getValue(1);
906        InFlag = Lo.getValue(2);
907        VA = RVLocs[++i]; // skip ahead to next loc
908        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
909        Chain = Hi.getValue(1);
910        InFlag = Hi.getValue(2);
911        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
912        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
913                          DAG.getConstant(1, MVT::i32));
914      }
915    } else {
916      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
917                               InFlag);
918      Chain = Val.getValue(1);
919      InFlag = Val.getValue(2);
920    }
921
922    switch (VA.getLocInfo()) {
923    default: llvm_unreachable("Unknown loc info!");
924    case CCValAssign::Full: break;
925    case CCValAssign::BCvt:
926      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
927      break;
928    }
929
930    InVals.push_back(Val);
931  }
932
933  return Chain;
934}
935
936/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
937/// specified by "Src" to the address "Dst" of size "Size".  Alignment information is
938/// specified by the specific parameter attribute.  The copy will be passed as
939/// a byval function parameter.
940/// Sometimes what we are copying is the end of a larger object, the part that
941/// does not fit in registers.
942static SDValue
943CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
944                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
945                          DebugLoc dl) {
946  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
947  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
948                       /*isVolatile=*/false, /*AlwaysInline=*/false,
949                       NULL, 0, NULL, 0);
950}
951
952/// LowerMemOpCallTo - Store the argument to the stack.
953SDValue
954ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
955                                    SDValue StackPtr, SDValue Arg,
956                                    DebugLoc dl, SelectionDAG &DAG,
957                                    const CCValAssign &VA,
958                                    ISD::ArgFlagsTy Flags) const {
959  unsigned LocMemOffset = VA.getLocMemOffset();
960  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
961  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
962  if (Flags.isByVal()) {
963    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
964  }
965  return DAG.getStore(Chain, dl, Arg, PtrOff,
966                      PseudoSourceValue::getStack(), LocMemOffset,
967                      false, false, 0);
968}
969
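/// PassF64ArgInRegs - Split an f64 (or half of a v2f64) argument into two i32
/// values with VMOVRRD and pass them in the assigned locations: the first half
/// in a register, the second half in another register or on the stack.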
970void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
971                                         SDValue Chain, SDValue &Arg,
972                                         RegsToPassVector &RegsToPass,
973                                         CCValAssign &VA, CCValAssign &NextVA,
974                                         SDValue &StackPtr,
975                                         SmallVector<SDValue, 8> &MemOpChains,
976                                         ISD::ArgFlagsTy Flags) const {
977
978  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
979                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
980  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
981
982  if (NextVA.isRegLoc())
983    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
984  else {
985    assert(NextVA.isMemLoc());
986    if (StackPtr.getNode() == 0)
987      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
988
989    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
990                                           dl, DAG, NextVA,
991                                           Flags));
992  }
993}
994
995/// LowerCall - Lower a call into a callseq_start <-
996/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
997/// nodes.
998SDValue
999ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1000                             CallingConv::ID CallConv, bool isVarArg,
1001                             bool &isTailCall,
1002                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1003                             const SmallVectorImpl<ISD::InputArg> &Ins,
1004                             DebugLoc dl, SelectionDAG &DAG,
1005                             SmallVectorImpl<SDValue> &InVals) const {
1006  MachineFunction &MF = DAG.getMachineFunction();
1007  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1008  bool IsSibCall = false;
1009  // Temporarily disable tail calls so things don't break.
1010  if (!EnableARMTailCalls)
1011    isTailCall = false;
1012  if (isTailCall) {
1013    // Check if it's really possible to do a tail call.
1014    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1015                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1016                                                   Outs, Ins, DAG);
1017    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1018    // detected sibcalls.
1019    if (isTailCall) {
1020      ++NumTailCalls;
1021      IsSibCall = true;
1022    }
1023  }
1024
1025  // Analyze operands of the call, assigning locations to each operand.
1026  SmallVector<CCValAssign, 16> ArgLocs;
1027  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1028                 *DAG.getContext());
1029  CCInfo.AnalyzeCallOperands(Outs,
1030                             CCAssignFnForNode(CallConv, /* Return*/ false,
1031                                               isVarArg));
1032
1033  // Get a count of how many bytes are to be pushed on the stack.
1034  unsigned NumBytes = CCInfo.getNextStackOffset();
1035
1036  // For tail calls, memory operands are available in our caller's stack.
1037  if (IsSibCall)
1038    NumBytes = 0;
1039
1040  // Adjust the stack pointer for the new arguments...
1041  // These operations are automatically eliminated by the prolog/epilog pass
1042  if (!IsSibCall)
1043    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1044
1045  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1046
1047  RegsToPassVector RegsToPass;
1048  SmallVector<SDValue, 8> MemOpChains;
1049
1050  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1051  // of tail call optimization, arguments are handled later.
1052  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1053       i != e;
1054       ++i, ++realArgIdx) {
1055    CCValAssign &VA = ArgLocs[i];
1056    SDValue Arg = Outs[realArgIdx].Val;
1057    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1058
1059    // Promote the value if needed.
1060    switch (VA.getLocInfo()) {
1061    default: llvm_unreachable("Unknown loc info!");
1062    case CCValAssign::Full: break;
1063    case CCValAssign::SExt:
1064      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1065      break;
1066    case CCValAssign::ZExt:
1067      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1068      break;
1069    case CCValAssign::AExt:
1070      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1071      break;
1072    case CCValAssign::BCvt:
1073      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1074      break;
1075    }
1076
1077    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1078    if (VA.needsCustom()) {
1079      if (VA.getLocVT() == MVT::v2f64) {
1080        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1081                                  DAG.getConstant(0, MVT::i32));
1082        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1083                                  DAG.getConstant(1, MVT::i32));
1084
1085        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1086                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1087
1088        VA = ArgLocs[++i]; // skip ahead to next loc
1089        if (VA.isRegLoc()) {
1090          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1091                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1092        } else {
1093          assert(VA.isMemLoc());
1094
1095          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1096                                                 dl, DAG, VA, Flags));
1097        }
1098      } else {
1099        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1100                         StackPtr, MemOpChains, Flags);
1101      }
1102    } else if (VA.isRegLoc()) {
1103      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1104    } else if (!IsSibCall) {
1105      assert(VA.isMemLoc());
1106
1107      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1108                                             dl, DAG, VA, Flags));
1109    }
1110  }
1111
1112  if (!MemOpChains.empty())
1113    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1114                        &MemOpChains[0], MemOpChains.size());
1115
1116  // Build a sequence of copy-to-reg nodes chained together with token chain
1117  // and flag operands which copy the outgoing args into the appropriate regs.
1118  SDValue InFlag;
1119  // Tail call byval lowering might overwrite argument registers, so in the case
1120  // of tail call optimization the copies to registers are lowered later.
1121  if (!isTailCall)
1122    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1123      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1124                               RegsToPass[i].second, InFlag);
1125      InFlag = Chain.getValue(1);
1126    }
1127
1128  // For tail calls lower the arguments to the 'real' stack slot.
1129  if (isTailCall) {
1130    // Force all the incoming stack arguments to be loaded from the stack
1131    // before any new outgoing arguments are stored to the stack, because the
1132    // outgoing stack slots may alias the incoming argument stack slots, and
1133    // the alias isn't otherwise explicit. This is slightly more conservative
1134    // than necessary, because it means that each store effectively depends
1135    // on every argument instead of just those arguments it would clobber.
1136
1137    // Do not flag the preceding CopyToReg nodes together with the following ones.
1138    InFlag = SDValue();
1139    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1140      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1141                               RegsToPass[i].second, InFlag);
1142      InFlag = Chain.getValue(1);
1143    }
1144    InFlag = SDValue();
1145  }
1146
1147  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1148  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
1149  // node so that legalize doesn't hack it.
1150  bool isDirect = false;
1151  bool isARMFunc = false;
1152  bool isLocalARMFunc = false;
1153  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1154
1155  if (EnableARMLongCalls) {
1156    assert (getTargetMachine().getRelocationModel() == Reloc::Static
1157            && "long-calls with non-static relocation model!");
1158    // Handle a global address or an external symbol. If it's not one of
1159    // those, the target's already in a register, so we don't need to do
1160    // anything extra.
1161    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1162      const GlobalValue *GV = G->getGlobal();
1163      // Create a constant pool entry for the callee address
1164      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1165      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1166                                                           ARMPCLabelIndex,
1167                                                           ARMCP::CPValue, 0);
1168      // Get the address of the callee into a register
1169      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1170      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1171      Callee = DAG.getLoad(getPointerTy(), dl,
1172                           DAG.getEntryNode(), CPAddr,
1173                           PseudoSourceValue::getConstantPool(), 0,
1174                           false, false, 0);
1175    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1176      const char *Sym = S->getSymbol();
1177
1178      // Create a constant pool entry for the callee address
1179      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1180      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1181                                                       Sym, ARMPCLabelIndex, 0);
1182      // Get the address of the callee into a register
1183      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1184      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1185      Callee = DAG.getLoad(getPointerTy(), dl,
1186                           DAG.getEntryNode(), CPAddr,
1187                           PseudoSourceValue::getConstantPool(), 0,
1188                           false, false, 0);
1189    }
1190  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1191    const GlobalValue *GV = G->getGlobal();
1192    isDirect = true;
1193    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1194    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1195                   getTargetMachine().getRelocationModel() != Reloc::Static;
1196    isARMFunc = !Subtarget->isThumb() || isStub;
1197    // An ARM call to a local ARM function is predicable.
1198    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1199    // tBX takes a register source operand.
1200    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1201      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1202      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1203                                                           ARMPCLabelIndex,
1204                                                           ARMCP::CPValue, 4);
1205      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1206      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1207      Callee = DAG.getLoad(getPointerTy(), dl,
1208                           DAG.getEntryNode(), CPAddr,
1209                           PseudoSourceValue::getConstantPool(), 0,
1210                           false, false, 0);
1211      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1212      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1213                           getPointerTy(), Callee, PICLabel);
1214    } else
1215      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
1216  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1217    isDirect = true;
1218    bool isStub = Subtarget->isTargetDarwin() &&
1219                  getTargetMachine().getRelocationModel() != Reloc::Static;
1220    isARMFunc = !Subtarget->isThumb() || isStub;
1221    // tBX takes a register source operand.
1222    const char *Sym = S->getSymbol();
1223    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1224      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1225      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1226                                                       Sym, ARMPCLabelIndex, 4);
1227      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1228      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1229      Callee = DAG.getLoad(getPointerTy(), dl,
1230                           DAG.getEntryNode(), CPAddr,
1231                           PseudoSourceValue::getConstantPool(), 0,
1232                           false, false, 0);
1233      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1234      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1235                           getPointerTy(), Callee, PICLabel);
1236    } else
1237      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
1238  }
1239
1240  // FIXME: handle tail calls differently.
1241  unsigned CallOpc;
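  // Pre-v5T cores have no BLX, so an indirect call (or a Thumb call into ARM
  // code) cannot use a branch-with-link; CALL_NOLINK is used instead and LR is
  // marked as clobbered below.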
1242  if (Subtarget->isThumb()) {
1243    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1244      CallOpc = ARMISD::CALL_NOLINK;
1245    else
1246      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1247  } else {
1248    CallOpc = (isDirect || Subtarget->hasV5TOps())
1249      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1250      : ARMISD::CALL_NOLINK;
1251  }
1252  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
1253    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
1254    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
1255    InFlag = Chain.getValue(1);
1256  }
1257
1258  std::vector<SDValue> Ops;
1259  Ops.push_back(Chain);
1260  Ops.push_back(Callee);
1261
1262  // Add argument registers to the end of the list so that they are known live
1263  // into the call.
1264  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1265    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1266                                  RegsToPass[i].second.getValueType()));
1267
1268  if (InFlag.getNode())
1269    Ops.push_back(InFlag);
1270
1271  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
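  // Tail calls become TC_RETURN nodes, which carry the callee and argument
  // registers and are expanded into a tail-call branch later.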
1272  if (isTailCall)
1273    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1274
1275  // Returns a chain and a flag for retval copy to use.
1276  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1277  InFlag = Chain.getValue(1);
1278
1279  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1280                             DAG.getIntPtrConstant(0, true), InFlag);
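  // The glue from CALLSEQ_END is only needed when there are return values to
  // copy out below.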
1281  if (!Ins.empty())
1282    InFlag = Chain.getValue(1);
1283
1284  // Handle result values, copying them out of physregs into vregs that we
1285  // return.
1286  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1287                         dl, DAG, InVals);
1288}
1289
1290/// MatchingStackOffset - Return true if the given stack call argument is
1291/// already available in the same relative position in the caller's
1292/// incoming argument stack.
1293static
1294bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1295                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1296                         const ARMInstrInfo *TII) {
1297  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1298  int FI = INT_MAX;
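  // Trace the argument back to the fixed stack slot it was loaded from, either
  // through a virtual register definition or directly through a load from a
  // frame index.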
1299  if (Arg.getOpcode() == ISD::CopyFromReg) {
1300    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1301    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
1302      return false;
1303    MachineInstr *Def = MRI->getVRegDef(VR);
1304    if (!Def)
1305      return false;
1306    if (!Flags.isByVal()) {
1307      if (!TII->isLoadFromStackSlot(Def, FI))
1308        return false;
1309    } else {
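      // Byval arguments are not handled here yet; the commented-out code below
      // mirrors the X86 implementation and is kept for reference.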
1310//      unsigned Opcode = Def->getOpcode();
1311//      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
1312//          Def->getOperand(1).isFI()) {
1313//        FI = Def->getOperand(1).getIndex();
1314//        Bytes = Flags.getByValSize();
1315//      } else
1316        return false;
1317    }
1318  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1319    if (Flags.isByVal())
1320      // ByVal argument is passed in as a pointer but it's now being
1321      // dereferenced. e.g.
1322      // define @foo(%struct.X* %A) {
1323      //   tail call @bar(%struct.X* byval %A)
1324      // }
1325      return false;
1326    SDValue Ptr = Ld->getBasePtr();
1327    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1328    if (!FINode)
1329      return false;
1330    FI = FINode->getIndex();
1331  } else
1332    return false;
1333
1334  assert(FI != INT_MAX);
1335  if (!MFI->isFixedObjectIndex(FI))
1336    return false;
1337  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1338}
1339
1340/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1341/// for tail call optimization. Targets which want to do tail call
1342/// optimization should implement this function.
1343bool
1344ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1345                                                     CallingConv::ID CalleeCC,
1346                                                     bool isVarArg,
1347                                                     bool isCalleeStructRet,
1348                                                     bool isCallerStructRet,
1349                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1350                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1351                                                     SelectionDAG& DAG) const {
1352
1353  const Function *CallerF = DAG.getMachineFunction().getFunction();
1354  CallingConv::ID CallerCC = CallerF->getCallingConv();
1355  bool CCMatch = CallerCC == CalleeCC;
1356
1357  // Look for obvious safe cases to perform tail call optimization that do not
1358  // require ABI changes. This is what gcc calls sibcall.
1359
1360  // Do not perform sibcall optimization on vararg calls unless the call
1361  // site passes no arguments.
1362  if (isVarArg && !Outs.empty())
1363    return false;
1364
1365  // Also avoid sibcall optimization if either caller or callee uses struct
1366  // return semantics.
1367  if (isCalleeStructRet || isCallerStructRet)
1368    return false;
1369
1370  // On Thumb, for the moment, we can only do this to functions defined in this
1371  // compilation, or to indirect calls.  A Thumb B to an ARM function is not
1372  // easily fixed up in the linker, unlike BL.
1373  if (Subtarget->isThumb()) {
1374    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1375      const GlobalValue *GV = G->getGlobal();
1376      if (GV->isDeclaration() || GV->isWeakForLinker())
1377        return false;
1378    } else if (isa<ExternalSymbolSDNode>(Callee)) {
1379      return false;
1380    }
1381  }
1382
1383  // If the calling conventions do not match, make sure the results are
1384  // returned in the same way the caller expects.
1385  if (!CCMatch) {
1386    SmallVector<CCValAssign, 16> RVLocs1;
1387    CCState CCInfo1(CalleeCC, false, getTargetMachine(),
1388                    RVLocs1, *DAG.getContext());
1389    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1390
1391    SmallVector<CCValAssign, 16> RVLocs2;
1392    CCState CCInfo2(CallerCC, false, getTargetMachine(),
1393                    RVLocs2, *DAG.getContext());
1394    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1395
1396    if (RVLocs1.size() != RVLocs2.size())
1397      return false;
1398    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1399      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1400        return false;
1401      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1402        return false;
1403      if (RVLocs1[i].isRegLoc()) {
1404        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1405          return false;
1406      } else {
1407        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1408          return false;
1409      }
1410    }
1411  }
1412
1413  // If the callee takes no arguments then go on to check the results of the
1414  // call.
1415  if (!Outs.empty()) {
1416    // Check if stack adjustment is needed. For now, do not do this if any
1417    // argument is passed on the stack.
1418    SmallVector<CCValAssign, 16> ArgLocs;
1419    CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
1420                   ArgLocs, *DAG.getContext());
1421    CCInfo.AnalyzeCallOperands(Outs,
1422                               CCAssignFnForNode(CalleeCC, false, isVarArg));
1423    if (CCInfo.getNextStackOffset()) {
1424      MachineFunction &MF = DAG.getMachineFunction();
1425
1426      // Check if the arguments are already laid out in the right way as
1427      // the caller's fixed stack objects.
1428      MachineFrameInfo *MFI = MF.getFrameInfo();
1429      const MachineRegisterInfo *MRI = &MF.getRegInfo();
1430      const ARMInstrInfo *TII =
1431        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
1432      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1433           i != e;
1434           ++i, ++realArgIdx) {
1435        CCValAssign &VA = ArgLocs[i];
1436        EVT RegVT = VA.getLocVT();
1437        SDValue Arg = Outs[realArgIdx].Val;
1438        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1439        if (VA.getLocInfo() == CCValAssign::Indirect)
1440          return false;
1441        if (VA.needsCustom()) {
1442          // f64 and vector types are split into multiple registers or
1443          // register/stack-slot combinations.  The types will not match
1444          // the registers; give up on memory f64 refs until we figure
1445          // out what to do about this.
1446          if (!VA.isRegLoc())
1447            return false;
1448          if (!ArgLocs[++i].isRegLoc())
1449            return false;
1450          if (RegVT == MVT::v2f64) {
1451            if (!ArgLocs[++i].isRegLoc())
1452              return false;
1453            if (!ArgLocs[++i].isRegLoc())
1454              return false;
1455          }
1456        } else if (!VA.isRegLoc()) {
1457          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1458                                   MFI, MRI, TII))
1459            return false;
1460        }
1461      }
1462    }
1463  }
1464
1465  return true;
1466}
1467
1468SDValue
1469ARMTargetLowering::LowerReturn(SDValue Chain,
1470                               CallingConv::ID CallConv, bool isVarArg,
1471                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1472                               DebugLoc dl, SelectionDAG &DAG) const {
1473
1474  // CCValAssign - represent the assignment of the return value to a location.
1475  SmallVector<CCValAssign, 16> RVLocs;
1476
1477  // CCState - Info about the registers and stack slots.
1478  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1479                 *DAG.getContext());
1480
1481  // Analyze outgoing return values.
1482  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1483                                               isVarArg));
1484
1485  // If this is the first return lowered for this function, add
1486  // the regs to the liveout set for the function.
1487  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1488    for (unsigned i = 0; i != RVLocs.size(); ++i)
1489      if (RVLocs[i].isRegLoc())
1490        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1491  }
1492
1493  SDValue Flag;
1494
1495  // Copy the result values into the output registers.
1496  for (unsigned i = 0, realRVLocIdx = 0;
1497       i != RVLocs.size();
1498       ++i, ++realRVLocIdx) {
1499    CCValAssign &VA = RVLocs[i];
1500    assert(VA.isRegLoc() && "Can only return in registers!");
1501
1502    SDValue Arg = Outs[realRVLocIdx].Val;
1503
1504    switch (VA.getLocInfo()) {
1505    default: llvm_unreachable("Unknown loc info!");
1506    case CCValAssign::Full: break;
1507    case CCValAssign::BCvt:
1508      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1509      break;
1510    }
1511
1512    if (VA.needsCustom()) {
1513      if (VA.getLocVT() == MVT::v2f64) {
1514        // Extract the first half and return it in two registers.
1515        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1516                                   DAG.getConstant(0, MVT::i32));
1517        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1518                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
1519
1520        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1521        Flag = Chain.getValue(1);
1522        VA = RVLocs[++i]; // skip ahead to next loc
1523        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1524                                 HalfGPRs.getValue(1), Flag);
1525        Flag = Chain.getValue(1);
1526        VA = RVLocs[++i]; // skip ahead to next loc
1527
1528        // Extract the 2nd half and fall through to handle it as an f64 value.
1529        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1530                          DAG.getConstant(1, MVT::i32));
1531      }
1532      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1533      // available.
1534      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1535                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1536      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1537      Flag = Chain.getValue(1);
1538      VA = RVLocs[++i]; // skip ahead to next loc
1539      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1540                               Flag);
1541    } else
1542      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1543
1544    // Guarantee that all emitted copies are glued together so that nothing
1545    // can be scheduled between them and the return.
1546    Flag = Chain.getValue(1);
1547  }
1548
1549  SDValue result;
1550  if (Flag.getNode())
1551    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1552  else // Return Void
1553    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1554
1555  return result;
1556}
1557
1558// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1559// their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is
1560// one of the above-mentioned nodes. It has to be wrapped because otherwise
1561// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1562// be used to form addressing modes. These wrapped nodes will be selected
1563// into MOVi.
1564static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1565  EVT PtrVT = Op.getValueType();
1566  // FIXME there is no actual debug info here
1567  DebugLoc dl = Op.getDebugLoc();
1568  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1569  SDValue Res;
1570  if (CP->isMachineConstantPoolEntry())
1571    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1572                                    CP->getAlignment());
1573  else
1574    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1575                                    CP->getAlignment());
1576  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1577}
1578
1579SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1580                                             SelectionDAG &DAG) const {
1581  MachineFunction &MF = DAG.getMachineFunction();
1582  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1583  unsigned ARMPCLabelIndex = 0;
1584  DebugLoc DL = Op.getDebugLoc();
1585  EVT PtrVT = getPointerTy();
1586  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1587  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1588  SDValue CPAddr;
1589  if (RelocM == Reloc::Static) {
1590    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1591  } else {
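    // The PC adjustment accounts for the PC read-ahead on ARM: reading PC
    // yields the instruction address plus 8 in ARM mode and plus 4 in Thumb.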
1592    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1593    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1594    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
1595                                                         ARMCP::CPBlockAddress,
1596                                                         PCAdj);
1597    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1598  }
1599  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1600  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1601                               PseudoSourceValue::getConstantPool(), 0,
1602                               false, false, 0);
1603  if (RelocM == Reloc::Static)
1604    return Result;
1605  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1606  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1607}
1608
1609// Lower ISD::GlobalTLSAddress using the "general dynamic" model
1610SDValue
1611ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1612                                                 SelectionDAG &DAG) const {
1613  DebugLoc dl = GA->getDebugLoc();
1614  EVT PtrVT = getPointerTy();
1615  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1616  MachineFunction &MF = DAG.getMachineFunction();
1617  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1618  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1619  ARMConstantPoolValue *CPV =
1620    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1621                             ARMCP::CPValue, PCAdj, "tlsgd", true);
1622  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1623  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1624  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1625                         PseudoSourceValue::getConstantPool(), 0,
1626                         false, false, 0);
1627  SDValue Chain = Argument.getValue(1);
1628
1629  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1630  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1631
1632  // call __tls_get_addr.
1633  ArgListTy Args;
1634  ArgListEntry Entry;
1635  Entry.Node = Argument;
1636  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
1637  Args.push_back(Entry);
1638  // FIXME: is there useful debug info available here?
1639  std::pair<SDValue, SDValue> CallResult =
1640    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
1641                false, false, false, false,
1642                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1643                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1644  return CallResult.first;
1645}
1646
1647// Lower ISD::GlobalTLSAddress using the "initial exec" or
1648// "local exec" model.
1649SDValue
1650ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1651                                        SelectionDAG &DAG) const {
1652  const GlobalValue *GV = GA->getGlobal();
1653  DebugLoc dl = GA->getDebugLoc();
1654  SDValue Offset;
1655  SDValue Chain = DAG.getEntryNode();
1656  EVT PtrVT = getPointerTy();
1657  // Get the Thread Pointer
1658  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1659
1660  if (GV->isDeclaration()) {
1661    MachineFunction &MF = DAG.getMachineFunction();
1662    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1663    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1664    // Initial exec model.
1665    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1666    ARMConstantPoolValue *CPV =
1667      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1668                               ARMCP::CPValue, PCAdj, "gottpoff", true);
1669    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1670    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1671    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1672                         PseudoSourceValue::getConstantPool(), 0,
1673                         false, false, 0);
1674    Chain = Offset.getValue(1);
1675
1676    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1677    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1678
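    // In the initial exec model the GOT entry holds the variable's offset from
    // the thread pointer, so load it.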
1679    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1680                         PseudoSourceValue::getConstantPool(), 0,
1681                         false, false, 0);
1682  } else {
1683    // local exec model
1684    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
1685    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1686    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1687    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1688                         PseudoSourceValue::getConstantPool(), 0,
1689                         false, false, 0);
1690  }
1691
1692  // The address of the thread local variable is the add of the thread
1693  // pointer with the offset of the variable.
1694  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1695}
1696
1697SDValue
1698ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
1699  // TODO: implement the "local dynamic" model
1700  assert(Subtarget->isTargetELF() &&
1701         "TLS not implemented for non-ELF targets");
1702  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1703  // If the relocation model is PIC, use the "general dynamic" TLS model;
1704  // otherwise use the "initial exec" or "local exec" model.
1705  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1706    return LowerToTLSGeneralDynamicModel(GA, DAG);
1707  else
1708    return LowerToTLSExecModels(GA, DAG);
1709}
1710
1711SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1712                                                 SelectionDAG &DAG) const {
1713  EVT PtrVT = getPointerTy();
1714  DebugLoc dl = Op.getDebugLoc();
1715  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1716  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1717  if (RelocM == Reloc::PIC_) {
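    // Symbols with local linkage or hidden visibility can be addressed relative
    // to the GOT base (GOTOFF); other symbols need an extra load through their
    // GOT entry.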
1718    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
1719    ARMConstantPoolValue *CPV =
1720      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
1721    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1722    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1723    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1724                                 CPAddr,
1725                                 PseudoSourceValue::getConstantPool(), 0,
1726                                 false, false, 0);
1727    SDValue Chain = Result.getValue(1);
1728    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1729    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1730    if (!UseGOTOFF)
1731      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1732                           PseudoSourceValue::getGOT(), 0,
1733                           false, false, 0);
1734    return Result;
1735  } else {
1736    // If we have T2 ops, we can materialize the address directly via a
1737    // movt/movw pair. This is always cheaper.
1738    if (Subtarget->useMovt()) {
1739      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
1740                         DAG.getTargetGlobalAddress(GV, PtrVT));
1741    } else {
1742      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1743      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1744      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1745                         PseudoSourceValue::getConstantPool(), 0,
1746                         false, false, 0);
1747    }
1748  }
1749}
1750
1751SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1752                                                    SelectionDAG &DAG) const {
1753  MachineFunction &MF = DAG.getMachineFunction();
1754  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1755  unsigned ARMPCLabelIndex = 0;
1756  EVT PtrVT = getPointerTy();
1757  DebugLoc dl = Op.getDebugLoc();
1758  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1759  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1760  SDValue CPAddr;
1761  if (RelocM == Reloc::Static)
1762    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1763  else {
1764    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1765    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
1766    ARMConstantPoolValue *CPV =
1767      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
1768    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1769  }
1770  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1771
1772  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1773                               PseudoSourceValue::getConstantPool(), 0,
1774                               false, false, 0);
1775  SDValue Chain = Result.getValue(1);
1776
1777  if (RelocM == Reloc::PIC_) {
1778    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1779    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1780  }
1781
1782  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
1783    Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1784                         PseudoSourceValue::getGOT(), 0,
1785                         false, false, 0);
1786
1787  return Result;
1788}
1789
1790SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1791                                                    SelectionDAG &DAG) const {
1792  assert(Subtarget->isTargetELF() &&
1793         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1794  MachineFunction &MF = DAG.getMachineFunction();
1795  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1796  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1797  EVT PtrVT = getPointerTy();
1798  DebugLoc dl = Op.getDebugLoc();
1799  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1800  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1801                                                       "_GLOBAL_OFFSET_TABLE_",
1802                                                       ARMPCLabelIndex, PCAdj);
1803  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1804  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1805  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1806                               PseudoSourceValue::getConstantPool(), 0,
1807                               false, false, 0);
1808  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1809  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1810}
1811
1812SDValue
1813ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
1814  DebugLoc dl = Op.getDebugLoc();
1815  SDValue Val = DAG.getConstant(0, MVT::i32);
1816  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
1817                     Op.getOperand(1), Val);
1818}
1819
1820SDValue
1821ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
1822  DebugLoc dl = Op.getDebugLoc();
1823  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
1824                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
1825}
1826
1827SDValue
1828ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
1829                                          const ARMSubtarget *Subtarget) const {
1830  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1831  DebugLoc dl = Op.getDebugLoc();
1832  switch (IntNo) {
1833  default: return SDValue();    // Don't custom lower most intrinsics.
1834  case Intrinsic::arm_thread_pointer: {
1835    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1836    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1837  }
1838  case Intrinsic::eh_sjlj_lsda: {
1839    MachineFunction &MF = DAG.getMachineFunction();
1840    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1841    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1842    EVT PtrVT = getPointerTy();
1843    DebugLoc dl = Op.getDebugLoc();
1844    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1845    SDValue CPAddr;
1846    unsigned PCAdj = (RelocM != Reloc::PIC_)
1847      ? 0 : (Subtarget->isThumb() ? 4 : 8);
1848    ARMConstantPoolValue *CPV =
1849      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
1850                               ARMCP::CPLSDA, PCAdj);
1851    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1852    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1853    SDValue Result =
1854      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1855                  PseudoSourceValue::getConstantPool(), 0,
1856                  false, false, 0);
1857    SDValue Chain = Result.getValue(1);
1858
1859    if (RelocM == Reloc::PIC_) {
1860      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1861      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1862    }
1863    return Result;
1864  }
1865  }
1866}
1867
1868static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
1869                               const ARMSubtarget *Subtarget) {
1870  DebugLoc dl = Op.getDebugLoc();
1871  SDValue Op5 = Op.getOperand(5);
1872  unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
1873  // v6 and v7 can both handle barriers directly, but they need to be handled
1874  // a bit differently. Thumb1 and pre-v6 ARM mode use a libcall instead and
1875  // should never get here.
1876  unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
1877  if (Subtarget->hasV7Ops())
1878    return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
1879  else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
1880    return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
1881                       DAG.getConstant(0, MVT::i32));
1882  assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
1883  return SDValue();
1884}
1885
1886static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
1887  MachineFunction &MF = DAG.getMachineFunction();
1888  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
1889
1890  // vastart just stores the address of the VarArgsFrameIndex slot into the
1891  // memory location argument.
1892  DebugLoc dl = Op.getDebugLoc();
1893  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1894  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1895  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1896  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
1897                      false, false, 0);
1898}
1899
1900SDValue
1901ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1902                                           SelectionDAG &DAG) const {
1903  SDNode *Node = Op.getNode();
1904  DebugLoc dl = Node->getDebugLoc();
1905  EVT VT = Node->getValueType(0);
1906  SDValue Chain = Op.getOperand(0);
1907  SDValue Size  = Op.getOperand(1);
1908  SDValue Align = Op.getOperand(2);
1909
1910  // Chain the dynamic stack allocation so that it doesn't modify the stack
1911  // pointer when other instructions are using the stack.
1912  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
1913
1914  unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
1915  unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
1916  if (AlignVal > StackAlign)
1917    // Do this now since the selection pass cannot introduce new
1918    // target-independent nodes.
1919    Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
1920
1921  // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, so we will end
1922  // up using an "add r, sp, r" instead. Negate the size now so we don't have
1923  // to do an even more horrible hack later.
1924  MachineFunction &MF = DAG.getMachineFunction();
1925  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1926  if (AFI->isThumb1OnlyFunction()) {
1927    bool Negate = true;
1928    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
1929    if (C) {
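      // Small, word-aligned constant sizes can be added to SP directly, so no
      // negation is needed.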
1930      uint32_t Val = C->getZExtValue();
1931      if (Val <= 508 && ((Val & 3) == 0))
1932        Negate = false;
1933    }
1934    if (Negate)
1935      Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
1936  }
1937
1938  SDVTList VTList = DAG.getVTList(VT, MVT::Other);
1939  SDValue Ops1[] = { Chain, Size, Align };
1940  SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
1941  Chain = Res.getValue(1);
1942  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
1943                             DAG.getIntPtrConstant(0, true), SDValue());
1944  SDValue Ops2[] = { Res, Chain };
1945  return DAG.getMergeValues(Ops2, 2, dl);
1946}
1947
1948SDValue
1949ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
1950                                        SDValue &Root, SelectionDAG &DAG,
1951                                        DebugLoc dl) const {
1952  MachineFunction &MF = DAG.getMachineFunction();
1953  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1954
1955  TargetRegisterClass *RC;
1956  if (AFI->isThumb1OnlyFunction())
1957    RC = ARM::tGPRRegisterClass;
1958  else
1959    RC = ARM::GPRRegisterClass;
1960
1961  // Transform the arguments stored in physical registers into virtual ones.
1962  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1963  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1964
1965  SDValue ArgValue2;
1966  if (NextVA.isMemLoc()) {
1967    MachineFrameInfo *MFI = MF.getFrameInfo();
1968    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
1969
1970    // Create load node to retrieve arguments from the stack.
1971    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1972    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
1973                            PseudoSourceValue::getFixedStack(FI), 0,
1974                            false, false, 0);
1975  } else {
1976    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1977    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1978  }
1979
1980  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
1981}
1982
1983SDValue
1984ARMTargetLowering::LowerFormalArguments(SDValue Chain,
1985                                        CallingConv::ID CallConv, bool isVarArg,
1986                                        const SmallVectorImpl<ISD::InputArg>
1987                                          &Ins,
1988                                        DebugLoc dl, SelectionDAG &DAG,
1989                                        SmallVectorImpl<SDValue> &InVals)
1990                                          const {
1991
1992  MachineFunction &MF = DAG.getMachineFunction();
1993  MachineFrameInfo *MFI = MF.getFrameInfo();
1994
1995  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1996
1997  // Assign locations to all of the incoming arguments.
1998  SmallVector<CCValAssign, 16> ArgLocs;
1999  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
2000                 *DAG.getContext());
2001  CCInfo.AnalyzeFormalArguments(Ins,
2002                                CCAssignFnForNode(CallConv, /* Return*/ false,
2003                                                  isVarArg));
2004
2005  SmallVector<SDValue, 16> ArgValues;
2006
2007  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2008    CCValAssign &VA = ArgLocs[i];
2009
2010    // Arguments stored in registers.
2011    if (VA.isRegLoc()) {
2012      EVT RegVT = VA.getLocVT();
2013
2014      SDValue ArgValue;
2015      if (VA.needsCustom()) {
2016        // f64 and vector types are split up into multiple registers or
2017        // combinations of registers and stack slots.
2018        if (VA.getLocVT() == MVT::v2f64) {
2019          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2020                                                   Chain, DAG, dl);
2021          VA = ArgLocs[++i]; // skip ahead to next loc
2022          SDValue ArgValue2;
2023          if (VA.isMemLoc()) {
2024            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
2025                                            true, false);
2026            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2027            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2028                                    PseudoSourceValue::getFixedStack(FI), 0,
2029                                    false, false, 0);
2030          } else {
2031            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2032                                             Chain, DAG, dl);
2033          }
2034          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2035          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2036                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2037          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2038                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2039        } else
2040          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2041
2042      } else {
2043        TargetRegisterClass *RC;
2044
2045        if (RegVT == MVT::f32)
2046          RC = ARM::SPRRegisterClass;
2047        else if (RegVT == MVT::f64)
2048          RC = ARM::DPRRegisterClass;
2049        else if (RegVT == MVT::v2f64)
2050          RC = ARM::QPRRegisterClass;
2051        else if (RegVT == MVT::i32)
2052          RC = (AFI->isThumb1OnlyFunction() ?
2053                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2054        else
2055          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2056
2057        // Transform the arguments in physical registers into virtual ones.
2058        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2059        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2060      }
2061
2062      // If this is an 8 or 16-bit value, it is really passed promoted
2063      // to 32 bits.  Insert an assert[sz]ext to capture this, then
2064      // truncate to the right size.
2065      switch (VA.getLocInfo()) {
2066      default: llvm_unreachable("Unknown loc info!");
2067      case CCValAssign::Full: break;
2068      case CCValAssign::BCvt:
2069        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
2070        break;
2071      case CCValAssign::SExt:
2072        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2073                               DAG.getValueType(VA.getValVT()));
2074        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2075        break;
2076      case CCValAssign::ZExt:
2077        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2078                               DAG.getValueType(VA.getValVT()));
2079        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2080        break;
2081      }
2082
2083      InVals.push_back(ArgValue);
2084
2085    } else { // VA.isRegLoc()
2086
2087      // sanity check
2088      assert(VA.isMemLoc());
2089      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2090
2091      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
2092      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2093                                      true, false);
2094
2095      // Create load nodes to retrieve arguments from the stack.
2096      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2097      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2098                                   PseudoSourceValue::getFixedStack(FI), 0,
2099                                   false, false, 0));
2100    }
2101  }
2102
2103  // varargs
2104  if (isVarArg) {
2105    static const unsigned GPRArgRegs[] = {
2106      ARM::R0, ARM::R1, ARM::R2, ARM::R3
2107    };
2108
2109    unsigned NumGPRs = CCInfo.getFirstUnallocated
2110      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2111
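    // Compute how much stack is needed to spill the unnamed arguments that
    // were passed in r0-r3, rounded up to the stack alignment.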
2112    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
2113    unsigned VARegSize = (4 - NumGPRs) * 4;
2114    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2115    unsigned ArgOffset = CCInfo.getNextStackOffset();
2116    if (VARegSaveSize) {
2117      // If this function is vararg, store any remaining integer argument regs
2118      // to their spots on the stack so that they may be loaded by dereferencing
2119      // the result of va_next.
2120      AFI->setVarArgsRegSaveSize(VARegSaveSize);
2121      AFI->setVarArgsFrameIndex(
2122        MFI->CreateFixedObject(VARegSaveSize,
2123                               ArgOffset + VARegSaveSize - VARegSize,
2124                               true, false));
2125      SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2126                                      getPointerTy());
2127
2128      SmallVector<SDValue, 4> MemOps;
2129      for (; NumGPRs < 4; ++NumGPRs) {
2130        TargetRegisterClass *RC;
2131        if (AFI->isThumb1OnlyFunction())
2132          RC = ARM::tGPRRegisterClass;
2133        else
2134          RC = ARM::GPRRegisterClass;
2135
2136        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
2137        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2138        SDValue Store =
2139          DAG.getStore(Val.getValue(1), dl, Val, FIN,
2140               PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
2141               0, false, false, 0);
2142        MemOps.push_back(Store);
2143        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2144                          DAG.getConstant(4, getPointerTy()));
2145      }
2146      if (!MemOps.empty())
2147        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2148                            &MemOps[0], MemOps.size());
2149    } else
2150      // This will point to the next argument passed via the stack.
2151      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
2152                                                       true, false));
2153  }
2154
2155  return Chain;
2156}
2157
2158/// isFloatingPointZero - Return true if this is +0.0.
2159static bool isFloatingPointZero(SDValue Op) {
2160  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2161    return CFP->getValueAPF().isPosZero();
2162  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2163    // Maybe this has already been legalized into the constant pool?
2164    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2165      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2166      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2167        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2168          return CFP->getValueAPF().isPosZero();
2169    }
2170  }
2171  return false;
2172}
2173
2174/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
2175/// the given operands.
2176SDValue
2177ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2178                             SDValue &ARMCC, SelectionDAG &DAG,
2179                             DebugLoc dl) const {
2180  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2181    unsigned C = RHSC->getZExtValue();
2182    if (!isLegalICmpImmediate(C)) {
2183      // Constant does not fit, try adjusting it by one?
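      // For example, (x < C) is equivalent to (x <= C-1), and the adjusted
      // constant may be encodable as an immediate when the original is not.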
2184      switch (CC) {
2185      default: break;
2186      case ISD::SETLT:
2187      case ISD::SETGE:
2188        if (isLegalICmpImmediate(C-1)) {
2189          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2190          RHS = DAG.getConstant(C-1, MVT::i32);
2191        }
2192        break;
2193      case ISD::SETULT:
2194      case ISD::SETUGE:
2195        if (C > 0 && isLegalICmpImmediate(C-1)) {
2196          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2197          RHS = DAG.getConstant(C-1, MVT::i32);
2198        }
2199        break;
2200      case ISD::SETLE:
2201      case ISD::SETGT:
2202        if (isLegalICmpImmediate(C+1)) {
2203          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2204          RHS = DAG.getConstant(C+1, MVT::i32);
2205        }
2206        break;
2207      case ISD::SETULE:
2208      case ISD::SETUGT:
2209        if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
2210          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2211          RHS = DAG.getConstant(C+1, MVT::i32);
2212        }
2213        break;
2214      }
2215    }
2216  }
2217
2218  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2219  ARMISD::NodeType CompareType;
2220  switch (CondCode) {
2221  default:
2222    CompareType = ARMISD::CMP;
2223    break;
2224  case ARMCC::EQ:
2225  case ARMCC::NE:
2226    // Uses only Z Flag
2227    CompareType = ARMISD::CMPZ;
2228    break;
2229  }
2230  ARMCC = DAG.getConstant(CondCode, MVT::i32);
2231  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
2232}
2233
2234/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2235static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2236                         DebugLoc dl) {
2237  SDValue Cmp;
2238  if (!isFloatingPointZero(RHS))
2239    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
2240  else
2241    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
2242  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
2243}
2244
2245SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2246  EVT VT = Op.getValueType();
2247  SDValue LHS = Op.getOperand(0);
2248  SDValue RHS = Op.getOperand(1);
2249  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2250  SDValue TrueVal = Op.getOperand(2);
2251  SDValue FalseVal = Op.getOperand(3);
2252  DebugLoc dl = Op.getDebugLoc();
2253
2254  if (LHS.getValueType() == MVT::i32) {
2255    SDValue ARMCC;
2256    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2257    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2258    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
2259  }
2260
2261  ARMCC::CondCodes CondCode, CondCode2;
2262  FPCCToARMCC(CC, CondCode, CondCode2);
2263
2264  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2265  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2266  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2267  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2268                                 ARMCC, CCR, Cmp);
2269  if (CondCode2 != ARMCC::AL) {
2270    SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
2271    // FIXME: Needs another CMP because flag can have but one use.
2272    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2273    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2274                         Result, TrueVal, ARMCC2, CCR, Cmp2);
2275  }
2276  return Result;
2277}
2278
2279SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2280  SDValue  Chain = Op.getOperand(0);
2281  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2282  SDValue    LHS = Op.getOperand(2);
2283  SDValue    RHS = Op.getOperand(3);
2284  SDValue   Dest = Op.getOperand(4);
2285  DebugLoc dl = Op.getDebugLoc();
2286
2287  if (LHS.getValueType() == MVT::i32) {
2288    SDValue ARMCC;
2289    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2290    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
2291    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2292                       Chain, Dest, ARMCC, CCR,Cmp);
2293  }
2294
2295  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2296  ARMCC::CondCodes CondCode, CondCode2;
2297  FPCCToARMCC(CC, CondCode, CondCode2);
2298
2299  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2300  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2301  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2302  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2303  SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
2304  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2305  if (CondCode2 != ARMCC::AL) {
2306    ARMCC = DAG.getConstant(CondCode2, MVT::i32);
2307    SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
2308    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2309  }
2310  return Res;
2311}
2312
2313SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2314  SDValue Chain = Op.getOperand(0);
2315  SDValue Table = Op.getOperand(1);
2316  SDValue Index = Op.getOperand(2);
2317  DebugLoc dl = Op.getDebugLoc();
2318
2319  EVT PTy = getPointerTy();
2320  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2321  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2322  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2323  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2324  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2325  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2326  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2327  if (Subtarget->isThumb2()) {
2328    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2329    // which does another jump to the destination. This also makes it easier
2330    // to translate it to TBB / TBH later.
2331    // FIXME: This might not work if the function is extremely large.
2332    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2333                       Addr, Op.getOperand(2), JTI, UId);
2334  }
2335  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
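    // In PIC mode the jump table holds offsets relative to the table itself,
    // so load the entry and add the table address to form the destination.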
2336    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2337                       PseudoSourceValue::getJumpTable(), 0,
2338                       false, false, 0);
2339    Chain = Addr.getValue(1);
2340    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2341    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2342  } else {
2343    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2344                       PseudoSourceValue::getJumpTable(), 0, false, false, 0);
2345    Chain = Addr.getValue(1);
2346    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2347  }
2348}
2349
2350static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2351  DebugLoc dl = Op.getDebugLoc();
2352  unsigned Opc;
2353
2354  switch (Op.getOpcode()) {
2355  default:
2356    assert(0 && "Invalid opcode!");
2357  case ISD::FP_TO_SINT:
2358    Opc = ARMISD::FTOSI;
2359    break;
2360  case ISD::FP_TO_UINT:
2361    Opc = ARMISD::FTOUI;
2362    break;
2363  }
2364  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2365  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
2366}
2367
2368static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2369  EVT VT = Op.getValueType();
2370  DebugLoc dl = Op.getDebugLoc();
2371  unsigned Opc;
2372
2373  switch (Op.getOpcode()) {
2374  default:
2375    assert(0 && "Invalid opcode!");
2376  case ISD::SINT_TO_FP:
2377    Opc = ARMISD::SITOF;
2378    break;
2379  case ISD::UINT_TO_FP:
2380    Opc = ARMISD::UITOF;
2381    break;
2382  }
2383
2384  Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
2385  return DAG.getNode(Opc, dl, VT, Op);
2386}
2387
2388static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
2389  // Implement fcopysign with a fabs and a conditional fneg.
2390  SDValue Tmp0 = Op.getOperand(0);
2391  SDValue Tmp1 = Op.getOperand(1);
2392  DebugLoc dl = Op.getDebugLoc();
2393  EVT VT = Op.getValueType();
2394  EVT SrcVT = Tmp1.getValueType();
2395  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
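  // Compare the sign source against zero; when it is negative, conditionally
  // negate the absolute value to copy its sign.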
2396  SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
2397  SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
2398  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2399  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
2400}
2401
2402SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
2403  MachineFunction &MF = DAG.getMachineFunction();
2404  MachineFrameInfo *MFI = MF.getFrameInfo();
2405  MFI->setReturnAddressIsTaken(true);
2406
2407  EVT VT = Op.getValueType();
2408  DebugLoc dl = Op.getDebugLoc();
2409  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2410  if (Depth) {
2411    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2412    SDValue Offset = DAG.getConstant(4, MVT::i32);
2413    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2414                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2415                       NULL, 0, false, false, 0);
2416  }
2417
2418  // Return LR, which contains the return address. Mark it an implicit live-in.
2419  unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
2420  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
2421}
2422
2423SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2424  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2425  MFI->setFrameAddressIsTaken(true);
2426
2427  EVT VT = Op.getValueType();
2428  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
2429  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2430  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2431    ? ARM::R7 : ARM::R11;
2432  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2433  while (Depth--)
2434    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
2435                            false, false, 0);
2436  return FrameAddr;
2437}
2438
2439/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
2440/// expand a bit convert where either the source or destination type is i64 to
2441/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
2442/// operand type is illegal (e.g., v2f32 for a target that doesn't support
2443/// vectors), since the legalizer won't know what to do with that.
2444static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
2445  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2446  DebugLoc dl = N->getDebugLoc();
2447  SDValue Op = N->getOperand(0);
2448
2449  // This function is only supposed to be called for i64 types, either as the
2450  // source or destination of the bit convert.
2451  EVT SrcVT = Op.getValueType();
2452  EVT DstVT = N->getValueType(0);
2453  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2454         "ExpandBIT_CONVERT called for non-i64 type");
2455
2456  // Turn i64->f64 into VMOVDRR.
2457  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2458    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2459                             DAG.getConstant(0, MVT::i32));
2460    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2461                             DAG.getConstant(1, MVT::i32));
2462    return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
2463                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
2464  }
2465
2466  // Turn f64->i64 into VMOVRRD.
2467  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2468    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2469                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2470    // Merge the pieces into a single i64 value.
2471    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2472  }
2473
2474  return SDValue();
2475}
2476
2477/// getZeroVector - Returns a vector of specified type with all zero elements.
2478///
2479static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2480  assert(VT.isVector() && "Expected a vector type");
2481
2482  // Zero vectors are used to represent vector negation and in those cases
2483  // will be implemented with the NEON VNEG instruction.  However, VNEG does
2484  // not support i64 elements, so sometimes the zero vectors will need to be
2485  // explicitly constructed.  For those cases, and potentially other uses in
2486  // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
2487  // to their dest type.  This ensures they get CSE'd.
2488  SDValue Vec;
2489  SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
2490  SmallVector<SDValue, 8> Ops;
2491  MVT TVT;
2492
2493  if (VT.getSizeInBits() == 64) {
2494    Ops.assign(8, Cst); TVT = MVT::v8i8;
2495  } else {
2496    Ops.assign(16, Cst); TVT = MVT::v16i8;
2497  }
2498  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2499
2500  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2501}
2502
2503/// getOnesVector - Returns a vector of specified type with all bits set.
2504///
2505static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2506  assert(VT.isVector() && "Expected a vector type");
2507
2508  // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
2509  // dest type. This ensures they get CSE'd.
2510  SDValue Vec;
2511  SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
2512  SmallVector<SDValue, 8> Ops;
2513  MVT TVT;
2514
2515  if (VT.getSizeInBits() == 64) {
2516    Ops.assign(8, Cst); TVT = MVT::v8i8;
2517  } else {
2518    Ops.assign(16, Cst); TVT = MVT::v16i8;
2519  }
2520  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2521
2522  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2523}
2524
2525/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which take a 2 x i32
2526/// value to shift plus a shift amount and return two i32 values.
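///
/// Roughly, with the input split as {Lo, Hi} and a shift amount Amt < 32:
///   Lo = (Lo >>u Amt) | (Hi << (32 - Amt)),  Hi = Hi >> Amt (SRA or SRL);
/// when Amt >= 32, the CMOV below selects Hi >> (Amt - 32) for the Lo result
/// instead.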
2527SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2528                                                SelectionDAG &DAG) const {
2529  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2530  EVT VT = Op.getValueType();
2531  unsigned VTBits = VT.getSizeInBits();
2532  DebugLoc dl = Op.getDebugLoc();
2533  SDValue ShOpLo = Op.getOperand(0);
2534  SDValue ShOpHi = Op.getOperand(1);
2535  SDValue ShAmt  = Op.getOperand(2);
2536  SDValue ARMCC;
2537  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2538
2539  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2540
2541  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2542                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2543  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2544  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2545                                   DAG.getConstant(VTBits, MVT::i32));
2546  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2547  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2548  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2549
2550  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2551  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2552                          ARMCC, DAG, dl);
2553  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2554  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
2555                           CCR, Cmp);
2556
2557  SDValue Ops[2] = { Lo, Hi };
2558  return DAG.getMergeValues(Ops, 2, dl);
2559}
2560
2561/// LowerShiftLeftParts - Lower SHL_PARTS, which takes a 2 x i32 value to
2562/// shift plus a shift amount and returns two i32 values.
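///
/// Roughly, with the input split as {Lo, Hi} and a shift amount Amt < 32:
///   Lo = Lo << Amt,  Hi = (Hi << Amt) | (Lo >>u (32 - Amt));
/// when Amt >= 32, the CMOV below selects Lo << (Amt - 32) for the Hi result
/// instead.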
2563SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2564                                               SelectionDAG &DAG) const {
2565  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2566  EVT VT = Op.getValueType();
2567  unsigned VTBits = VT.getSizeInBits();
2568  DebugLoc dl = Op.getDebugLoc();
2569  SDValue ShOpLo = Op.getOperand(0);
2570  SDValue ShOpHi = Op.getOperand(1);
2571  SDValue ShAmt  = Op.getOperand(2);
2572  SDValue ARMCC;
2573
2574  assert(Op.getOpcode() == ISD::SHL_PARTS);
2575  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2576                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2577  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2578  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2579                                   DAG.getConstant(VTBits, MVT::i32));
2580  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2581  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2582
2583  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2584  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2585  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2586                          ARMCC, DAG, dl);
2587  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2588  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
2589                           CCR, Cmp);
2590
2591  SDValue Ops[2] = { Lo, Hi };
2592  return DAG.getMergeValues(Ops, 2, dl);
2593}
2594
2595static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2596                         const ARMSubtarget *ST) {
2597  EVT VT = N->getValueType(0);
2598  DebugLoc dl = N->getDebugLoc();
2599
2600  if (!ST->hasV6T2Ops())
2601    return SDValue();
2602
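  // CTTZ(X) is computed as CTLZ of the bit-reversed value; V6T2 provides the
  // RBIT and CLZ instructions needed for this.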
2603  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2604  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2605}
2606
2607static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2608                          const ARMSubtarget *ST) {
2609  EVT VT = N->getValueType(0);
2610  DebugLoc dl = N->getDebugLoc();
2611
2612  // Lower vector shifts on NEON to use VSHL.
2613  if (VT.isVector()) {
2614    assert(ST->hasNEON() && "unexpected vector shift");
2615
2616    // Left shifts translate directly to the vshiftu intrinsic.
2617    if (N->getOpcode() == ISD::SHL)
2618      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2619                         DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2620                         N->getOperand(0), N->getOperand(1));
2621
2622    assert((N->getOpcode() == ISD::SRA ||
2623            N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2624
2625    // NEON uses the same intrinsics for both left and right shifts.  For
2626    // right shifts, the shift amounts are negative, so negate the vector of
2627    // shift amounts.
2628    EVT ShiftVT = N->getOperand(1).getValueType();
2629    SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2630                                       getZeroVector(ShiftVT, DAG, dl),
2631                                       N->getOperand(1));
2632    Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2633                               Intrinsic::arm_neon_vshifts :
2634                               Intrinsic::arm_neon_vshiftu);
2635    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2636                       DAG.getConstant(vshiftInt, MVT::i32),
2637                       N->getOperand(0), NegatedCount);
2638  }
2639
2640  // We can get here for a node like i32 = ISD::SHL i32, i64
2641  if (VT != MVT::i64)
2642    return SDValue();
2643
2644  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2645         "Unknown shift to lower!");
2646
2647  // We only lower SRA, SRL of 1 here; all others use generic lowering.
2648  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2649      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2650    return SDValue();
2651
2652  // If we are in thumb mode, we don't have RRX.
2653  if (ST->isThumb1Only()) return SDValue();
2654
2655  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
2656  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2657                           DAG.getConstant(0, MVT::i32));
2658  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2659                           DAG.getConstant(1, MVT::i32));
2660
2661  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2662  // captures the result into a carry flag.
2663  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2664  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2665
2666  // The low part is an ARMISD::RRX operand, which shifts the carry in.
2667  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2668
2669  // Merge the pieces into a single i64 value.
2670  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2671}
2672
2673static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2674  SDValue TmpOp0, TmpOp1;
2675  bool Invert = false;
2676  bool Swap = false;
2677  unsigned Opc = 0;
2678
2679  SDValue Op0 = Op.getOperand(0);
2680  SDValue Op1 = Op.getOperand(1);
2681  SDValue CC = Op.getOperand(2);
2682  EVT VT = Op.getValueType();
2683  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2684  DebugLoc dl = Op.getDebugLoc();
2685
2686  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
2687    switch (SetCCOpcode) {
2688    default: llvm_unreachable("Illegal FP comparison"); break;
2689    case ISD::SETUNE:
2690    case ISD::SETNE:  Invert = true; // Fallthrough
2691    case ISD::SETOEQ:
2692    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2693    case ISD::SETOLT:
2694    case ISD::SETLT: Swap = true; // Fallthrough
2695    case ISD::SETOGT:
2696    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2697    case ISD::SETOLE:
2698    case ISD::SETLE:  Swap = true; // Fallthrough
2699    case ISD::SETOGE:
2700    case ISD::SETGE: Opc = ARMISD::VCGE; break;
2701    case ISD::SETUGE: Swap = true; // Fallthrough
2702    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2703    case ISD::SETUGT: Swap = true; // Fallthrough
2704    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2705    case ISD::SETUEQ: Invert = true; // Fallthrough
2706    case ISD::SETONE:
2707      // Expand this to (OLT | OGT).
2708      TmpOp0 = Op0;
2709      TmpOp1 = Op1;
2710      Opc = ISD::OR;
2711      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2712      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2713      break;
2714    case ISD::SETUO: Invert = true; // Fallthrough
2715    case ISD::SETO:
2716      // Expand this to (OLT | OGE).
2717      TmpOp0 = Op0;
2718      TmpOp1 = Op1;
2719      Opc = ISD::OR;
2720      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2721      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
2722      break;
2723    }
2724  } else {
2725    // Integer comparisons.
2726    switch (SetCCOpcode) {
2727    default: llvm_unreachable("Illegal integer comparison"); break;
2728    case ISD::SETNE:  Invert = true;
2729    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2730    case ISD::SETLT:  Swap = true;
2731    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2732    case ISD::SETLE:  Swap = true;
2733    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
2734    case ISD::SETULT: Swap = true;
2735    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
2736    case ISD::SETULE: Swap = true;
2737    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
2738    }
2739
2740    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2741    if (Opc == ARMISD::VCEQ) {
2742
2743      SDValue AndOp;
2744      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
2745        AndOp = Op0;
2746      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
2747        AndOp = Op1;
2748
2749      // Ignore bitconvert.
2750      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
2751        AndOp = AndOp.getOperand(0);
2752
2753      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
2754        Opc = ARMISD::VTST;
2755        Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
2756        Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
2757        Invert = !Invert;
2758      }
2759    }
2760  }
2761
2762  if (Swap)
2763    std::swap(Op0, Op1);
2764
2765  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
2766
2767  if (Invert)
2768    Result = DAG.getNOT(dl, Result, VT);
2769
2770  return Result;
2771}
2772
2773/// isNEONModifiedImm - Check if the specified splat value corresponds to a
2774/// valid vector constant for a NEON instruction with a "modified immediate"
2775/// operand (e.g., VMOV).  If so, return either the constant being
2776/// splatted or the encoded value, depending on the DoEncode parameter.  The
2777/// format of the encoded value is: bit12=Op, bits11-8=Cmode,
2778/// bits7-0=Immediate.
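///
/// For illustration, a 32-bit splat of 0x00ab0000 is accepted here with
/// Op=0, Cmode=0x4 and Imm=0xab, so with DoEncode it returns 0x4ab.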
2779static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
2780                                 unsigned SplatBitSize, SelectionDAG &DAG,
2781                                 bool isVMOV, bool DoEncode) {
2782  unsigned Op, Cmode, Imm;
2783  EVT VT;
2784
2785  // SplatBitSize is set to the smallest size that splats the vector, so a
2786  // zero vector will always have SplatBitSize == 8.  However, NEON modified
2787  // immediate instructions other than VMOV do not support the 8-bit encoding
2788  // of a zero vector, and the default encoding of zero is supposed to be the
2789  // 32-bit version.
2790  if (SplatBits == 0)
2791    SplatBitSize = 32;
2792
2793  Op = 0;
2794  switch (SplatBitSize) {
2795  case 8:
2796    // Any 1-byte value is OK.  Op=0, Cmode=1110.
2797    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
2798    Cmode = 0xe;
2799    Imm = SplatBits;
2800    VT = MVT::i8;
2801    break;
2802
2803  case 16:
2804    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
2805    VT = MVT::i16;
2806    if ((SplatBits & ~0xff) == 0) {
2807      // Value = 0x00nn: Op=x, Cmode=100x.
2808      Cmode = 0x8;
2809      Imm = SplatBits;
2810      break;
2811    }
2812    if ((SplatBits & ~0xff00) == 0) {
2813      // Value = 0xnn00: Op=x, Cmode=101x.
2814      Cmode = 0xa;
2815      Imm = SplatBits >> 8;
2816      break;
2817    }
2818    return SDValue();
2819
2820  case 32:
2821    // NEON's 32-bit VMOV supports splat values where:
2822    // * only one byte is nonzero, or
2823    // * the least significant byte is 0xff and the second byte is nonzero, or
2824    // * the least significant 2 bytes are 0xff and the third is nonzero.
2825    VT = MVT::i32;
2826    if ((SplatBits & ~0xff) == 0) {
2827      // Value = 0x000000nn: Op=x, Cmode=000x.
2828      Cmode = 0;
2829      Imm = SplatBits;
2830      break;
2831    }
2832    if ((SplatBits & ~0xff00) == 0) {
2833      // Value = 0x0000nn00: Op=x, Cmode=001x.
2834      Cmode = 0x2;
2835      Imm = SplatBits >> 8;
2836      break;
2837    }
2838    if ((SplatBits & ~0xff0000) == 0) {
2839      // Value = 0x00nn0000: Op=x, Cmode=010x.
2840      Cmode = 0x4;
2841      Imm = SplatBits >> 16;
2842      break;
2843    }
2844    if ((SplatBits & ~0xff000000) == 0) {
2845      // Value = 0xnn000000: Op=x, Cmode=011x.
2846      Cmode = 0x6;
2847      Imm = SplatBits >> 24;
2848      break;
2849    }
2850
2851    if ((SplatBits & ~0xffff) == 0 &&
2852        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
2853      // Value = 0x0000nnff: Op=x, Cmode=1100.
2854      Cmode = 0xc;
2855      Imm = SplatBits >> 8;
2856      SplatBits |= 0xff;
2857      break;
2858    }
2859
2860    if ((SplatBits & ~0xffffff) == 0 &&
2861        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
2862      // Value = 0x00nnffff: Op=x, Cmode=1101.
2863      Cmode = 0xd;
2864      Imm = SplatBits >> 16;
2865      SplatBits |= 0xffff;
2866      break;
2867    }
2868
2869    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
2870    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
2871    // VMOV.I32.  A (very) minor optimization would be to replicate the value
2872    // and fall through here to test for a valid 64-bit splat.  But, then the
2873    // caller would also need to check and handle the change in size.
2874    return SDValue();
2875
2876  case 64: {
2877    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
2878    if (!isVMOV)
2879      return SDValue();
2880    uint64_t BitMask = 0xff;
2881    uint64_t Val = 0;
2882    unsigned ImmMask = 1;
2883    Imm = 0;
2884    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
2885      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
2886        Val |= BitMask;
2887        Imm |= ImmMask;
2888      } else if ((SplatBits & BitMask) != 0) {
2889        return SDValue();
2890      }
2891      BitMask <<= 8;
2892      ImmMask <<= 1;
2893    }
2894    // Op=1, Cmode=1110.
2895    Op = 1;
2896    Cmode = 0xe;
2897    SplatBits = Val;
2898    VT = MVT::i64;
2899    break;
2900  }
2901
2902  default:
2903    llvm_unreachable("unexpected size for isNEONModifiedImm");
2904    return SDValue();
2905  }
2906
2907  if (DoEncode)
2908    return DAG.getTargetConstant((Op << 12) | (Cmode << 8) | Imm, MVT::i32);
2909  return DAG.getTargetConstant(SplatBits, VT);
2910}
2911
2912
2913/// getNEONModImm - If this is a valid vector constant for a NEON instruction
2914/// with a "modified immediate" operand (e.g., VMOV) of the specified element
2915/// size, return the encoded value for that immediate.  The ByteSize field
2916/// indicates the number of bytes of each element [1248].
2917SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
2918                           SelectionDAG &DAG) {
2919  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
2920  APInt SplatBits, SplatUndef;
2921  unsigned SplatBitSize;
2922  bool HasAnyUndefs;
2923  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
2924                                      HasAnyUndefs, ByteSize * 8))
2925    return SDValue();
2926
2927  if (SplatBitSize > ByteSize * 8)
2928    return SDValue();
2929
2930  return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
2931                           SplatBitSize, DAG, isVMOV, true);
2932}
2933
2934static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
2935                       bool &ReverseVEXT, unsigned &Imm) {
2936  unsigned NumElts = VT.getVectorNumElements();
2937  ReverseVEXT = false;
2938  Imm = M[0];
2939
2940  // If this is a VEXT shuffle, the immediate value is the index of the first
2941  // element.  The other shuffle indices must be the successive elements after
2942  // the first one.
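  // For example (with v4i16), the mask <2, 3, 4, 5> is a VEXT #2 of (V1, V2),
  // and <6, 7, 0, 1> is a VEXT #2 of (V2, V1) with ReverseVEXT set.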
2943  unsigned ExpectedElt = Imm;
2944  for (unsigned i = 1; i < NumElts; ++i) {
2945    // Increment the expected index.  If it wraps around, it may still be
2946    // a VEXT but the source vectors must be swapped.
2947    ExpectedElt += 1;
2948    if (ExpectedElt == NumElts * 2) {
2949      ExpectedElt = 0;
2950      ReverseVEXT = true;
2951    }
2952
2953    if (ExpectedElt != static_cast<unsigned>(M[i]))
2954      return false;
2955  }
2956
2957  // Adjust the index value if the source operands will be swapped.
2958  if (ReverseVEXT)
2959    Imm -= NumElts;
2960
2961  return true;
2962}
2963
2964/// isVREVMask - Check if a vector shuffle corresponds to a VREV
2965/// instruction with the specified blocksize.  (The order of the elements
2966/// within each block of the vector is reversed.)
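/// For example, for v8i8 with BlockSize == 32 (VREV32.8), the expected mask
/// is <3, 2, 1, 0, 7, 6, 5, 4>.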
2967static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
2968                       unsigned BlockSize) {
2969  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
2970         "Only possible block sizes for VREV are: 16, 32, 64");
2971
2972  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2973  if (EltSz == 64)
2974    return false;
2975
2976  unsigned NumElts = VT.getVectorNumElements();
2977  unsigned BlockElts = M[0] + 1;
2978
2979  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
2980    return false;
2981
2982  for (unsigned i = 0; i < NumElts; ++i) {
2983    if ((unsigned) M[i] !=
2984        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
2985      return false;
2986  }
2987
2988  return true;
2989}
2990
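/// isVTRNMask - Check if a vector shuffle mask corresponds to one result of
/// a VTRN (transpose) operation; e.g., for v4i32, <0, 4, 2, 6> selects the
/// first result and <1, 5, 3, 7> the second (WhichResult).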
2991static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
2992                       unsigned &WhichResult) {
2993  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2994  if (EltSz == 64)
2995    return false;
2996
2997  unsigned NumElts = VT.getVectorNumElements();
2998  WhichResult = (M[0] == 0 ? 0 : 1);
2999  for (unsigned i = 0; i < NumElts; i += 2) {
3000    if ((unsigned) M[i] != i + WhichResult ||
3001        (unsigned) M[i+1] != i + NumElts + WhichResult)
3002      return false;
3003  }
3004  return true;
3005}
3006
3007/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3008/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3009/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3010static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3011                                unsigned &WhichResult) {
3012  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3013  if (EltSz == 64)
3014    return false;
3015
3016  unsigned NumElts = VT.getVectorNumElements();
3017  WhichResult = (M[0] == 0 ? 0 : 1);
3018  for (unsigned i = 0; i < NumElts; i += 2) {
3019    if ((unsigned) M[i] != i + WhichResult ||
3020        (unsigned) M[i+1] != i + WhichResult)
3021      return false;
3022  }
3023  return true;
3024}
3025
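/// isVUZPMask - Check if a vector shuffle mask corresponds to one result of
/// a VUZP (unzip) operation; e.g., for v4i16, <0, 2, 4, 6> selects the even
/// elements and <1, 3, 5, 7> the odd ones.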
3026static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
3027                       unsigned &WhichResult) {
3028  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3029  if (EltSz == 64)
3030    return false;
3031
3032  unsigned NumElts = VT.getVectorNumElements();
3033  WhichResult = (M[0] == 0 ? 0 : 1);
3034  for (unsigned i = 0; i != NumElts; ++i) {
3035    if ((unsigned) M[i] != 2 * i + WhichResult)
3036      return false;
3037  }
3038
3039  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3040  if (VT.is64BitVector() && EltSz == 32)
3041    return false;
3042
3043  return true;
3044}
3045
3046/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
3047/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3048/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
3049static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3050                                unsigned &WhichResult) {
3051  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3052  if (EltSz == 64)
3053    return false;
3054
3055  unsigned Half = VT.getVectorNumElements() / 2;
3056  WhichResult = (M[0] == 0 ? 0 : 1);
3057  for (unsigned j = 0; j != 2; ++j) {
3058    unsigned Idx = WhichResult;
3059    for (unsigned i = 0; i != Half; ++i) {
3060      if ((unsigned) M[i + j * Half] != Idx)
3061        return false;
3062      Idx += 2;
3063    }
3064  }
3065
3066  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3067  if (VT.is64BitVector() && EltSz == 32)
3068    return false;
3069
3070  return true;
3071}
3072
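/// isVZIPMask - Check if a vector shuffle mask corresponds to one result of
/// a VZIP (interleave) operation; e.g., for v4i16, <0, 4, 1, 5> interleaves
/// the low halves and <2, 6, 3, 7> the high halves.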
3073static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
3074                       unsigned &WhichResult) {
3075  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3076  if (EltSz == 64)
3077    return false;
3078
3079  unsigned NumElts = VT.getVectorNumElements();
3080  WhichResult = (M[0] == 0 ? 0 : 1);
3081  unsigned Idx = WhichResult * NumElts / 2;
3082  for (unsigned i = 0; i != NumElts; i += 2) {
3083    if ((unsigned) M[i] != Idx ||
3084        (unsigned) M[i+1] != Idx + NumElts)
3085      return false;
3086    Idx += 1;
3087  }
3088
3089  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3090  if (VT.is64BitVector() && EltSz == 32)
3091    return false;
3092
3093  return true;
3094}
3095
3096/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
3097/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3098/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
3099static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3100                                unsigned &WhichResult) {
3101  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3102  if (EltSz == 64)
3103    return false;
3104
3105  unsigned NumElts = VT.getVectorNumElements();
3106  WhichResult = (M[0] == 0 ? 0 : 1);
3107  unsigned Idx = WhichResult * NumElts / 2;
3108  for (unsigned i = 0; i != NumElts; i += 2) {
3109    if ((unsigned) M[i] != Idx ||
3110        (unsigned) M[i+1] != Idx)
3111      return false;
3112    Idx += 1;
3113  }
3114
3115  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3116  if (VT.is64BitVector() && EltSz == 32)
3117    return false;
3118
3119  return true;
3120}
3121
3122
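/// BuildSplat - Build a splat of Val with the width of VT, using a canonical
/// element type so that equivalent splats get CSE'd, and bitcast the result
/// to VT.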
3123static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
3124  // Canonicalize all-zeros and all-ones vectors.
3125  ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
3126  if (ConstVal->isNullValue())
3127    return getZeroVector(VT, DAG, dl);
3128  if (ConstVal->isAllOnesValue())
3129    return getOnesVector(VT, DAG, dl);
3130
3131  EVT CanonicalVT;
3132  if (VT.is64BitVector()) {
3133    switch (Val.getValueType().getSizeInBits()) {
3134    case 8:  CanonicalVT = MVT::v8i8; break;
3135    case 16: CanonicalVT = MVT::v4i16; break;
3136    case 32: CanonicalVT = MVT::v2i32; break;
3137    case 64: CanonicalVT = MVT::v1i64; break;
3138    default: llvm_unreachable("unexpected splat element type"); break;
3139    }
3140  } else {
3141    assert(VT.is128BitVector() && "unknown splat vector size");
3142    switch (Val.getValueType().getSizeInBits()) {
3143    case 8:  CanonicalVT = MVT::v16i8; break;
3144    case 16: CanonicalVT = MVT::v8i16; break;
3145    case 32: CanonicalVT = MVT::v4i32; break;
3146    case 64: CanonicalVT = MVT::v2i64; break;
3147    default: llvm_unreachable("unexpected splat element type"); break;
3148    }
3149  }
3150
3151  // Build a canonical splat for this value.
3152  SmallVector<SDValue, 8> Ops;
3153  Ops.assign(CanonicalVT.getVectorNumElements(), Val);
3154  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
3155                            Ops.size());
3156  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
3157}
3158
3159// If this is a case we can't handle, return null and let the default
3160// expansion code take care of it.
3161static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
3162  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3163  DebugLoc dl = Op.getDebugLoc();
3164  EVT VT = Op.getValueType();
3165
3166  APInt SplatBits, SplatUndef;
3167  unsigned SplatBitSize;
3168  bool HasAnyUndefs;
3169  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3170    if (SplatBitSize <= 64) {
3171      // Check if an immediate VMOV works.
3172      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
3173                                      SplatUndef.getZExtValue(),
3174                                      SplatBitSize, DAG, true, false);
3175      if (Val.getNode())
3176        return BuildSplat(Val, VT, DAG, dl);
3177    }
3178  }
3179
3180  // Scan through the operands to see if only one value is used.
3181  unsigned NumElts = VT.getVectorNumElements();
3182  bool isOnlyLowElement = true;
3183  bool usesOnlyOneValue = true;
3184  bool isConstant = true;
3185  SDValue Value;
3186  for (unsigned i = 0; i < NumElts; ++i) {
3187    SDValue V = Op.getOperand(i);
3188    if (V.getOpcode() == ISD::UNDEF)
3189      continue;
3190    if (i > 0)
3191      isOnlyLowElement = false;
3192    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3193      isConstant = false;
3194
3195    if (!Value.getNode())
3196      Value = V;
3197    else if (V != Value)
3198      usesOnlyOneValue = false;
3199  }
3200
3201  if (!Value.getNode())
3202    return DAG.getUNDEF(VT);
3203
3204  if (isOnlyLowElement)
3205    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
3206
3207  // If all elements are constants, fall back to the default expansion, which
3208  // will generate a load from the constant pool.
3209  if (isConstant)
3210    return SDValue();
3211
3212  // Use VDUP for non-constant splats.
3213  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3214  if (usesOnlyOneValue && EltSize <= 32)
3215    return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3216
3217  // Vectors with 32- or 64-bit elements can be built by directly assigning
3218  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
3219  // will be legalized.
3220  if (EltSize >= 32) {
3221    // Do the expansion with floating-point types, since that is what the VFP
3222    // registers are defined to use, and since i64 is not legal.
3223    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3224    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3225    SmallVector<SDValue, 8> Ops;
3226    for (unsigned i = 0; i < NumElts; ++i)
3227      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
3228    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3229    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3230  }
3231
3232  return SDValue();
3233}
3234
3235/// isShuffleMaskLegal - Targets can use this to indicate that they only
3236/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3237/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3238/// are assumed to be legal.
3239bool
3240ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
3241                                      EVT VT) const {
3242  if (VT.getVectorNumElements() == 4 &&
3243      (VT.is128BitVector() || VT.is64BitVector())) {
3244    unsigned PFIndexes[4];
3245    for (unsigned i = 0; i != 4; ++i) {
3246      if (M[i] < 0)
3247        PFIndexes[i] = 8;
3248      else
3249        PFIndexes[i] = M[i];
3250    }
3251
3252    // Compute the index in the perfect shuffle table.
3253    unsigned PFTableIndex =
3254      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3255    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3256    unsigned Cost = (PFEntry >> 30);
3257
3258    if (Cost <= 4)
3259      return true;
3260  }
3261
3262  bool ReverseVEXT;
3263  unsigned Imm, WhichResult;
3264
3265  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3266  return (EltSize >= 32 ||
3267          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
3268          isVREVMask(M, VT, 64) ||
3269          isVREVMask(M, VT, 32) ||
3270          isVREVMask(M, VT, 16) ||
3271          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
3272          isVTRNMask(M, VT, WhichResult) ||
3273          isVUZPMask(M, VT, WhichResult) ||
3274          isVZIPMask(M, VT, WhichResult) ||
3275          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
3276          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
3277          isVZIP_v_undef_Mask(M, VT, WhichResult));
3278}
3279
3280/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3281/// the specified operations to build the shuffle.
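/// Each 32-bit table entry packs the expansion as: bits 31-30 = cost,
/// bits 29-26 = opcode (one of the OP_* values below), bits 25-13 = LHS
/// entry id, and bits 12-0 = RHS entry id.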
3282static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3283                                      SDValue RHS, SelectionDAG &DAG,
3284                                      DebugLoc dl) {
3285  unsigned OpNum = (PFEntry >> 26) & 0x0F;
3286  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3287  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
3288
3289  enum {
3290    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3291    OP_VREV,
3292    OP_VDUP0,
3293    OP_VDUP1,
3294    OP_VDUP2,
3295    OP_VDUP3,
3296    OP_VEXT1,
3297    OP_VEXT2,
3298    OP_VEXT3,
3299    OP_VUZPL, // VUZP, left result
3300    OP_VUZPR, // VUZP, right result
3301    OP_VZIPL, // VZIP, left result
3302    OP_VZIPR, // VZIP, right result
3303    OP_VTRNL, // VTRN, left result
3304    OP_VTRNR  // VTRN, right result
3305  };
3306
3307  if (OpNum == OP_COPY) {
3308    if (LHSID == (1*9+2)*9+3) return LHS;
3309    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3310    return RHS;
3311  }
3312
3313  SDValue OpLHS, OpRHS;
3314  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3315  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3316  EVT VT = OpLHS.getValueType();
3317
3318  switch (OpNum) {
3319  default: llvm_unreachable("Unknown shuffle opcode!");
3320  case OP_VREV:
3321    return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
3322  case OP_VDUP0:
3323  case OP_VDUP1:
3324  case OP_VDUP2:
3325  case OP_VDUP3:
3326    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
3327                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
3328  case OP_VEXT1:
3329  case OP_VEXT2:
3330  case OP_VEXT3:
3331    return DAG.getNode(ARMISD::VEXT, dl, VT,
3332                       OpLHS, OpRHS,
3333                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3334  case OP_VUZPL:
3335  case OP_VUZPR:
3336    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3337                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3338  case OP_VZIPL:
3339  case OP_VZIPR:
3340    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3341                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3342  case OP_VTRNL:
3343  case OP_VTRNR:
3344    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3345                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3346  }
3347}
3348
3349static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3350  SDValue V1 = Op.getOperand(0);
3351  SDValue V2 = Op.getOperand(1);
3352  DebugLoc dl = Op.getDebugLoc();
3353  EVT VT = Op.getValueType();
3354  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3355  SmallVector<int, 8> ShuffleMask;
3356
3357  // Convert shuffles that are directly supported on NEON to target-specific
3358  // DAG nodes, instead of keeping them as shuffles and matching them again
3359  // during code selection.  This is more efficient and avoids the possibility
3360  // of inconsistencies between legalization and selection.
3361  // FIXME: floating-point vectors should be canonicalized to integer vectors
3362  // of the same size so that they get CSEd properly.
3363  SVN->getMask(ShuffleMask);
3364
3365  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3366  if (EltSize <= 32) {
3367    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3368      int Lane = SVN->getSplatIndex();
3369      // If this is an undef splat, generate it via "just" vdup, if possible.
3370      if (Lane == -1) Lane = 0;
3371
3372      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3373        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3374      }
3375      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3376                         DAG.getConstant(Lane, MVT::i32));
3377    }
3378
3379    bool ReverseVEXT;
3380    unsigned Imm;
3381    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3382      if (ReverseVEXT)
3383        std::swap(V1, V2);
3384      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3385                         DAG.getConstant(Imm, MVT::i32));
3386    }
3387
3388    if (isVREVMask(ShuffleMask, VT, 64))
3389      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3390    if (isVREVMask(ShuffleMask, VT, 32))
3391      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3392    if (isVREVMask(ShuffleMask, VT, 16))
3393      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3394
3395    // Check for Neon shuffles that modify both input vectors in place.
3396    // If both results are used, i.e., if there are two shuffles with the same
3397    // source operands and with masks corresponding to both results of one of
3398    // these operations, DAG memoization will ensure that a single node is
3399    // used for both shuffles.
3400    unsigned WhichResult;
3401    if (isVTRNMask(ShuffleMask, VT, WhichResult))
3402      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3403                         V1, V2).getValue(WhichResult);
3404    if (isVUZPMask(ShuffleMask, VT, WhichResult))
3405      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3406                         V1, V2).getValue(WhichResult);
3407    if (isVZIPMask(ShuffleMask, VT, WhichResult))
3408      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3409                         V1, V2).getValue(WhichResult);
3410
3411    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3412      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3413                         V1, V1).getValue(WhichResult);
3414    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3415      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3416                         V1, V1).getValue(WhichResult);
3417    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3418      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3419                         V1, V1).getValue(WhichResult);
3420  }
3421
3422  // If the shuffle is not directly supported and it has 4 elements, use
3423  // the PerfectShuffle-generated table to synthesize it from other shuffles.
3424  unsigned NumElts = VT.getVectorNumElements();
3425  if (NumElts == 4) {
3426    unsigned PFIndexes[4];
3427    for (unsigned i = 0; i != 4; ++i) {
3428      if (ShuffleMask[i] < 0)
3429        PFIndexes[i] = 8;
3430      else
3431        PFIndexes[i] = ShuffleMask[i];
3432    }
3433
3434    // Compute the index in the perfect shuffle table.
3435    unsigned PFTableIndex =
3436      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3437    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3438    unsigned Cost = (PFEntry >> 30);
3439
3440    if (Cost <= 4)
3441      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3442  }
3443
3444  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
3445  if (EltSize >= 32) {
3446    // Do the expansion with floating-point types, since that is what the VFP
3447    // registers are defined to use, and since i64 is not legal.
3448    EVT EltVT = EVT::getFloatingPointVT(EltSize);
3449    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3450    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
3451    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
3452    SmallVector<SDValue, 8> Ops;
3453    for (unsigned i = 0; i < NumElts; ++i) {
3454      if (ShuffleMask[i] < 0)
3455        Ops.push_back(DAG.getUNDEF(EltVT));
3456      else
3457        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
3458                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
3459                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
3460                                                  MVT::i32)));
3461    }
3462    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3463    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3464  }
3465
3466  return SDValue();
3467}
3468
3469static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3470  EVT VT = Op.getValueType();
3471  DebugLoc dl = Op.getDebugLoc();
3472  SDValue Vec = Op.getOperand(0);
3473  SDValue Lane = Op.getOperand(1);
3474  assert(VT == MVT::i32 &&
3475         Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
3476         "unexpected type for custom-lowering vector extract");
3477  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3478}
3479
3480static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3481  // The only time a CONCAT_VECTORS operation can have legal types is when
3482  // two 64-bit vectors are concatenated to a 128-bit vector.
3483  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3484         "unexpected CONCAT_VECTORS");
3485  DebugLoc dl = Op.getDebugLoc();
3486  SDValue Val = DAG.getUNDEF(MVT::v2f64);
3487  SDValue Op0 = Op.getOperand(0);
3488  SDValue Op1 = Op.getOperand(1);
3489  if (Op0.getOpcode() != ISD::UNDEF)
3490    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3491                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
3492                      DAG.getIntPtrConstant(0));
3493  if (Op1.getOpcode() != ISD::UNDEF)
3494    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3495                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
3496                      DAG.getIntPtrConstant(1));
3497  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
3498}
3499
3500SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3501  switch (Op.getOpcode()) {
3502  default: llvm_unreachable("Don't know how to custom lower this!");
3503  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
3504  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
3505  case ISD::GlobalAddress:
3506    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
3507      LowerGlobalAddressELF(Op, DAG);
3508  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
3509  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
3510  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
3511  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
3512  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3513  case ISD::VASTART:       return LowerVASTART(Op, DAG);
3514  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
3515  case ISD::SINT_TO_FP:
3516  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
3517  case ISD::FP_TO_SINT:
3518  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
3519  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
3520  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
3521  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
3522  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3523  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
3524  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
3525  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
3526                                                               Subtarget);
3527  case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
3528  case ISD::SHL:
3529  case ISD::SRL:
3530  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
3531  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
3532  case ISD::SRL_PARTS:
3533  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
3534  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
3535  case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
3536  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG);
3537  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3538  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3539  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3540  }
3541  return SDValue();
3542}
3543
3544/// ReplaceNodeResults - Replace the results of node with an illegal result
3545/// type with new values built out of custom code.
3546void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
3547                                           SmallVectorImpl<SDValue>&Results,
3548                                           SelectionDAG &DAG) const {
3549  SDValue Res;
3550  switch (N->getOpcode()) {
3551  default:
3552    llvm_unreachable("Don't know how to custom expand this!");
3553    break;
3554  case ISD::BIT_CONVERT:
3555    Res = ExpandBIT_CONVERT(N, DAG);
3556    break;
3557  case ISD::SRL:
3558  case ISD::SRA:
3559    Res = LowerShift(N, DAG, Subtarget);
3560    break;
3561  }
3562  if (Res.getNode())
3563    Results.push_back(Res);
3564}
3565
3566//===----------------------------------------------------------------------===//
3567//                           ARM Scheduler Hooks
3568//===----------------------------------------------------------------------===//
3569
3570MachineBasicBlock *
3571ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
3572                                     MachineBasicBlock *BB,
3573                                     unsigned Size) const {
3574  unsigned dest    = MI->getOperand(0).getReg();
3575  unsigned ptr     = MI->getOperand(1).getReg();
3576  unsigned oldval  = MI->getOperand(2).getReg();
3577  unsigned newval  = MI->getOperand(3).getReg();
3578  unsigned scratch = BB->getParent()->getRegInfo()
3579    .createVirtualRegister(ARM::GPRRegisterClass);
3580  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3581  DebugLoc dl = MI->getDebugLoc();
3582  bool isThumb2 = Subtarget->isThumb2();
3583
3584  unsigned ldrOpc, strOpc;
3585  switch (Size) {
3586  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3587  case 1:
3588    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3589    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3590    break;
3591  case 2:
3592    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3593    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3594    break;
3595  case 4:
3596    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3597    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3598    break;
3599  }
3600
3601  MachineFunction *MF = BB->getParent();
3602  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3603  MachineFunction::iterator It = BB;
3604  ++It; // insert the new blocks after the current block
3605
3606  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3607  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3608  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3609  MF->insert(It, loop1MBB);
3610  MF->insert(It, loop2MBB);
3611  MF->insert(It, exitMBB);
3612  exitMBB->transferSuccessors(BB);
3613
3614  //  thisMBB:
3615  //   ...
3616  //   fallthrough --> loop1MBB
3617  BB->addSuccessor(loop1MBB);
3618
3619  // loop1MBB:
3620  //   ldrex dest, [ptr]
3621  //   cmp dest, oldval
3622  //   bne exitMBB
3623  BB = loop1MBB;
3624  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3625  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3626                 .addReg(dest).addReg(oldval));
3627  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3628    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3629  BB->addSuccessor(loop2MBB);
3630  BB->addSuccessor(exitMBB);
3631
3632  // loop2MBB:
3633  //   strex scratch, newval, [ptr]
3634  //   cmp scratch, #0
3635  //   bne loop1MBB
3636  BB = loop2MBB;
3637  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3638                 .addReg(ptr));
3639  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3640                 .addReg(scratch).addImm(0));
3641  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3642    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3643  BB->addSuccessor(loop1MBB);
3644  BB->addSuccessor(exitMBB);
3645
3646  //  exitMBB:
3647  //   ...
3648  BB = exitMBB;
3649
3650  MF->DeleteMachineInstr(MI);   // The instruction is gone now.
3651
3652  return BB;
3653}
3654
3655MachineBasicBlock *
3656ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3657                                    unsigned Size, unsigned BinOpcode) const {
3658  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3659  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3660
3661  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3662  MachineFunction *MF = BB->getParent();
3663  MachineFunction::iterator It = BB;
3664  ++It;
3665
3666  unsigned dest = MI->getOperand(0).getReg();
3667  unsigned ptr = MI->getOperand(1).getReg();
3668  unsigned incr = MI->getOperand(2).getReg();
3669  DebugLoc dl = MI->getDebugLoc();
3670
3671  bool isThumb2 = Subtarget->isThumb2();
3672  unsigned ldrOpc, strOpc;
3673  switch (Size) {
3674  default: llvm_unreachable("unsupported size for AtomicBinary!");
3675  case 1:
3676    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3677    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3678    break;
3679  case 2:
3680    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3681    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3682    break;
3683  case 4:
3684    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3685    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3686    break;
3687  }
3688
3689  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3690  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3691  MF->insert(It, loopMBB);
3692  MF->insert(It, exitMBB);
3693  exitMBB->transferSuccessors(BB);
3694
3695  MachineRegisterInfo &RegInfo = MF->getRegInfo();
3696  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3697  unsigned scratch2 = (!BinOpcode) ? incr :
3698    RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3699
3700  //  thisMBB:
3701  //   ...
3702  //   fallthrough --> loopMBB
3703  BB->addSuccessor(loopMBB);
3704
3705  //  loopMBB:
3706  //   ldrex dest, ptr
3707  //   <binop> scratch2, dest, incr
3708  //   strex scratch, scratch2, ptr
3709  //   cmp scratch, #0
3710  //   bne- loopMBB
3711  //   fallthrough --> exitMBB
3712  BB = loopMBB;
3713  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3714  if (BinOpcode) {
3715    // operand order needs to go the other way for NAND
3716    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
3717      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3718                     addReg(incr).addReg(dest)).addReg(0);
3719    else
3720      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3721                     addReg(dest).addReg(incr)).addReg(0);
3722  }
3723
3724  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
3725                 .addReg(ptr));
3726  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3727                 .addReg(scratch).addImm(0));
3728  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3729    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3730
3731  BB->addSuccessor(loopMBB);
3732  BB->addSuccessor(exitMBB);
3733
3734  //  exitMBB:
3735  //   ...
3736  BB = exitMBB;
3737
3738  MF->DeleteMachineInstr(MI);   // The instruction is gone now.
3739
3740  return BB;
3741}
3742
3743MachineBasicBlock *
3744ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
3745                                               MachineBasicBlock *BB) const {
3746  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3747  DebugLoc dl = MI->getDebugLoc();
3748  bool isThumb2 = Subtarget->isThumb2();
3749  switch (MI->getOpcode()) {
3750  default:
3751    MI->dump();
3752    llvm_unreachable("Unexpected instr type to insert");
3753
3754  case ARM::ATOMIC_LOAD_ADD_I8:
3755     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3756  case ARM::ATOMIC_LOAD_ADD_I16:
3757     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3758  case ARM::ATOMIC_LOAD_ADD_I32:
3759     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3760
3761  case ARM::ATOMIC_LOAD_AND_I8:
3762     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3763  case ARM::ATOMIC_LOAD_AND_I16:
3764     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3765  case ARM::ATOMIC_LOAD_AND_I32:
3766     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3767
3768  case ARM::ATOMIC_LOAD_OR_I8:
3769     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3770  case ARM::ATOMIC_LOAD_OR_I16:
3771     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3772  case ARM::ATOMIC_LOAD_OR_I32:
3773     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3774
3775  case ARM::ATOMIC_LOAD_XOR_I8:
3776     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3777  case ARM::ATOMIC_LOAD_XOR_I16:
3778     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3779  case ARM::ATOMIC_LOAD_XOR_I32:
3780     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3781
3782  case ARM::ATOMIC_LOAD_NAND_I8:
3783     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3784  case ARM::ATOMIC_LOAD_NAND_I16:
3785     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3786  case ARM::ATOMIC_LOAD_NAND_I32:
3787     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3788
3789  case ARM::ATOMIC_LOAD_SUB_I8:
3790     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3791  case ARM::ATOMIC_LOAD_SUB_I16:
3792     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3793  case ARM::ATOMIC_LOAD_SUB_I32:
3794     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3795
3796  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
3797  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
3798  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
3799
3800  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
3801  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
3802  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
3803
3804  case ARM::tMOVCCr_pseudo: {
3805    // To "insert" a SELECT_CC instruction, we actually have to insert the
3806    // diamond control-flow pattern.  The incoming instruction knows the
3807    // destination vreg to set, the condition code register to branch on, the
3808    // true/false values to select between, and a branch opcode to use.
3809    const BasicBlock *LLVM_BB = BB->getBasicBlock();
3810    MachineFunction::iterator It = BB;
3811    ++It;
3812
3813    //  thisMBB:
3814    //  ...
3815    //   TrueVal = ...
3816    //   cmpTY ccX, r1, r2
3817    //   bCC copy1MBB
3818    //   fallthrough --> copy0MBB
3819    MachineBasicBlock *thisMBB  = BB;
3820    MachineFunction *F = BB->getParent();
3821    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
3822    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
3823    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
3824      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
3825    F->insert(It, copy0MBB);
3826    F->insert(It, sinkMBB);
3827    // Update machine-CFG edges by first adding all successors of the current
3828    // block to the new block which will contain the Phi node for the select.
3829    for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
3830           E = BB->succ_end(); I != E; ++I)
3831      sinkMBB->addSuccessor(*I);
3832    // Next, remove all successors of the current block, and add the true
3833    // and fallthrough blocks as its successors.
3834    while (!BB->succ_empty())
3835      BB->removeSuccessor(BB->succ_begin());
3836    BB->addSuccessor(copy0MBB);
3837    BB->addSuccessor(sinkMBB);
3838
3839    //  copy0MBB:
3840    //   %FalseValue = ...
3841    //   # fallthrough to sinkMBB
3842    BB = copy0MBB;
3843
3844    // Update machine-CFG edges
3845    BB->addSuccessor(sinkMBB);
3846
3847    //  sinkMBB:
3848    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
3849    //  ...
3850    BB = sinkMBB;
3851    BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
3852      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
3853      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
3854
3855    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
3856    return BB;
3857  }
3858
3859  case ARM::tANDsp:
3860  case ARM::tADDspr_:
3861  case ARM::tSUBspi_:
3862  case ARM::t2SUBrSPi_:
3863  case ARM::t2SUBrSPi12_:
3864  case ARM::t2SUBrSPs_: {
3865    MachineFunction *MF = BB->getParent();
3866    unsigned DstReg = MI->getOperand(0).getReg();
3867    unsigned SrcReg = MI->getOperand(1).getReg();
3868    bool DstIsDead = MI->getOperand(0).isDead();
3869    bool SrcIsKill = MI->getOperand(1).isKill();
3870
3871    if (SrcReg != ARM::SP) {
3872      // Copy the source to SP from virtual register.
3873      const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
3874      unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3875        ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
3876      BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
3877        .addReg(SrcReg, getKillRegState(SrcIsKill));
3878    }
3879
3880    unsigned OpOpc = 0;
3881    bool NeedPred = false, NeedCC = false, NeedOp3 = false;
3882    switch (MI->getOpcode()) {
3883    default:
3884      llvm_unreachable("Unexpected pseudo instruction!");
3885    case ARM::tANDsp:
3886      OpOpc = ARM::tAND;
3887      NeedPred = true;
3888      break;
3889    case ARM::tADDspr_:
3890      OpOpc = ARM::tADDspr;
3891      break;
3892    case ARM::tSUBspi_:
3893      OpOpc = ARM::tSUBspi;
3894      break;
3895    case ARM::t2SUBrSPi_:
3896      OpOpc = ARM::t2SUBrSPi;
3897      NeedPred = true; NeedCC = true;
3898      break;
3899    case ARM::t2SUBrSPi12_:
3900      OpOpc = ARM::t2SUBrSPi12;
3901      NeedPred = true;
3902      break;
3903    case ARM::t2SUBrSPs_:
3904      OpOpc = ARM::t2SUBrSPs;
3905      NeedPred = true; NeedCC = true; NeedOp3 = true;
3906      break;
3907    }
3908    MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
3909    if (OpOpc == ARM::tAND)
3910      AddDefaultT1CC(MIB);
3911    MIB.addReg(ARM::SP);
3912    MIB.addOperand(MI->getOperand(2));
3913    if (NeedOp3)
3914      MIB.addOperand(MI->getOperand(3));
3915    if (NeedPred)
3916      AddDefaultPred(MIB);
3917    if (NeedCC)
3918      AddDefaultCC(MIB);
3919
3920    // Copy the result from SP back into the virtual register.
3921    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
3922    unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3923      ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
3924    BuildMI(BB, dl, TII->get(CopyOpc))
3925      .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
3926      .addReg(ARM::SP);
3927    MF->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
3928    return BB;
3929  }
3930  }
3931}
3932
3933//===----------------------------------------------------------------------===//
3934//                           ARM Optimization Hooks
3935//===----------------------------------------------------------------------===//
3936
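/// combineSelectAndUse - Fold (add x, (select cc, 0, c)) or
/// (sub x, (select cc, 0, c)) into (select cc, x, (add/sub x, c)) when the
/// select has a zero arm and only one use, so the add/sub is folded into the
/// non-zero arm of the select.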
3937static
3938SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
3939                            TargetLowering::DAGCombinerInfo &DCI) {
3940  SelectionDAG &DAG = DCI.DAG;
3941  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3942  EVT VT = N->getValueType(0);
3943  unsigned Opc = N->getOpcode();
3944  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
3945  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
3946  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
3947  ISD::CondCode CC = ISD::SETCC_INVALID;
3948
3949  if (isSlctCC) {
3950    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
3951  } else {
3952    SDValue CCOp = Slct.getOperand(0);
3953    if (CCOp.getOpcode() == ISD::SETCC)
3954      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
3955  }
3956
3957  bool DoXform = false;
3958  bool InvCC = false;
3959  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
3960          "Bad input!");
3961
3962  if (LHS.getOpcode() == ISD::Constant &&
3963      cast<ConstantSDNode>(LHS)->isNullValue()) {
3964    DoXform = true;
3965  } else if (CC != ISD::SETCC_INVALID &&
3966             RHS.getOpcode() == ISD::Constant &&
3967             cast<ConstantSDNode>(RHS)->isNullValue()) {
3968    std::swap(LHS, RHS);
3969    SDValue Op0 = Slct.getOperand(0);
3970    EVT OpVT = isSlctCC ? Op0.getValueType() :
3971                          Op0.getOperand(0).getValueType();
3972    bool isInt = OpVT.isInteger();
3973    CC = ISD::getSetCCInverse(CC, isInt);
3974
3975    if (!TLI.isCondCodeLegal(CC, OpVT))
3976      return SDValue();         // Inverse operator isn't legal.
3977
3978    DoXform = true;
3979    InvCC = true;
3980  }
3981
3982  if (DoXform) {
3983    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
3984    if (isSlctCC)
3985      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
3986                             Slct.getOperand(0), Slct.getOperand(1), CC);
3987    SDValue CCOp = Slct.getOperand(0);
3988    if (InvCC)
3989      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
3990                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
3991    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
3992                       CCOp, OtherOp, Result);
3993  }
3994  return SDValue();
3995}
3996
3997/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3998static SDValue PerformADDCombine(SDNode *N,
3999                                 TargetLowering::DAGCombinerInfo &DCI) {
4000  // added by evan in r37685 with no testcase.
4001  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4002
4003  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
4004  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
4005    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
4006    if (Result.getNode()) return Result;
4007  }
4008  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4009    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4010    if (Result.getNode()) return Result;
4011  }
4012
4013  return SDValue();
4014}
4015
4016/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
4017static SDValue PerformSUBCombine(SDNode *N,
4018                                 TargetLowering::DAGCombinerInfo &DCI) {
4019  // added by evan in r37685 with no testcase.
4020  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4021
4022  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
4023  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4024    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4025    if (Result.getNode()) return Result;
4026  }
4027
4028  return SDValue();
4029}
4030
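/// PerformMULCombine - Target-specific dag combine xforms for ISD::MUL.
/// Rewrite a multiply by a constant of the form (2^N +/- 1) << M as a shift
/// plus an add or sub, when that is likely cheaper than a MUL.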
4031static SDValue PerformMULCombine(SDNode *N,
4032                                 TargetLowering::DAGCombinerInfo &DCI,
4033                                 const ARMSubtarget *Subtarget) {
4034  SelectionDAG &DAG = DCI.DAG;
4035
4036  if (Subtarget->isThumb1Only())
4037    return SDValue();
4038
4039  if (DAG.getMachineFunction().
4040      getFunction()->hasFnAttr(Attribute::OptimizeForSize))
4041    return SDValue();
4042
4043  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
4044    return SDValue();
4045
4046  EVT VT = N->getValueType(0);
4047  if (VT != MVT::i32)
4048    return SDValue();
4049
4050  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
4051  if (!C)
4052    return SDValue();
4053
4054  uint64_t MulAmt = C->getZExtValue();
4055  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
4056  ShiftAmt = ShiftAmt & (32 - 1);
4057  SDValue V = N->getOperand(0);
4058  DebugLoc DL = N->getDebugLoc();
4059
4060  SDValue Res;
4061  MulAmt >>= ShiftAmt;
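  // For example, multiplying by 10 (0b1010) strips one trailing zero, leaving
  // 5 = 2^2 + 1, so the result is (shl (add x, (shl x, 2)), 1).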
4062  if (isPowerOf2_32(MulAmt - 1)) {
4063    // (mul x, 2^N + 1) => (add (shl x, N), x)
4064    Res = DAG.getNode(ISD::ADD, DL, VT,
4065                      V, DAG.getNode(ISD::SHL, DL, VT,
4066                                     V, DAG.getConstant(Log2_32(MulAmt-1),
4067                                                        MVT::i32)));
4068  } else if (isPowerOf2_32(MulAmt + 1)) {
4069    // (mul x, 2^N - 1) => (sub (shl x, N), x)
4070    Res = DAG.getNode(ISD::SUB, DL, VT,
4071                      DAG.getNode(ISD::SHL, DL, VT,
4072                                  V, DAG.getConstant(Log2_32(MulAmt+1),
4073                                                     MVT::i32)),
4074                      V);
4075  } else
4076    return SDValue();
4077
4078  if (ShiftAmt != 0)
4079    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
4080                      DAG.getConstant(ShiftAmt, MVT::i32));
4081
4082  // Do not add new nodes to DAG combiner worklist.
4083  DCI.CombineTo(N, Res, false);
4084  return SDValue();
4085}
4086
4087/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
4088/// ARMISD::VMOVRRD.
4089static SDValue PerformVMOVRRDCombine(SDNode *N,
4090                                   TargetLowering::DAGCombinerInfo &DCI) {
4091  // vmovrrd(vmovdrr x, y) -> x, y
4092  SDValue InDouble = N->getOperand(0);
4093  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
4094    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
4095  return SDValue();
4096}
4097
4098/// getVShiftImm - Check if this is a valid build_vector for the immediate
4099/// operand of a vector shift operation, where all the elements of the
4100/// build_vector must have the same constant integer value.
4101static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
4102  // Ignore bit_converts.
4103  while (Op.getOpcode() == ISD::BIT_CONVERT)
4104    Op = Op.getOperand(0);
4105  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
4106  APInt SplatBits, SplatUndef;
4107  unsigned SplatBitSize;
4108  bool HasAnyUndefs;
4109  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
4110                                      HasAnyUndefs, ElementBits) ||
4111      SplatBitSize > ElementBits)
4112    return false;
4113  Cnt = SplatBits.getSExtValue();
4114  return true;
4115}
4116
4117/// isVShiftLImm - Check if this is a valid build_vector for the immediate
4118/// operand of a vector shift left operation.  That value must be in the range:
4119///   0 <= Value < ElementBits for a left shift; or
4120///   0 <= Value <= ElementBits for a long left shift.
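/// For example, with 16-bit elements an ordinary left shift may use counts
/// 0..15, while a long left shift (vshll) may also use 16.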
4121static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
4122  assert(VT.isVector() && "vector shift count is not a vector type");
4123  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4124  if (! getVShiftImm(Op, ElementBits, Cnt))
4125    return false;
4126  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
4127}
4128
4129/// isVShiftRImm - Check if this is a valid build_vector for the immediate
4130/// operand of a vector shift right operation.  For a shift opcode, the value
4131/// is positive, but for an intrinsic the value must be negative. The
4132/// absolute value must be in the range:
4133///   1 <= |Value| <= ElementBits for a right shift; or
4134///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
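/// For example, with 16-bit elements a right shift may use counts 1..16, and
/// a narrowing right shift may use counts 1..8.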
4135static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
4136                         int64_t &Cnt) {
4137  assert(VT.isVector() && "vector shift count is not a vector type");
4138  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4139  if (! getVShiftImm(Op, ElementBits, Cnt))
4140    return false;
4141  if (isIntrinsic)
4142    Cnt = -Cnt;
4143  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
4144}
4145
4146/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
4147static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
4148  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4149  switch (IntNo) {
4150  default:
4151    // Don't do anything for most intrinsics.
4152    break;
4153
4154  // Vector shifts: check for immediate versions and lower them.
4155  // Note: This is done during DAG combining instead of DAG legalizing because
4156  // the build_vectors for 64-bit vector element shift counts are generally
4157  // not legal, and it is hard to see their values after they get legalized to
4158  // loads from a constant pool.
4159  case Intrinsic::arm_neon_vshifts:
4160  case Intrinsic::arm_neon_vshiftu:
4161  case Intrinsic::arm_neon_vshiftls:
4162  case Intrinsic::arm_neon_vshiftlu:
4163  case Intrinsic::arm_neon_vshiftn:
4164  case Intrinsic::arm_neon_vrshifts:
4165  case Intrinsic::arm_neon_vrshiftu:
4166  case Intrinsic::arm_neon_vrshiftn:
4167  case Intrinsic::arm_neon_vqshifts:
4168  case Intrinsic::arm_neon_vqshiftu:
4169  case Intrinsic::arm_neon_vqshiftsu:
4170  case Intrinsic::arm_neon_vqshiftns:
4171  case Intrinsic::arm_neon_vqshiftnu:
4172  case Intrinsic::arm_neon_vqshiftnsu:
4173  case Intrinsic::arm_neon_vqrshiftns:
4174  case Intrinsic::arm_neon_vqrshiftnu:
4175  case Intrinsic::arm_neon_vqrshiftnsu: {
4176    EVT VT = N->getOperand(1).getValueType();
4177    int64_t Cnt;
4178    unsigned VShiftOpc = 0;
4179
4180    switch (IntNo) {
4181    case Intrinsic::arm_neon_vshifts:
4182    case Intrinsic::arm_neon_vshiftu:
4183      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
4184        VShiftOpc = ARMISD::VSHL;
4185        break;
4186      }
4187      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
4188        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
4189                     ARMISD::VSHRs : ARMISD::VSHRu);
4190        break;
4191      }
4192      return SDValue();
4193
4194    case Intrinsic::arm_neon_vshiftls:
4195    case Intrinsic::arm_neon_vshiftlu:
4196      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
4197        break;
4198      llvm_unreachable("invalid shift count for vshll intrinsic");
4199
4200    case Intrinsic::arm_neon_vrshifts:
4201    case Intrinsic::arm_neon_vrshiftu:
4202      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
4203        break;
4204      return SDValue();
4205
4206    case Intrinsic::arm_neon_vqshifts:
4207    case Intrinsic::arm_neon_vqshiftu:
4208      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4209        break;
4210      return SDValue();
4211
4212    case Intrinsic::arm_neon_vqshiftsu:
4213      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4214        break;
4215      llvm_unreachable("invalid shift count for vqshlu intrinsic");
4216
4217    case Intrinsic::arm_neon_vshiftn:
4218    case Intrinsic::arm_neon_vrshiftn:
4219    case Intrinsic::arm_neon_vqshiftns:
4220    case Intrinsic::arm_neon_vqshiftnu:
4221    case Intrinsic::arm_neon_vqshiftnsu:
4222    case Intrinsic::arm_neon_vqrshiftns:
4223    case Intrinsic::arm_neon_vqrshiftnu:
4224    case Intrinsic::arm_neon_vqrshiftnsu:
4225      // Narrowing shifts require an immediate right shift.
4226      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
4227        break;
4228      llvm_unreachable("invalid shift count for narrowing vector shift "
4229                       "intrinsic");
4230
4231    default:
4232      llvm_unreachable("unhandled vector shift");
4233    }
4234
4235    switch (IntNo) {
4236    case Intrinsic::arm_neon_vshifts:
4237    case Intrinsic::arm_neon_vshiftu:
4238      // Opcode already set above.
4239      break;
4240    case Intrinsic::arm_neon_vshiftls:
4241    case Intrinsic::arm_neon_vshiftlu:
4242      if (Cnt == VT.getVectorElementType().getSizeInBits())
4243        VShiftOpc = ARMISD::VSHLLi;
4244      else
4245        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
4246                     ARMISD::VSHLLs : ARMISD::VSHLLu);
4247      break;
4248    case Intrinsic::arm_neon_vshiftn:
4249      VShiftOpc = ARMISD::VSHRN; break;
4250    case Intrinsic::arm_neon_vrshifts:
4251      VShiftOpc = ARMISD::VRSHRs; break;
4252    case Intrinsic::arm_neon_vrshiftu:
4253      VShiftOpc = ARMISD::VRSHRu; break;
4254    case Intrinsic::arm_neon_vrshiftn:
4255      VShiftOpc = ARMISD::VRSHRN; break;
4256    case Intrinsic::arm_neon_vqshifts:
4257      VShiftOpc = ARMISD::VQSHLs; break;
4258    case Intrinsic::arm_neon_vqshiftu:
4259      VShiftOpc = ARMISD::VQSHLu; break;
4260    case Intrinsic::arm_neon_vqshiftsu:
4261      VShiftOpc = ARMISD::VQSHLsu; break;
4262    case Intrinsic::arm_neon_vqshiftns:
4263      VShiftOpc = ARMISD::VQSHRNs; break;
4264    case Intrinsic::arm_neon_vqshiftnu:
4265      VShiftOpc = ARMISD::VQSHRNu; break;
4266    case Intrinsic::arm_neon_vqshiftnsu:
4267      VShiftOpc = ARMISD::VQSHRNsu; break;
4268    case Intrinsic::arm_neon_vqrshiftns:
4269      VShiftOpc = ARMISD::VQRSHRNs; break;
4270    case Intrinsic::arm_neon_vqrshiftnu:
4271      VShiftOpc = ARMISD::VQRSHRNu; break;
4272    case Intrinsic::arm_neon_vqrshiftnsu:
4273      VShiftOpc = ARMISD::VQRSHRNsu; break;
4274    }
4275
4276    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4277                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
4278  }
4279
4280  case Intrinsic::arm_neon_vshiftins: {
4281    EVT VT = N->getOperand(1).getValueType();
4282    int64_t Cnt;
4283    unsigned VShiftOpc = 0;
4284
4285    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
4286      VShiftOpc = ARMISD::VSLI;
4287    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
4288      VShiftOpc = ARMISD::VSRI;
4289    else {
4290      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
4291    }
4292
4293    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4294                       N->getOperand(1), N->getOperand(2),
4295                       DAG.getConstant(Cnt, MVT::i32));
4296  }
4297
4298  case Intrinsic::arm_neon_vqrshifts:
4299  case Intrinsic::arm_neon_vqrshiftu:
4300    // No immediate versions of these to check for.
4301    break;
4302  }
4303
4304  return SDValue();
4305}
4306
4307/// PerformShiftCombine - Checks for immediate versions of vector shifts and
4308/// lowers them.  As with the vector shift intrinsics, this is done during DAG
4309/// combining instead of DAG legalizing because the build_vectors for 64-bit
4310/// vector element shift counts are generally not legal, and it is hard to see
4311/// their values after they get legalized to loads from a constant pool.
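/// For example, an ISD::SHL of a v4i16 value by a build_vector splat of 3
/// becomes (ARMISD::VSHL x, 3) here.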
4312static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
4313                                   const ARMSubtarget *ST) {
4314  EVT VT = N->getValueType(0);
4315
4316  // Nothing to be done for scalar shifts.
4317  if (! VT.isVector())
4318    return SDValue();
4319
4320  assert(ST->hasNEON() && "unexpected vector shift");
4321  int64_t Cnt;
4322
4323  switch (N->getOpcode()) {
4324  default: llvm_unreachable("unexpected shift opcode");
4325
4326  case ISD::SHL:
4327    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
4328      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
4329                         DAG.getConstant(Cnt, MVT::i32));
4330    break;
4331
4332  case ISD::SRA:
4333  case ISD::SRL:
4334    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
4335      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
4336                            ARMISD::VSHRs : ARMISD::VSHRu);
4337      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
4338                         DAG.getConstant(Cnt, MVT::i32));
4339    }
4340  }
4341  return SDValue();
4342}
4343
4344/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
4345/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
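/// For example, (sext (extract_vector_elt v8i16 x, n) to i32) becomes
/// (ARMISD::VGETLANEs x, n), which can be selected to a single sign-extending
/// lane-move instruction.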
4346static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
4347                                    const ARMSubtarget *ST) {
4348  SDValue N0 = N->getOperand(0);
4349
4350  // Check for sign- and zero-extensions of vector extract operations of 8-
4351  // and 16-bit vector elements.  NEON supports these directly.  They are
4352  // handled during DAG combining because type legalization will promote them
4353  // to 32-bit types and it is messy to recognize the operations after that.
4354  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4355    SDValue Vec = N0.getOperand(0);
4356    SDValue Lane = N0.getOperand(1);
4357    EVT VT = N->getValueType(0);
4358    EVT EltVT = N0.getValueType();
4359    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4360
4361    if (VT == MVT::i32 &&
4362        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
4363        TLI.isTypeLegal(Vec.getValueType())) {
4364
4365      unsigned Opc = 0;
4366      switch (N->getOpcode()) {
4367      default: llvm_unreachable("unexpected opcode");
4368      case ISD::SIGN_EXTEND:
4369        Opc = ARMISD::VGETLANEs;
4370        break;
4371      case ISD::ZERO_EXTEND:
4372      case ISD::ANY_EXTEND:
4373        Opc = ARMISD::VGETLANEu;
4374        break;
4375      }
4376      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
4377    }
4378  }
4379
4380  return SDValue();
4381}
4382
4383/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
4384/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
4385static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
4386                                       const ARMSubtarget *ST) {
4387  // If the target supports NEON, try to use vmax/vmin instructions for f32
4388  // selects like "x < y ? x : y".  Unless the FiniteOnlyFPMath option is set,
4389  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
4390  // a NaN; only do the transformation when it matches that behavior.
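  // For example, (select_cc olt x, y, x, y) becomes (ARMISD::FMIN x, y) when
  // x is known not to be NaN (and, for the "or equal" forms, when one operand
  // is known to be nonzero or unsafe FP math is enabled).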
4391
4392  // For now only do this when using NEON for FP operations; if using VFP, it
4393  // is not obvious that the benefit outweighs the cost of switching to the
4394  // NEON pipeline.
4395  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
4396      N->getValueType(0) != MVT::f32)
4397    return SDValue();
4398
4399  SDValue CondLHS = N->getOperand(0);
4400  SDValue CondRHS = N->getOperand(1);
4401  SDValue LHS = N->getOperand(2);
4402  SDValue RHS = N->getOperand(3);
4403  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4404
4405  unsigned Opcode = 0;
4406  bool IsReversed;
4407  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
4408    IsReversed = false; // x CC y ? x : y
4409  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
4410    IsReversed = true;  // x CC y ? y : x
4411  } else {
4412    return SDValue();
4413  }
4414
4415  bool IsUnordered;
4416  switch (CC) {
4417  default: break;
4418  case ISD::SETOLT:
4419  case ISD::SETOLE:
4420  case ISD::SETLT:
4421  case ISD::SETLE:
4422  case ISD::SETULT:
4423  case ISD::SETULE:
4424    // If LHS is NaN, an ordered comparison will be false and the result will
4425    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
4426    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4427    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
4428    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4429      break;
4430    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
4431    // will return -0, so vmin can only be used for unsafe math or if one of
4432    // the operands is known to be nonzero.
4433    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
4434        !UnsafeFPMath &&
4435        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4436      break;
4437    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
4438    break;
4439
4440  case ISD::SETOGT:
4441  case ISD::SETOGE:
4442  case ISD::SETGT:
4443  case ISD::SETGE:
4444  case ISD::SETUGT:
4445  case ISD::SETUGE:
4446    // If LHS is NaN, an ordered comparison will be false and the result will
4447    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
4448    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4449    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
4450    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4451      break;
4452    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
4453    // will return +0, so vmax can only be used for unsafe math or if one of
4454    // the operands is known to be nonzero.
4455    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
4456        !UnsafeFPMath &&
4457        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4458      break;
4459    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
4460    break;
4461  }
4462
4463  if (!Opcode)
4464    return SDValue();
4465  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
4466}
4467
4468SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
4469                                             DAGCombinerInfo &DCI) const {
4470  switch (N->getOpcode()) {
4471  default: break;
4472  case ISD::ADD:        return PerformADDCombine(N, DCI);
4473  case ISD::SUB:        return PerformSUBCombine(N, DCI);
4474  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
4475  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
4476  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
4477  case ISD::SHL:
4478  case ISD::SRA:
4479  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
4480  case ISD::SIGN_EXTEND:
4481  case ISD::ZERO_EXTEND:
4482  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
4483  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
4484  }
4485  return SDValue();
4486}
4487
4488bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
4489  if (!Subtarget->hasV6Ops())
4490    // Pre-v6 does not support unaligned mem access.
4491    return false;
4492  else {
4493    // v6+ may or may not support unaligned mem access depending on the system
4494    // configuration.
4495    // FIXME: This is pretty conservative. Should we provide a cmdline option
4496    // to control the behaviour?
4497    if (!Subtarget->isTargetDarwin())
4498      return false;
4499  }
4500
4501  switch (VT.getSimpleVT().SimpleTy) {
4502  default:
4503    return false;
4504  case MVT::i8:
4505  case MVT::i16:
4506  case MVT::i32:
4507    return true;
4508  // FIXME: VLD1 etc with standard alignment is legal.
4509  }
4510}
4511
4512static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
4513  if (V < 0)
4514    return false;
4515
4516  unsigned Scale = 1;
4517  switch (VT.getSimpleVT().SimpleTy) {
4518  default: return false;
4519  case MVT::i1:
4520  case MVT::i8:
4521    // Scale == 1;
4522    break;
4523  case MVT::i16:
4524    // Scale == 2;
4525    Scale = 2;
4526    break;
4527  case MVT::i32:
4528    // Scale == 4;
4529    Scale = 4;
4530    break;
4531  }
4532
4533  if ((V & (Scale - 1)) != 0)
4534    return false;
4535  V /= Scale;
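  // The scaled offset must fit in 5 bits, e.g. 0..124 in multiples of 4 for
  // an i32 load or store.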
4536  return V == (V & ((1LL << 5) - 1));
4537}
4538
4539static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
4540                                      const ARMSubtarget *Subtarget) {
4541  bool isNeg = false;
4542  if (V < 0) {
4543    isNeg = true;
4544    V = - V;
4545  }
4546
4547  switch (VT.getSimpleVT().SimpleTy) {
4548  default: return false;
4549  case MVT::i1:
4550  case MVT::i8:
4551  case MVT::i16:
4552  case MVT::i32:
4553    // + imm12 or - imm8
4554    if (isNeg)
4555      return V == (V & ((1LL << 8) - 1));
4556    return V == (V & ((1LL << 12) - 1));
4557  case MVT::f32:
4558  case MVT::f64:
4559    // Same as ARM mode. FIXME: NEON?
4560    if (!Subtarget->hasVFP2())
4561      return false;
4562    if ((V & 3) != 0)
4563      return false;
4564    V >>= 2;
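    // An 8-bit immediate scaled by 4, i.e. offsets up to 1020 in multiples
    // of 4.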
4565    return V == (V & ((1LL << 8) - 1));
4566  }
4567}
4568
4569/// isLegalAddressImmediate - Return true if the integer value can be used
4570/// as the offset of the target addressing mode for load / store of the
4571/// given type.
4572static bool isLegalAddressImmediate(int64_t V, EVT VT,
4573                                    const ARMSubtarget *Subtarget) {
4574  if (V == 0)
4575    return true;
4576
4577  if (!VT.isSimple())
4578    return false;
4579
4580  if (Subtarget->isThumb1Only())
4581    return isLegalT1AddressImmediate(V, VT);
4582  else if (Subtarget->isThumb2())
4583    return isLegalT2AddressImmediate(V, VT, Subtarget);
4584
4585  // ARM mode.
4586  if (V < 0)
4587    V = - V;
4588  switch (VT.getSimpleVT().SimpleTy) {
4589  default: return false;
4590  case MVT::i1:
4591  case MVT::i8:
4592  case MVT::i32:
4593    // +- imm12
4594    return V == (V & ((1LL << 12) - 1));
4595  case MVT::i16:
4596    // +- imm8
4597    return V == (V & ((1LL << 8) - 1));
4598  case MVT::f32:
4599  case MVT::f64:
4600    if (!Subtarget->hasVFP2()) // FIXME: NEON?
4601      return false;
4602    if ((V & 3) != 0)
4603      return false;
4604    V >>= 2;
4605    return V == (V & ((1LL << 8) - 1));
4606  }
4607}
4608
4609bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
4610                                                      EVT VT) const {
4611  int Scale = AM.Scale;
4612  if (Scale < 0)
4613    return false;
4614
4615  switch (VT.getSimpleVT().SimpleTy) {
4616  default: return false;
4617  case MVT::i1:
4618  case MVT::i8:
4619  case MVT::i16:
4620  case MVT::i32:
4621    if (Scale == 1)
4622      return true;
4623    // r + r << imm
4624    Scale = Scale & ~1;
4625    return Scale == 2 || Scale == 4 || Scale == 8;
4626  case MVT::i64:
4627    // r + r
4628    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4629      return true;
4630    return false;
4631  case MVT::isVoid:
4632    // Note, we allow "void" uses (basically, uses that aren't loads or
4633    // stores), because ARM allows folding a scale into many arithmetic
4634    // operations.  This should be made more precise and revisited later.
4635
4636    // Allow r << imm, but the imm has to be a multiple of two.
4637    if (Scale & 1) return false;
4638    return isPowerOf2_32(Scale);
4639  }
4640}
4641
4642/// isLegalAddressingMode - Return true if the addressing mode represented
4643/// by AM is legal for this target, for a load/store of the specified type.
4644bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
4645                                              const Type *Ty) const {
4646  EVT VT = getValueType(Ty, true);
4647  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
4648    return false;
4649
4650  // Can never fold addr of global into load/store.
4651  if (AM.BaseGV)
4652    return false;
4653
4654  switch (AM.Scale) {
4655  case 0:  // no scale reg, must be "r+i" or "r", or "i".
4656    break;
4657  case 1:
4658    if (Subtarget->isThumb1Only())
4659      return false;
4660    // FALL THROUGH.
4661  default:
4662    // ARM doesn't support any R+R*scale+imm addr modes.
4663    if (AM.BaseOffs)
4664      return false;
4665
4666    if (!VT.isSimple())
4667      return false;
4668
4669    if (Subtarget->isThumb2())
4670      return isLegalT2ScaledAddressingMode(AM, VT);
4671
4672    int Scale = AM.Scale;
4673    switch (VT.getSimpleVT().SimpleTy) {
4674    default: return false;
4675    case MVT::i1:
4676    case MVT::i8:
4677    case MVT::i32:
4678      if (Scale < 0) Scale = -Scale;
4679      if (Scale == 1)
4680        return true;
4681      // r + r << imm
4682      return isPowerOf2_32(Scale & ~1);
4683    case MVT::i16:
4684    case MVT::i64:
4685      // r + r
4686      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4687        return true;
4688      return false;
4689
4690    case MVT::isVoid:
4691      // Note, we allow "void" uses (basically, uses that aren't loads or
4692      // stores), because ARM allows folding a scale into many arithmetic
4693      // operations.  This should be made more precise and revisited later.
4694
4695      // Allow r << imm, but the imm has to be a multiple of two.
4696      if (Scale & 1) return false;
4697      return isPowerOf2_32(Scale);
4698    }
4699    break;
4700  }
4701  return true;
4702}
4703
4704/// isLegalICmpImmediate - Return true if the specified immediate is a legal
4705/// icmp immediate, that is, the target has icmp instructions which can compare
4706/// a register against the immediate without having to materialize the
4707/// immediate into a register.
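/// For example, Thumb1 only compares against an 8-bit unsigned immediate, so
/// only values 0..255 qualify there; ARM and Thumb2 accept their usual
/// rotated / modified immediate encodings.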
4708bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4709  if (!Subtarget->isThumb())
4710    return ARM_AM::getSOImmVal(Imm) != -1;
4711  if (Subtarget->isThumb2())
4712    return ARM_AM::getT2SOImmVal(Imm) != -1;
4713  return Imm >= 0 && Imm <= 255;
4714}
4715
4716static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
4717                                      bool isSEXTLoad, SDValue &Base,
4718                                      SDValue &Offset, bool &isInc,
4719                                      SelectionDAG &DAG) {
4720  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4721    return false;
4722
4723  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
4724    // AddressingMode 3
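    // (base register +/- imm8 or +/- register, as used by halfword and
    // signed-byte loads / stores).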
4725    Base = Ptr->getOperand(0);
4726    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4727      int RHSC = (int)RHS->getZExtValue();
4728      if (RHSC < 0 && RHSC > -256) {
4729        assert(Ptr->getOpcode() == ISD::ADD);
4730        isInc = false;
4731        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4732        return true;
4733      }
4734    }
4735    isInc = (Ptr->getOpcode() == ISD::ADD);
4736    Offset = Ptr->getOperand(1);
4737    return true;
4738  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
4739    // AddressingMode 2
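    // (base register +/- imm12 or +/- optionally-shifted register, as used by
    // word and unsigned-byte loads / stores).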
4740    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4741      int RHSC = (int)RHS->getZExtValue();
4742      if (RHSC < 0 && RHSC > -0x1000) {
4743        assert(Ptr->getOpcode() == ISD::ADD);
4744        isInc = false;
4745        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4746        Base = Ptr->getOperand(0);
4747        return true;
4748      }
4749    }
4750
4751    if (Ptr->getOpcode() == ISD::ADD) {
4752      isInc = true;
4753      ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
4754      if (ShOpcVal != ARM_AM::no_shift) {
4755        Base = Ptr->getOperand(1);
4756        Offset = Ptr->getOperand(0);
4757      } else {
4758        Base = Ptr->getOperand(0);
4759        Offset = Ptr->getOperand(1);
4760      }
4761      return true;
4762    }
4763
4764    isInc = (Ptr->getOpcode() == ISD::ADD);
4765    Base = Ptr->getOperand(0);
4766    Offset = Ptr->getOperand(1);
4767    return true;
4768  }
4769
4770  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
4771  return false;
4772}
4773
4774static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
4775                                     bool isSEXTLoad, SDValue &Base,
4776                                     SDValue &Offset, bool &isInc,
4777                                     SelectionDAG &DAG) {
4778  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4779    return false;
4780
4781  Base = Ptr->getOperand(0);
4782  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4783    int RHSC = (int)RHS->getZExtValue();
4784    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
4785      assert(Ptr->getOpcode() == ISD::ADD);
4786      isInc = false;
4787      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4788      return true;
4789    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
4790      isInc = Ptr->getOpcode() == ISD::ADD;
4791      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
4792      return true;
4793    }
4794  }
4795
4796  return false;
4797}
4798
4799/// getPreIndexedAddressParts - Returns true if the node's address can be
4800/// legally represented as a pre-indexed load / store address, returning the
4801/// base pointer, offset, and addressing mode by reference.
4802bool
4803ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
4804                                             SDValue &Offset,
4805                                             ISD::MemIndexedMode &AM,
4806                                             SelectionDAG &DAG) const {
4807  if (Subtarget->isThumb1Only())
4808    return false;
4809
4810  EVT VT;
4811  SDValue Ptr;
4812  bool isSEXTLoad = false;
4813  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4814    Ptr = LD->getBasePtr();
4815    VT  = LD->getMemoryVT();
4816    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4817  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4818    Ptr = ST->getBasePtr();
4819    VT  = ST->getMemoryVT();
4820  } else
4821    return false;
4822
4823  bool isInc;
4824  bool isLegal = false;
4825  if (Subtarget->isThumb2())
4826    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4827                                       Offset, isInc, DAG);
4828  else
4829    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4830                                        Offset, isInc, DAG);
4831  if (!isLegal)
4832    return false;
4833
4834  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
4835  return true;
4836}
4837
4838/// getPostIndexedAddressParts - Returns true if this node can be combined
4839/// with a load / store to form a post-indexed load / store, returning the
4840/// base pointer, offset, and addressing mode by reference.
4841bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
4842                                                   SDValue &Base,
4843                                                   SDValue &Offset,
4844                                                   ISD::MemIndexedMode &AM,
4845                                                   SelectionDAG &DAG) const {
4846  if (Subtarget->isThumb1Only())
4847    return false;
4848
4849  EVT VT;
4850  SDValue Ptr;
4851  bool isSEXTLoad = false;
4852  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4853    VT  = LD->getMemoryVT();
4854    Ptr = LD->getBasePtr();
4855    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4856  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4857    VT  = ST->getMemoryVT();
4858    Ptr = ST->getBasePtr();
4859  } else
4860    return false;
4861
4862  bool isInc;
4863  bool isLegal = false;
4864  if (Subtarget->isThumb2())
4865    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4866                                       isInc, DAG);
4867  else
4868    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4869                                        isInc, DAG);
4870  if (!isLegal)
4871    return false;
4872
4873  if (Ptr != Base) {
4874    // Swap base ptr and offset to catch more post-index load / store when
4875    // it's legal. In Thumb2 mode, offset must be an immediate.
4876    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
4877        !Subtarget->isThumb2())
4878      std::swap(Base, Offset);
4879
4880    // Post-indexed load / store update the base pointer.
4881    if (Ptr != Base)
4882      return false;
4883  }
4884
4885  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
4886  return true;
4887}
4888
4889void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
4890                                                       const APInt &Mask,
4891                                                       APInt &KnownZero,
4892                                                       APInt &KnownOne,
4893                                                       const SelectionDAG &DAG,
4894                                                       unsigned Depth) const {
4895  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
4896  switch (Op.getOpcode()) {
4897  default: break;
4898  case ARMISD::CMOV: {
4899    // Bits are known zero/one if known on the LHS and RHS.
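    // For example, if both inputs have their top 16 bits known zero, so does
    // the CMOV result.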
4900    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
4901    if (KnownZero == 0 && KnownOne == 0) return;
4902
4903    APInt KnownZeroRHS, KnownOneRHS;
4904    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
4905                          KnownZeroRHS, KnownOneRHS, Depth+1);
4906    KnownZero &= KnownZeroRHS;
4907    KnownOne  &= KnownOneRHS;
4908    return;
4909  }
4910  }
4911}
4912
4913//===----------------------------------------------------------------------===//
4914//                           ARM Inline Assembly Support
4915//===----------------------------------------------------------------------===//
4916
4917/// getConstraintType - Given a constraint letter, return the type of
4918/// constraint it is for this target.
4919ARMTargetLowering::ConstraintType
4920ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
4921  if (Constraint.size() == 1) {
4922    switch (Constraint[0]) {
4923    default:  break;
4924    case 'l': return C_RegisterClass;
4925    case 'w': return C_RegisterClass;
4926    }
4927  }
4928  return TargetLowering::getConstraintType(Constraint);
4929}
4930
4931std::pair<unsigned, const TargetRegisterClass*>
4932ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4933                                                EVT VT) const {
4934  if (Constraint.size() == 1) {
4935    // GCC ARM Constraint Letters
4936    switch (Constraint[0]) {
4937    case 'l':
4938      if (Subtarget->isThumb())
4939        return std::make_pair(0U, ARM::tGPRRegisterClass);
4940      else
4941        return std::make_pair(0U, ARM::GPRRegisterClass);
4942    case 'r':
4943      return std::make_pair(0U, ARM::GPRRegisterClass);
4944    case 'w':
4945      if (VT == MVT::f32)
4946        return std::make_pair(0U, ARM::SPRRegisterClass);
4947      if (VT.getSizeInBits() == 64)
4948        return std::make_pair(0U, ARM::DPRRegisterClass);
4949      if (VT.getSizeInBits() == 128)
4950        return std::make_pair(0U, ARM::QPRRegisterClass);
4951      break;
4952    }
4953  }
4954  if (StringRef("{cc}").equals_lower(Constraint))
4955    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
4956
4957  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4958}
4959
4960std::vector<unsigned> ARMTargetLowering::
4961getRegClassForInlineAsmConstraint(const std::string &Constraint,
4962                                  EVT VT) const {
4963  if (Constraint.size() != 1)
4964    return std::vector<unsigned>();
4965
4966  switch (Constraint[0]) {      // GCC ARM Constraint Letters
4967  default: break;
4968  case 'l':
4969    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4970                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4971                                 0);
4972  case 'r':
4973    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4974                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4975                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
4976                                 ARM::R12, ARM::LR, 0);
4977  case 'w':
4978    if (VT == MVT::f32)
4979      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
4980                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
4981                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
4982                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
4983                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
4984                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
4985                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
4986                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
4987    if (VT.getSizeInBits() == 64)
4988      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
4989                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
4990                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
4991                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
4992    if (VT.getSizeInBits() == 128)
4993      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
4994                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
4995    break;
4996  }
4997
4998  return std::vector<unsigned>();
4999}
5000
5001/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5002/// vector.  If it is invalid, don't add anything to Ops.
5003void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5004                                                     char Constraint,
5005                                                     bool hasMemory,
5006                                                     std::vector<SDValue>&Ops,
5007                                                     SelectionDAG &DAG) const {
5008  SDValue Result(0, 0);
5009
5010  switch (Constraint) {
5011  default: break;
5012  case 'I': case 'J': case 'K': case 'L':
5013  case 'M': case 'N': case 'O':
5014    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5015    if (!C)
5016      return;
5017
5018    int64_t CVal64 = C->getSExtValue();
5019    int CVal = (int) CVal64;
5020    // None of these constraints allow values larger than 32 bits.  Check
5021    // that the value fits in an int.
5022    if (CVal != CVal64)
5023      return;
5024
5025    switch (Constraint) {
5026      case 'I':
5027        if (Subtarget->isThumb1Only()) {
5028          // This must be a constant between 0 and 255, for ADD
5029          // immediates.
5030          if (CVal >= 0 && CVal <= 255)
5031            break;
5032        } else if (Subtarget->isThumb2()) {
5033          // A constant that can be used as an immediate value in a
5034          // data-processing instruction.
5035          if (ARM_AM::getT2SOImmVal(CVal) != -1)
5036            break;
5037        } else {
5038          // A constant that can be used as an immediate value in a
5039          // data-processing instruction.
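          // For example, 0xFF00 qualifies (an 8-bit value rotated by an even
          // amount), while 0x101 does not.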
5040          if (ARM_AM::getSOImmVal(CVal) != -1)
5041            break;
5042        }
5043        return;
5044
5045      case 'J':
5046        if (Subtarget->isThumb()) {  // FIXME thumb2
5047          // This must be a constant between -255 and -1, for negated ADD
5048          // immediates. This can be used in GCC with an "n" modifier that
5049          // prints the negated value, for use with SUB instructions. It is
5050          // not useful otherwise but is implemented for compatibility.
5051          if (CVal >= -255 && CVal <= -1)
5052            break;
5053        } else {
5054          // This must be a constant between -4095 and 4095. It is not clear
5055          // what this constraint is intended for. Implemented for
5056          // compatibility with GCC.
5057          if (CVal >= -4095 && CVal <= 4095)
5058            break;
5059        }
5060        return;
5061
5062      case 'K':
5063        if (Subtarget->isThumb1Only()) {
5064          // A 32-bit value where only one byte has a nonzero value. Exclude
5065          // zero to match GCC. This constraint is used by GCC internally for
5066          // constants that can be loaded with a move/shift combination.
5067          // It is not useful otherwise but is implemented for compatibility.
5068          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
5069            break;
5070        } else if (Subtarget->isThumb2()) {
5071          // A constant whose bitwise inverse can be used as an immediate
5072          // value in a data-processing instruction. This can be used in GCC
5073          // with a "B" modifier that prints the inverted value, for use with
5074          // BIC and MVN instructions. It is not useful otherwise but is
5075          // implemented for compatibility.
5076          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
5077            break;
5078        } else {
5079          // A constant whose bitwise inverse can be used as an immediate
5080          // value in a data-processing instruction. This can be used in GCC
5081          // with a "B" modifier that prints the inverted value, for use with
5082          // BIC and MVN instructions. It is not useful otherwise but is
5083          // implemented for compatibility.
5084          if (ARM_AM::getSOImmVal(~CVal) != -1)
5085            break;
5086        }
5087        return;
5088
5089      case 'L':
5090        if (Subtarget->isThumb1Only()) {
5091          // This must be a constant between -7 and 7,
5092          // for 3-operand ADD/SUB immediate instructions.
5093          if (CVal >= -7 && CVal < 7)
5094            break;
5095        } else if (Subtarget->isThumb2()) {
5096          // A constant whose negation can be used as an immediate value in a
5097          // data-processing instruction. This can be used in GCC with an "n"
5098          // modifier that prints the negated value, for use with SUB
5099          // instructions. It is not useful otherwise but is implemented for
5100          // compatibility.
5101          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
5102            break;
5103        } else {
5104          // A constant whose negation can be used as an immediate value in a
5105          // data-processing instruction. This can be used in GCC with an "n"
5106          // modifier that prints the negated value, for use with SUB
5107          // instructions. It is not useful otherwise but is implemented for
5108          // compatibility.
5109          if (ARM_AM::getSOImmVal(-CVal) != -1)
5110            break;
5111        }
5112        return;
5113
5114      case 'M':
5115        if (Subtarget->isThumb()) { // FIXME thumb2
5116          // This must be a multiple of 4 between 0 and 1020, for
5117          // ADD sp + immediate.
5118          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
5119            break;
5120        } else {
5121          // A power of two or a constant between 0 and 32.  This is used in
5122          // GCC for the shift amount on shifted register operands, but it is
5123          // useful in general for any shift amounts.
5124          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
5125            break;
5126        }
5127        return;
5128
5129      case 'N':
5130        if (Subtarget->isThumb()) {  // FIXME thumb2
5131          // This must be a constant between 0 and 31, for shift amounts.
5132          if (CVal >= 0 && CVal <= 31)
5133            break;
5134        }
5135        return;
5136
5137      case 'O':
5138        if (Subtarget->isThumb()) {  // FIXME thumb2
5139          // This must be a multiple of 4 between -508 and 508, for
5140          // ADD/SUB sp = sp + immediate.
5141          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
5142            break;
5143        }
5144        return;
5145    }
5146    Result = DAG.getTargetConstant(CVal, Op.getValueType());
5147    break;
5148  }
5149
5150  if (Result.getNode()) {
5151    Ops.push_back(Result);
5152    return;
5153  }
5154  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
5155                                                      Ops, DAG);
5156}
5157
5158bool
5159ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
5160  // The ARM target isn't yet aware of offsets.
5161  return false;
5162}
5163
5164int ARM::getVFPf32Imm(const APFloat &FPImm) {
5165  APInt Imm = FPImm.bitcastToAPInt();
5166  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
5167  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
5168  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
5169
5170  // We can handle 4 bits of mantissa.
5171  // mantissa = (16+UInt(e:f:g:h))/16.
5172  if (Mantissa & 0x7ffff)
5173    return -1;
5174  Mantissa >>= 19;
5175  if ((Mantissa & 0xf) != Mantissa)
5176    return -1;
5177
5178  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5179  if (Exp < -3 || Exp > 4)
5180    return -1;
5181  Exp = ((Exp+3) & 0x7) ^ 4;
5182
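  // For example, 1.0f (0x3F800000) has Sign = 0, Exp = 0 and Mantissa = 0,
  // and encodes as abcdefgh = 0b01110000 = 0x70.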
5183  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5184}
5185
5186int ARM::getVFPf64Imm(const APFloat &FPImm) {
5187  APInt Imm = FPImm.bitcastToAPInt();
5188  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
5189  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
5190  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
5191
5192  // We can handle 4 bits of mantissa.
5193  // mantissa = (16+UInt(e:f:g:h))/16.
5194  if (Mantissa & 0xffffffffffffLL)
5195    return -1;
5196  Mantissa >>= 48;
5197  if ((Mantissa & 0xf) != Mantissa)
5198    return -1;
5199
5200  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5201  if (Exp < -3 || Exp > 4)
5202    return -1;
5203  Exp = ((Exp+3) & 0x7) ^ 4;
5204
5205  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5206}
5207
5208/// isFPImmLegal - Returns true if the target can instruction select the
5209/// specified FP immediate natively. If false, the legalizer will
5210/// materialize the FP immediate as a load from a constant pool.
5211bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5212  if (!Subtarget->hasVFP3())
5213    return false;
5214  if (VT == MVT::f32)
5215    return ARM::getVFPf32Imm(Imm) != -1;
5216  if (VT == MVT::f64)
5217    return ARM::getVFPf64Imm(Imm) != -1;
5218  return false;
5219}
5220