ARMISelLowering.cpp revision cdd8e46bec4e975d00a5abea808d8eb4138515c5
1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-isel"
16#include "ARM.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMISelLowering.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetMachine.h"
25#include "ARMTargetObjectFile.h"
26#include "MCTargetDesc/ARMAddressingModes.h"
27#include "llvm/CallingConv.h"
28#include "llvm/Constants.h"
29#include "llvm/Function.h"
30#include "llvm/GlobalValue.h"
31#include "llvm/Instruction.h"
32#include "llvm/Instructions.h"
33#include "llvm/Intrinsics.h"
34#include "llvm/Type.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/IntrinsicLowering.h"
37#include "llvm/CodeGen/MachineBasicBlock.h"
38#include "llvm/CodeGen/MachineFrameInfo.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineModuleInfo.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/PseudoSourceValue.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/MC/MCSectionMachO.h"
46#include "llvm/Target/TargetOptions.h"
47#include "llvm/ADT/VectorExtras.h"
48#include "llvm/ADT/StringExtras.h"
49#include "llvm/ADT/Statistic.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/ErrorHandling.h"
52#include "llvm/Support/MathExtras.h"
53#include "llvm/Support/raw_ostream.h"
54#include <sstream>
55using namespace llvm;
56
57STATISTIC(NumTailCalls, "Number of tail calls");
58STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
59
60// This option should go away when tail calls fully work.
61static cl::opt<bool>
62EnableARMTailCalls("arm-tail-calls", cl::Hidden,
63  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
64  cl::init(false));
65
66cl::opt<bool>
67EnableARMLongCalls("arm-long-calls", cl::Hidden,
68  cl::desc("Generate calls via indirect call instructions"),
69  cl::init(false));
70
71static cl::opt<bool>
72ARMInterworking("arm-interworking", cl::Hidden,
73  cl::desc("Enable / disable ARM interworking (for debugging only)"),
74  cl::init(true));
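// These cl::opt flags surface as code-generator (llc) command-line options,
// e.g. (illustrative): llc -mtriple=armv7-linux-gnueabi -arm-long-calls foo.ll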
75
76namespace llvm {
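  // ARMCCState - A CCState that additionally records whether it is being used
  // for call lowering or for prologue (formal argument) lowering; the
  // ParmContext passed to the constructor is stored in CCState::CallOrPrologue
  // so later calling-convention code can tell the two cases apart.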
77  class ARMCCState : public CCState {
78  public:
79    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
80               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
81               LLVMContext &C, ParmContext PC)
82        : CCState(CC, isVarArg, MF, TM, locs, C) {
83      assert(((PC == Call) || (PC == Prologue)) &&
84             "ARMCCState users must specify whether their context is call "
85             "or prologue generation.");
86      CallOrPrologue = PC;
87    }
88  };
89}
90
91// The APCS parameter registers.
92static const unsigned GPRArgRegs[] = {
93  ARM::R0, ARM::R1, ARM::R2, ARM::R3
94};
95
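/// addTypeForNEON - Configure the operation actions for one NEON vector type:
/// loads/stores are promoted to PromotedLdStVT, integer bitwise ops to
/// PromotedBitwiseVT, shifts and shuffles are custom lowered, and operations
/// NEON cannot perform directly (divide, remainder, extending loads, etc.) are
/// expanded.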
96void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
97                                       EVT PromotedBitwiseVT) {
98  if (VT != PromotedLdStVT) {
99    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
100    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
101                       PromotedLdStVT.getSimpleVT());
102
103    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
104    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
105                       PromotedLdStVT.getSimpleVT());
106  }
107
108  EVT ElemTy = VT.getVectorElementType();
109  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
110    setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
111  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
112  if (ElemTy != MVT::i32) {
113    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
114    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
115    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
116    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
117  }
118  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
119  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
120  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
121  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
122  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
123  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
124  if (VT.isInteger()) {
125    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
126    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
127    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
128    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
129    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
130    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
131         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
132      setTruncStoreAction(VT.getSimpleVT(),
133                          (MVT::SimpleValueType)InnerVT, Expand);
134  }
135  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
136
137  // Promote all bit-wise operations.
138  if (VT.isInteger() && VT != PromotedBitwiseVT) {
139    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
140    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
141                       PromotedBitwiseVT.getSimpleVT());
142    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
143    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
144                       PromotedBitwiseVT.getSimpleVT());
145    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
146    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
147                       PromotedBitwiseVT.getSimpleVT());
148  }
149
150  // Neon does not support vector divide/remainder operations.
151  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
152  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
153  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
154  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
155  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
156  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
157}
158
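/// addDRTypeForNEON - Make a 64-bit vector type legal in the D (DPR) register
/// class and set up its NEON operation actions.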
159void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
160  addRegisterClass(VT, ARM::DPRRegisterClass);
161  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
162}
163
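/// addQRTypeForNEON - Make a 128-bit vector type legal in the Q (QPR) register
/// class and set up its NEON operation actions.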
164void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
165  addRegisterClass(VT, ARM::QPRRegisterClass);
166  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
167}
168
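// createTLOF - Select the target object-file lowering: Mach-O for Darwin
// targets, the ARM ELF variant otherwise.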
169static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
170  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
171    return new TargetLoweringObjectFileMachO();
172
173  return new ARMElfTargetObjectFile();
174}
175
176ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
177    : TargetLowering(TM, createTLOF(TM)) {
178  Subtarget = &TM.getSubtarget<ARMSubtarget>();
179  RegInfo = TM.getRegisterInfo();
180  Itins = TM.getInstrItineraryData();
181
182  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
183
184  if (Subtarget->isTargetDarwin()) {
185    // Uses VFP for Thumb libfuncs if available.
186    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
187      // Single-precision floating-point arithmetic.
188      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
189      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
190      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
191      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
192
193      // Double-precision floating-point arithmetic.
194      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
195      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
196      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
197      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
198
199      // Single-precision comparisons.
200      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
201      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
202      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
203      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
204      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
205      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
206      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
207      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
208
209      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
210      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
211      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
212      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
213      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
214      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
215      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
216      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
217
218      // Double-precision comparisons.
219      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
220      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
221      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
222      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
223      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
224      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
225      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
226      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
227
228      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
229      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
230      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
231      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
232      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
233      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
234      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
235      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
236
237      // Floating-point to integer conversions.
238      // i64 conversions are done via library routines even when generating VFP
239      // instructions, so use the same ones.
240      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
241      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
242      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
243      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
244
245      // Conversions between floating types.
246      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
247      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
248
249      // Integer to floating-point conversions.
250      // i64 conversions are done via library routines even when generating VFP
251      // instructions, so use the same ones.
252      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
253      // e.g., __floatunsidf vs. __floatunssidfvfp.
254      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
255      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
256      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
257      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
258    }
259  }
260
261  // These libcalls are not available in 32-bit.
262  setLibcallName(RTLIB::SHL_I128, 0);
263  setLibcallName(RTLIB::SRL_I128, 0);
264  setLibcallName(RTLIB::SRA_I128, 0);
265
266  if (Subtarget->isAAPCS_ABI()) {
267    // Double-precision floating-point arithmetic helper functions
268    // RTABI chapter 4.1.2, Table 2
269    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
270    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
271    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
272    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
273    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
274    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
275    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
276    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
277
278    // Double-precision floating-point comparison helper functions
279    // RTABI chapter 4.1.2, Table 3
280    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
281    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
282    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
283    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
284    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
285    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
286    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
287    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
288    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
289    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
290    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
291    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
292    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
293    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
294    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
295    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
296    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
297    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
298    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
299    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
300    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
301    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
302    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
303    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
304
305    // Single-precision floating-point arithmetic helper functions
306    // RTABI chapter 4.1.2, Table 4
307    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
308    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
309    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
310    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
311    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
312    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
313    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
314    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
315
316    // Single-precision floating-point comparison helper functions
317    // RTABI chapter 4.1.2, Table 5
318    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
319    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
320    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
321    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
322    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
323    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
324    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
325    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
326    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
327    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
328    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
329    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
330    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
331    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
332    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
333    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
334    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
335    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
336    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
337    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
338    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
339    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
340    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
341    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
342
343    // Floating-point to integer conversions.
344    // RTABI chapter 4.1.2, Table 6
345    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
346    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
347    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
348    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
349    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
350    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
351    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
352    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
353    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
354    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
355    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
356    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
357    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
358    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
359    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
360    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
361
362    // Conversions between floating types.
363    // RTABI chapter 4.1.2, Table 7
364    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
365    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
366    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
367    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
368
369    // Integer to floating-point conversions.
370    // RTABI chapter 4.1.2, Table 8
371    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
372    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
373    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
374    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
375    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
376    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
377    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
378    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
379    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
380    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
381    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
382    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
383    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
384    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
385    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
386    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
387
388    // Long long helper functions
389    // RTABI chapter 4.2, Table 9
390    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
391    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
392    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
393    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
394    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
395    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
396    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
397    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
398    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
399    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
400    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
401    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
402
403    // Integer division functions
404    // RTABI chapter 4.3.1
405    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
406    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
407    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
408    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
409    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
410    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
411    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
412    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
413    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
414    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
415    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
416    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
417
418    // Memory operations
419    // RTABI chapter 4.3.4
420    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
421    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
422    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
423  }
424
425  // Use divmod compiler-rt calls for iOS 5.0 and later.
426  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
427      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
428    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
429    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
430  }
431
432  if (Subtarget->isThumb1Only())
433    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
434  else
435    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
436  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
437    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
438    if (!Subtarget->isFPOnlySP())
439      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
440
441    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
442  }
443
444  if (Subtarget->hasNEON()) {
445    addDRTypeForNEON(MVT::v2f32);
446    addDRTypeForNEON(MVT::v8i8);
447    addDRTypeForNEON(MVT::v4i16);
448    addDRTypeForNEON(MVT::v2i32);
449    addDRTypeForNEON(MVT::v1i64);
450
451    addQRTypeForNEON(MVT::v4f32);
452    addQRTypeForNEON(MVT::v2f64);
453    addQRTypeForNEON(MVT::v16i8);
454    addQRTypeForNEON(MVT::v8i16);
455    addQRTypeForNEON(MVT::v4i32);
456    addQRTypeForNEON(MVT::v2i64);
457
458    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
459    // neither Neon nor VFP supports any arithmetic operations on it.
460    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
461    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
462    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
463    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
464    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
465    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
466    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
467    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
468    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
469    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
470    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
471    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
472    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
473    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
474    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
475    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
476    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
477    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
478    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
479    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
480    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
481    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
482    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
483    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
484
485    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
486
487    // Neon does not support some operations on v1i64 and v2i64 types.
488    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
489    // Custom handling for some quad-vector types to detect VMULL.
490    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
491    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
492    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
493    // Custom handling for some vector types to avoid expensive expansions
494    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
495    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
496    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
497    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
498    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
499    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
500    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
501    // a destination type that is wider than the source.
502    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
503    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
504
505    setTargetDAGCombine(ISD::INTRINSIC_VOID);
506    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
507    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
508    setTargetDAGCombine(ISD::SHL);
509    setTargetDAGCombine(ISD::SRL);
510    setTargetDAGCombine(ISD::SRA);
511    setTargetDAGCombine(ISD::SIGN_EXTEND);
512    setTargetDAGCombine(ISD::ZERO_EXTEND);
513    setTargetDAGCombine(ISD::ANY_EXTEND);
514    setTargetDAGCombine(ISD::SELECT_CC);
515    setTargetDAGCombine(ISD::BUILD_VECTOR);
516    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
517    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
518    setTargetDAGCombine(ISD::STORE);
519    setTargetDAGCombine(ISD::FP_TO_SINT);
520    setTargetDAGCombine(ISD::FP_TO_UINT);
521    setTargetDAGCombine(ISD::FDIV);
522
523    setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
524  }
525
526  computeRegisterProperties();
527
528  // ARM does not have f32 extending load.
529  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
530
531  // ARM does not have i1 sign extending load.
532  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
533
534  // ARM supports all 4 flavors of integer indexed load / store.
535  if (!Subtarget->isThumb1Only()) {
536    for (unsigned im = (unsigned)ISD::PRE_INC;
537         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
538      setIndexedLoadAction(im,  MVT::i1,  Legal);
539      setIndexedLoadAction(im,  MVT::i8,  Legal);
540      setIndexedLoadAction(im,  MVT::i16, Legal);
541      setIndexedLoadAction(im,  MVT::i32, Legal);
542      setIndexedStoreAction(im, MVT::i1,  Legal);
543      setIndexedStoreAction(im, MVT::i8,  Legal);
544      setIndexedStoreAction(im, MVT::i16, Legal);
545      setIndexedStoreAction(im, MVT::i32, Legal);
546    }
547  }
548
549  // i64 operation support.
550  setOperationAction(ISD::MUL,     MVT::i64, Expand);
551  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
552  if (Subtarget->isThumb1Only()) {
553    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
554    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
555  }
556  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
557      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
558    setOperationAction(ISD::MULHS, MVT::i32, Expand);
559
560  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
561  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
562  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
563  setOperationAction(ISD::SRL,       MVT::i64, Custom);
564  setOperationAction(ISD::SRA,       MVT::i64, Custom);
565
566  if (!Subtarget->isThumb1Only()) {
567    // FIXME: We should do this for Thumb1 as well.
568    setOperationAction(ISD::ADDC,    MVT::i32, Custom);
569    setOperationAction(ISD::ADDE,    MVT::i32, Custom);
570    setOperationAction(ISD::SUBC,    MVT::i32, Custom);
571    setOperationAction(ISD::SUBE,    MVT::i32, Custom);
572  }
573
574  // ARM does not have ROTL.
575  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
576  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
577  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
578  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
579    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
580
581  // Only ARMv6 has BSWAP.
582  if (!Subtarget->hasV6Ops())
583    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
584
585  // These are expanded into libcalls.
586  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
587    // v7M has a hardware divider
588    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
589    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
590  }
591  setOperationAction(ISD::SREM,  MVT::i32, Expand);
592  setOperationAction(ISD::UREM,  MVT::i32, Expand);
593  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
594  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
595
596  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
597  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
598  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
599  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
600  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
601
602  setOperationAction(ISD::TRAP, MVT::Other, Legal);
603
604  // Use the default implementation.
605  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
606  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
607  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
608  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
609  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
610  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
611  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
612  setOperationAction(ISD::EXCEPTIONADDR,      MVT::i32,   Expand);
613  setExceptionPointerRegister(ARM::R0);
614  setExceptionSelectorRegister(ARM::R1);
615
616  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
617  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
618  // the default expansion.
619  // FIXME: This should be checking for v6k, not just v6.
620  if (Subtarget->hasDataBarrier() ||
621      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
622    // membarrier needs custom lowering; the rest are legal and handled
623    // normally.
624    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
625    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
626    // Custom lowering for 64-bit ops
627    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
628    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
629    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
630    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
631    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
632    setOperationAction(ISD::ATOMIC_SWAP,  MVT::i64, Custom);
633    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
634    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
635    setInsertFencesForAtomic(true);
636  } else {
637    // Set them all for expansion, which will force libcalls.
638    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
639    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
640    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
641    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
642    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
643    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
644    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
645    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
646    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
647    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
648    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
649    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
650    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
651    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
652    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
653    // Unordered/Monotonic case.
654    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
655    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
656    // Since the libcalls include locking, fold in the fences
657    setShouldFoldAtomicFences(true);
658  }
659
660  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
661
662  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
663  if (!Subtarget->hasV6Ops()) {
664    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
665    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
666  }
667  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
668
669  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
670    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
671    // iff target supports vfp2.
672    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
673    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
674  }
675
676  // We want to custom lower some of our intrinsics.
677  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
678  if (Subtarget->isTargetDarwin()) {
679    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
680    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
681    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
682    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
683  }
684
685  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
686  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
687  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
688  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
689  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
690  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
691  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
692  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
693  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
694
695  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
696  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
697  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
698  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
699  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
700
701  // We don't support sin/cos/fmod/copysign/pow
702  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
703  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
704  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
705  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
706  setOperationAction(ISD::FREM,      MVT::f64, Expand);
707  setOperationAction(ISD::FREM,      MVT::f32, Expand);
708  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
709    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
710    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
711  }
712  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
713  setOperationAction(ISD::FPOW,      MVT::f32, Expand);
714
715  setOperationAction(ISD::FMA, MVT::f64, Expand);
716  setOperationAction(ISD::FMA, MVT::f32, Expand);
717
718  // Various VFP goodness
719  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
720    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
721    if (Subtarget->hasVFP2()) {
722      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
723      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
724      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
725      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
726    }
727    // Special handling for half-precision FP.
728    if (!Subtarget->hasFP16()) {
729      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
730      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
731    }
732  }
733
734  // We have target-specific dag combine patterns for the following nodes:
735  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
736  setTargetDAGCombine(ISD::ADD);
737  setTargetDAGCombine(ISD::SUB);
738  setTargetDAGCombine(ISD::MUL);
739
740  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
741    setTargetDAGCombine(ISD::OR);
742  if (Subtarget->hasNEON())
743    setTargetDAGCombine(ISD::AND);
744
745  setStackPointerRegisterToSaveRestore(ARM::SP);
746
747  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
748    setSchedulingPreference(Sched::RegPressure);
749  else
750    setSchedulingPreference(Sched::Hybrid);
751
752  // Temporary - rewrite interface to use type.
753  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
754
755  // On ARM arguments smaller than 4 bytes are extended, so all arguments
756  // are at least 4 bytes aligned.
757  setMinStackArgumentAlignment(4);
758
759  benefitFromCodePlacementOpt = true;
760
761  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
762}
763
764// FIXME: It might make sense to define the representative register class as the
765// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
766// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
767// SPR's representative would be DPR_VFP2. This should work well if register
768// pressure tracking were modified such that a register use would increment the
769// pressure of the register class's representative and all of its super
770// classes' representatives transitively. We have not implemented this because
771// of the difficulty prior to coalescing of modeling operand register classes
772// due to the common occurrence of cross class copies and subregister insertions
773// and extractions.
774std::pair<const TargetRegisterClass*, uint8_t>
775ARMTargetLowering::findRepresentativeClass(EVT VT) const{
776  const TargetRegisterClass *RRC = 0;
777  uint8_t Cost = 1;
778  switch (VT.getSimpleVT().SimpleTy) {
779  default:
780    return TargetLowering::findRepresentativeClass(VT);
781  // Use DPR as representative register class for all floating point
782  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
783  // the cost is 1 for both f32 and f64.
784  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
785  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
786    RRC = ARM::DPRRegisterClass;
787    // When NEON is used for SP, only half of the register file is available
788    // because operations that define both SP and DP results will be constrained
789    // to the VFP2 class (D0-D15). We currently model this constraint prior to
790    // coalescing by double-counting the SP regs. See the FIXME above.
791    if (Subtarget->useNEONForSinglePrecisionFP())
792      Cost = 2;
793    break;
794  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
795  case MVT::v4f32: case MVT::v2f64:
796    RRC = ARM::DPRRegisterClass;
797    Cost = 2;
798    break;
799  case MVT::v4i64:
800    RRC = ARM::DPRRegisterClass;
801    Cost = 4;
802    break;
803  case MVT::v8i64:
804    RRC = ARM::DPRRegisterClass;
805    Cost = 8;
806    break;
807  }
808  return std::make_pair(RRC, Cost);
809}
810
811const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
812  switch (Opcode) {
813  default: return 0;
814  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
815  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
816  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
817  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
818  case ARMISD::CALL:          return "ARMISD::CALL";
819  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
820  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
821  case ARMISD::tCALL:         return "ARMISD::tCALL";
822  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
823  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
824  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
825  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
826  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
827  case ARMISD::CMP:           return "ARMISD::CMP";
828  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
829  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
830  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
831  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
832  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
833  case ARMISD::CMOV:          return "ARMISD::CMOV";
834
835  case ARMISD::RBIT:          return "ARMISD::RBIT";
836
837  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
838  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
839  case ARMISD::SITOF:         return "ARMISD::SITOF";
840  case ARMISD::UITOF:         return "ARMISD::UITOF";
841
842  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
843  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
844  case ARMISD::RRX:           return "ARMISD::RRX";
845
846  case ARMISD::ADDC:          return "ARMISD::ADDC";
847  case ARMISD::ADDE:          return "ARMISD::ADDE";
848  case ARMISD::SUBC:          return "ARMISD::SUBC";
849  case ARMISD::SUBE:          return "ARMISD::SUBE";
850
851  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
852  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
853
854  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
855  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
856  case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
857
858  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
859
860  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
861
862  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
863
864  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
865  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
866
867  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
868
869  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
870  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
871  case ARMISD::VCGE:          return "ARMISD::VCGE";
872  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
873  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
874  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
875  case ARMISD::VCGT:          return "ARMISD::VCGT";
876  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
877  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
878  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
879  case ARMISD::VTST:          return "ARMISD::VTST";
880
881  case ARMISD::VSHL:          return "ARMISD::VSHL";
882  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
883  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
884  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
885  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
886  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
887  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
888  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
889  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
890  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
891  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
892  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
893  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
894  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
895  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
896  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
897  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
898  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
899  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
900  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
901  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
902  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
903  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
904  case ARMISD::VDUP:          return "ARMISD::VDUP";
905  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
906  case ARMISD::VEXT:          return "ARMISD::VEXT";
907  case ARMISD::VREV64:        return "ARMISD::VREV64";
908  case ARMISD::VREV32:        return "ARMISD::VREV32";
909  case ARMISD::VREV16:        return "ARMISD::VREV16";
910  case ARMISD::VZIP:          return "ARMISD::VZIP";
911  case ARMISD::VUZP:          return "ARMISD::VUZP";
912  case ARMISD::VTRN:          return "ARMISD::VTRN";
913  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
914  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
915  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
916  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
917  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
918  case ARMISD::FMAX:          return "ARMISD::FMAX";
919  case ARMISD::FMIN:          return "ARMISD::FMIN";
920  case ARMISD::BFI:           return "ARMISD::BFI";
921  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
922  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
923  case ARMISD::VBSL:          return "ARMISD::VBSL";
924  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
925  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
926  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
927  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
928  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
929  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
930  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
931  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
932  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
933  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
934  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
935  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
936  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
937  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
938  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
939  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
940  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
941  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
942  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
943  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
944  }
945}
946
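/// getSetCCResultType - Scalar setcc results use the pointer type (i32 on
/// ARM); vector setcc results use a vector with integer elements of the same
/// width as the operands, matching NEON's all-zeros/all-ones compare results.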
947EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
948  if (!VT.isVector()) return getPointerTy();
949  return VT.changeVectorElementTypeToInteger();
950}
951
952/// getRegClassFor - Return the register class that should be used for the
953/// specified value type.
954TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
955  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
956  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
957  // load / store 4 to 8 consecutive D registers.
958  if (Subtarget->hasNEON()) {
959    if (VT == MVT::v4i64)
960      return ARM::QQPRRegisterClass;
961    else if (VT == MVT::v8i64)
962      return ARM::QQQQPRRegisterClass;
963  }
964  return TargetLowering::getRegClassFor(VT);
965}
966
967// Create a fast isel object.
968FastISel *
969ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
970  return ARM::createFastISel(funcInfo);
971}
972
973/// getMaximalGlobalOffset - Returns the maximal possible offset which can
974/// be used for loads / stores from the global.
975unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
976  return (Subtarget->isThumb1Only() ? 127 : 4095);
977}
978
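/// getSchedulingPreference - Prefer latency scheduling for nodes that produce
/// floating-point or vector values, or whose first def takes more than two
/// cycles per the instruction itinerary; otherwise schedule to reduce register
/// pressure.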
979Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
980  unsigned NumVals = N->getNumValues();
981  if (!NumVals)
982    return Sched::RegPressure;
983
984  for (unsigned i = 0; i != NumVals; ++i) {
985    EVT VT = N->getValueType(i);
986    if (VT == MVT::Glue || VT == MVT::Other)
987      continue;
988    if (VT.isFloatingPoint() || VT.isVector())
989      return Sched::Latency;
990  }
991
992  if (!N->isMachineOpcode())
993    return Sched::RegPressure;
994
995  // Loads are scheduled for latency even if the instruction itinerary
996  // is not available.
997  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
998  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
999
1000  if (MCID.getNumDefs() == 0)
1001    return Sched::RegPressure;
1002  if (!Itins->isEmpty() &&
1003      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1004    return Sched::Latency;
1005
1006  return Sched::RegPressure;
1007}
1008
1009//===----------------------------------------------------------------------===//
1010// Lowering Code
1011//===----------------------------------------------------------------------===//
1012
1013/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1014static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1015  switch (CC) {
1016  default: llvm_unreachable("Unknown condition code!");
1017  case ISD::SETNE:  return ARMCC::NE;
1018  case ISD::SETEQ:  return ARMCC::EQ;
1019  case ISD::SETGT:  return ARMCC::GT;
1020  case ISD::SETGE:  return ARMCC::GE;
1021  case ISD::SETLT:  return ARMCC::LT;
1022  case ISD::SETLE:  return ARMCC::LE;
1023  case ISD::SETUGT: return ARMCC::HI;
1024  case ISD::SETUGE: return ARMCC::HS;
1025  case ISD::SETULT: return ARMCC::LO;
1026  case ISD::SETULE: return ARMCC::LS;
1027  }
1028}
1029
1030/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
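/// Some IEEE predicates require two ARM condition checks (e.g. SETONE becomes
/// MI or GT), so a second condition is returned in CondCode2; it is ARMCC::AL
/// when only one check is needed.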
1031static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1032                        ARMCC::CondCodes &CondCode2) {
1033  CondCode2 = ARMCC::AL;
1034  switch (CC) {
1035  default: llvm_unreachable("Unknown FP condition!");
1036  case ISD::SETEQ:
1037  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1038  case ISD::SETGT:
1039  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1040  case ISD::SETGE:
1041  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1042  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1043  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1044  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1045  case ISD::SETO:   CondCode = ARMCC::VC; break;
1046  case ISD::SETUO:  CondCode = ARMCC::VS; break;
1047  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1048  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1049  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1050  case ISD::SETLT:
1051  case ISD::SETULT: CondCode = ARMCC::LT; break;
1052  case ISD::SETLE:
1053  case ISD::SETULE: CondCode = ARMCC::LE; break;
1054  case ISD::SETNE:
1055  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1056  }
1057}
1058
1059//===----------------------------------------------------------------------===//
1060//                      Calling Convention Implementation
1061//===----------------------------------------------------------------------===//
1062
1063#include "ARMGenCallingConv.inc"
1064
1065/// CCAssignFnForNode - Selects the correct CCAssignFn for the
1066/// given CallingConvention value.
1067CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1068                                                 bool Return,
1069                                                 bool isVarArg) const {
1070  switch (CC) {
1071  default:
1072    llvm_unreachable("Unsupported calling convention");
1073  case CallingConv::Fast:
1074    if (Subtarget->hasVFP2() && !isVarArg) {
1075      if (!Subtarget->isAAPCS_ABI())
1076        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1077      // For AAPCS ABI targets, just use VFP variant of the calling convention.
1078      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1079    }
1080    // Fallthrough
1081  case CallingConv::C: {
1082    // Use target triple & subtarget features to do actual dispatch.
1083    if (!Subtarget->isAAPCS_ABI())
1084      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1085    else if (Subtarget->hasVFP2() &&
1086             FloatABIType == FloatABI::Hard && !isVarArg)
1087      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1088    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1089  }
1090  case CallingConv::ARM_AAPCS_VFP:
1091    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1092  case CallingConv::ARM_AAPCS:
1093    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1094  case CallingConv::ARM_APCS:
1095    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1096  }
1097}
1098
1099/// LowerCallResult - Lower the result values of a call into the
1100/// appropriate copies out of appropriate physical registers.
1101SDValue
1102ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
1103                                   CallingConv::ID CallConv, bool isVarArg,
1104                                   const SmallVectorImpl<ISD::InputArg> &Ins,
1105                                   DebugLoc dl, SelectionDAG &DAG,
1106                                   SmallVectorImpl<SDValue> &InVals) const {
1107
1108  // Assign locations to each value returned by this call.
1109  SmallVector<CCValAssign, 16> RVLocs;
1110  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1111                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
1112  CCInfo.AnalyzeCallResult(Ins,
1113                           CCAssignFnForNode(CallConv, /* Return*/ true,
1114                                             isVarArg));
1115
1116  // Copy all of the result registers out of their specified physreg.
1117  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1118    CCValAssign VA = RVLocs[i];
1119
1120    SDValue Val;
1121    if (VA.needsCustom()) {
1122      // Handle f64 or half of a v2f64.
1123      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1124                                      InFlag);
1125      Chain = Lo.getValue(1);
1126      InFlag = Lo.getValue(2);
1127      VA = RVLocs[++i]; // skip ahead to next loc
1128      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1129                                      InFlag);
1130      Chain = Hi.getValue(1);
1131      InFlag = Hi.getValue(2);
1132      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1133
1134      if (VA.getLocVT() == MVT::v2f64) {
1135        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1136        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1137                          DAG.getConstant(0, MVT::i32));
1138
1139        VA = RVLocs[++i]; // skip ahead to next loc
1140        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1141        Chain = Lo.getValue(1);
1142        InFlag = Lo.getValue(2);
1143        VA = RVLocs[++i]; // skip ahead to next loc
1144        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1145        Chain = Hi.getValue(1);
1146        InFlag = Hi.getValue(2);
1147        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1148        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1149                          DAG.getConstant(1, MVT::i32));
1150      }
1151    } else {
1152      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1153                               InFlag);
1154      Chain = Val.getValue(1);
1155      InFlag = Val.getValue(2);
1156    }
1157
1158    switch (VA.getLocInfo()) {
1159    default: llvm_unreachable("Unknown loc info!");
1160    case CCValAssign::Full: break;
1161    case CCValAssign::BCvt:
1162      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1163      break;
1164    }
1165
1166    InVals.push_back(Val);
1167  }
1168
1169  return Chain;
1170}
1171
1172/// LowerMemOpCallTo - Store the argument to the stack.
1173SDValue
1174ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1175                                    SDValue StackPtr, SDValue Arg,
1176                                    DebugLoc dl, SelectionDAG &DAG,
1177                                    const CCValAssign &VA,
1178                                    ISD::ArgFlagsTy Flags) const {
1179  unsigned LocMemOffset = VA.getLocMemOffset();
1180  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1181  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1182  return DAG.getStore(Chain, dl, Arg, PtrOff,
1183                      MachinePointerInfo::getStack(LocMemOffset),
1184                      false, false, 0);
1185}
1186
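/// PassF64ArgInRegs - Split an f64 value (or one half of a v2f64) into two
/// i32 halves with VMOVRRD; the low half goes in VA's register and the high
/// half either goes in NextVA's register or is stored to its stack slot.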
1187void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
1188                                         SDValue Chain, SDValue &Arg,
1189                                         RegsToPassVector &RegsToPass,
1190                                         CCValAssign &VA, CCValAssign &NextVA,
1191                                         SDValue &StackPtr,
1192                                         SmallVector<SDValue, 8> &MemOpChains,
1193                                         ISD::ArgFlagsTy Flags) const {
1194
1195  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1196                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
1197  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
1198
1199  if (NextVA.isRegLoc())
1200    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
1201  else {
1202    assert(NextVA.isMemLoc());
1203    if (StackPtr.getNode() == 0)
1204      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1205
1206    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
1207                                           dl, DAG, NextVA,
1208                                           Flags));
1209  }
1210}
1211
1212/// LowerCall - Lowering a call into a callseq_start <-
1213/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1214/// nodes.
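///
/// The emitted sequence is roughly (illustrative only):
///   callseq_start -> CopyToReg(R0..R3, args) -> ARMISD::CALL{,_PRED,_NOLINK}
///     -> callseq_end -> CopyFromReg(result regs)
/// with stack-based arguments stored relative to SP between callseq_start and
/// the call node.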
1215SDValue
1216ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1217                             CallingConv::ID CallConv, bool isVarArg,
1218                             bool &isTailCall,
1219                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1220                             const SmallVectorImpl<SDValue> &OutVals,
1221                             const SmallVectorImpl<ISD::InputArg> &Ins,
1222                             DebugLoc dl, SelectionDAG &DAG,
1223                             SmallVectorImpl<SDValue> &InVals) const {
1224  MachineFunction &MF = DAG.getMachineFunction();
1225  bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
1226  bool IsSibCall = false;
1227  // Disable tail calls if they're not supported.
1228  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
1229    isTailCall = false;
1230  if (isTailCall) {
1231    // Check if it's really possible to do a tail call.
1232    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1233                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1234                                                   Outs, OutVals, Ins, DAG);
1235    // We don't support GuaranteedTailCallOpt for ARM, only automatically
1236    // detected sibcalls.
1237    if (isTailCall) {
1238      ++NumTailCalls;
1239      IsSibCall = true;
1240    }
1241  }
1242
1243  // Analyze operands of the call, assigning locations to each operand.
1244  SmallVector<CCValAssign, 16> ArgLocs;
1245  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1246                 getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
1247  CCInfo.AnalyzeCallOperands(Outs,
1248                             CCAssignFnForNode(CallConv, /* Return*/ false,
1249                                               isVarArg));
1250
1251  // Get a count of how many bytes are to be pushed on the stack.
1252  unsigned NumBytes = CCInfo.getNextStackOffset();
1253
1254  // For tail calls, memory operands are available in our caller's stack.
1255  if (IsSibCall)
1256    NumBytes = 0;
1257
1258  // Adjust the stack pointer for the new arguments...
1259  // These operations are automatically eliminated by the prolog/epilog pass
1260  if (!IsSibCall)
1261    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1262
1263  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1264
1265  RegsToPassVector RegsToPass;
1266  SmallVector<SDValue, 8> MemOpChains;
1267
1268  // Walk the register/memloc assignments, inserting copies/loads.  In the case
1269  // of tail call optimization, arguments are handled later.
1270  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1271       i != e;
1272       ++i, ++realArgIdx) {
1273    CCValAssign &VA = ArgLocs[i];
1274    SDValue Arg = OutVals[realArgIdx];
1275    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1276    bool isByVal = Flags.isByVal();
1277
1278    // Promote the value if needed.
1279    switch (VA.getLocInfo()) {
1280    default: llvm_unreachable("Unknown loc info!");
1281    case CCValAssign::Full: break;
1282    case CCValAssign::SExt:
1283      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1284      break;
1285    case CCValAssign::ZExt:
1286      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1287      break;
1288    case CCValAssign::AExt:
1289      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1290      break;
1291    case CCValAssign::BCvt:
1292      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1293      break;
1294    }
1295
1296    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1297    if (VA.needsCustom()) {
1298      if (VA.getLocVT() == MVT::v2f64) {
1299        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1300                                  DAG.getConstant(0, MVT::i32));
1301        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1302                                  DAG.getConstant(1, MVT::i32));
1303
1304        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1305                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1306
1307        VA = ArgLocs[++i]; // skip ahead to next loc
1308        if (VA.isRegLoc()) {
1309          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1310                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1311        } else {
1312          assert(VA.isMemLoc());
1313
1314          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1315                                                 dl, DAG, VA, Flags));
1316        }
1317      } else {
1318        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1319                         StackPtr, MemOpChains, Flags);
1320      }
1321    } else if (VA.isRegLoc()) {
1322      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1323    } else if (isByVal) {
1324      assert(VA.isMemLoc());
1325      unsigned offset = 0;
1326
1327      // True if this byval aggregate will be split between registers
1328      // and memory.
1329      if (CCInfo.isFirstByValRegValid()) {
1330        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1331        unsigned int i, j;
1332        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
1333          SDValue Const = DAG.getConstant(4*i, MVT::i32);
1334          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1335          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1336                                     MachinePointerInfo(),
1337                                     false, false, 0);
1338          MemOpChains.push_back(Load.getValue(1));
1339          RegsToPass.push_back(std::make_pair(j, Load));
1340        }
1341        offset = ARM::R4 - CCInfo.getFirstByValReg();
1342        CCInfo.clearFirstByValReg();
1343      }
1344
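      // Whatever portion of the byval argument was not handed off to registers
      // above is copied with an inline memcpy from (Arg + 4*offset) into the
      // outgoing stack area.  E.g. (illustrative) a 24-byte byval whose first
      // register is R2 puts 8 bytes in R2/R3 and memcpys the remaining 16.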
1345      unsigned LocMemOffset = VA.getLocMemOffset();
1346      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
1347      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
1348                                StkPtrOff);
1349      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
1350      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
1351      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
1352                                         MVT::i32);
1353      // TODO: Disable AlwaysInline when it becomes possible
1354      //       to emit a nested call sequence.
1355      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
1356                                          Flags.getByValAlign(),
1357                                          /*isVolatile=*/false,
1358                                          /*AlwaysInline=*/true,
1359                                          MachinePointerInfo(0),
1360                                          MachinePointerInfo(0)));
1361
1362    } else if (!IsSibCall) {
1363      assert(VA.isMemLoc());
1364
1365      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1366                                             dl, DAG, VA, Flags));
1367    }
1368  }
1369
1370  if (!MemOpChains.empty())
1371    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1372                        &MemOpChains[0], MemOpChains.size());
1373
1374  // Build a sequence of copy-to-reg nodes chained together with token chain
1375  // and flag operands which copy the outgoing args into the appropriate regs.
1376  SDValue InFlag;
1377  // Tail call byval lowering might overwrite argument registers so in case of
1378  // tail call optimization the copies to registers are lowered later.
1379  if (!isTailCall)
1380    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1381      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1382                               RegsToPass[i].second, InFlag);
1383      InFlag = Chain.getValue(1);
1384    }
1385
1386  // For tail calls lower the arguments to the 'real' stack slot.
1387  if (isTailCall) {
1388    // Force all the incoming stack arguments to be loaded from the stack
1389    // before any new outgoing arguments are stored to the stack, because the
1390    // outgoing stack slots may alias the incoming argument stack slots, and
1391    // the alias isn't otherwise explicit. This is slightly more conservative
1392    // than necessary, because it means that each store effectively depends
1393    // on every argument instead of just those arguments it would clobber.
1394
1395    // Do not flag preceding copytoreg stuff together with the following stuff.
1396    InFlag = SDValue();
1397    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1398      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1399                               RegsToPass[i].second, InFlag);
1400      InFlag = Chain.getValue(1);
1401    }
1402    InFlag = SDValue();
1403  }
1404
1405  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1406  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1407  // node so that legalize doesn't hack it.
1408  bool isDirect = false;
1409  bool isARMFunc = false;
1410  bool isLocalARMFunc = false;
1411  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1412
1413  if (EnableARMLongCalls) {
1414    assert(getTargetMachine().getRelocationModel() == Reloc::Static &&
1415           "long-calls with non-static relocation model!");
1416    // Handle a global address or an external symbol. If it's not one of
1417    // those, the target's already in a register, so we don't need to do
1418    // anything extra.
1419    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1420      const GlobalValue *GV = G->getGlobal();
1421      // Create a constant pool entry for the callee address
1422      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1423      ARMConstantPoolValue *CPV =
1424        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
1425
1426      // Get the address of the callee into a register
1427      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1428      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1429      Callee = DAG.getLoad(getPointerTy(), dl,
1430                           DAG.getEntryNode(), CPAddr,
1431                           MachinePointerInfo::getConstantPool(),
1432                           false, false, 0);
1433    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1434      const char *Sym = S->getSymbol();
1435
1436      // Create a constant pool entry for the callee address
1437      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1438      ARMConstantPoolValue *CPV =
1439        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1440                                      ARMPCLabelIndex, 0);
1441      // Get the address of the callee into a register
1442      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1443      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1444      Callee = DAG.getLoad(getPointerTy(), dl,
1445                           DAG.getEntryNode(), CPAddr,
1446                           MachinePointerInfo::getConstantPool(),
1447                           false, false, 0);
1448    }
1449  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1450    const GlobalValue *GV = G->getGlobal();
1451    isDirect = true;
1452    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1453    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1454                   getTargetMachine().getRelocationModel() != Reloc::Static;
1455    isARMFunc = !Subtarget->isThumb() || isStub;
1456    // ARM call to a local ARM function is predicable.
1457    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1458    // tBX takes a register source operand.
1459    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1460      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1461      ARMConstantPoolValue *CPV =
1462        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
1463      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1464      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1465      Callee = DAG.getLoad(getPointerTy(), dl,
1466                           DAG.getEntryNode(), CPAddr,
1467                           MachinePointerInfo::getConstantPool(),
1468                           false, false, 0);
1469      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1470      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1471                           getPointerTy(), Callee, PICLabel);
1472    } else {
1473      // On ELF targets for PIC code, direct calls should go through the PLT
1474      unsigned OpFlags = 0;
1475      if (Subtarget->isTargetELF() &&
1476          getTargetMachine().getRelocationModel() == Reloc::PIC_)
1477        OpFlags = ARMII::MO_PLT;
1478      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
1479    }
1480  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1481    isDirect = true;
1482    bool isStub = Subtarget->isTargetDarwin() &&
1483                  getTargetMachine().getRelocationModel() != Reloc::Static;
1484    isARMFunc = !Subtarget->isThumb() || isStub;
1485    // tBX takes a register source operand.
1486    const char *Sym = S->getSymbol();
1487    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1488      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1489      ARMConstantPoolValue *CPV =
1490        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1491                                      ARMPCLabelIndex, 4);
1492      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1493      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1494      Callee = DAG.getLoad(getPointerTy(), dl,
1495                           DAG.getEntryNode(), CPAddr,
1496                           MachinePointerInfo::getConstantPool(),
1497                           false, false, 0);
1498      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1499      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1500                           getPointerTy(), Callee, PICLabel);
1501    } else {
1502      unsigned OpFlags = 0;
1503      // On ELF targets for PIC code, direct calls should go through the PLT
1504      if (Subtarget->isTargetELF() &&
1505          getTargetMachine().getRelocationModel() == Reloc::PIC_)
1506        OpFlags = ARMII::MO_PLT;
1507      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
1508    }
1509  }
1510
1511  // FIXME: handle tail calls differently.
1512  unsigned CallOpc;
1513  if (Subtarget->isThumb()) {
1514    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1515      CallOpc = ARMISD::CALL_NOLINK;
1516    else
1517      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1518  } else {
1519    CallOpc = (isDirect || Subtarget->hasV5TOps())
1520      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1521      : ARMISD::CALL_NOLINK;
1522  }
1523
1524  std::vector<SDValue> Ops;
1525  Ops.push_back(Chain);
1526  Ops.push_back(Callee);
1527
1528  // Add argument registers to the end of the list so that they are known live
1529  // into the call.
1530  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1531    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1532                                  RegsToPass[i].second.getValueType()));
1533
1534  if (InFlag.getNode())
1535    Ops.push_back(InFlag);
1536
1537  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1538  if (isTailCall)
1539    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1540
1541  // Returns a chain and a flag for retval copy to use.
1542  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1543  InFlag = Chain.getValue(1);
1544
1545  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1546                             DAG.getIntPtrConstant(0, true), InFlag);
1547  if (!Ins.empty())
1548    InFlag = Chain.getValue(1);
1549
1550  // Handle result values, copying them out of physregs into vregs that we
1551  // return.
1552  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1553                         dl, DAG, InVals);
1554}
1555
1556/// HandleByVal - Every parameter *after* a byval parameter is passed
1557/// on the stack.  Remember the next parameter register to allocate,
1558/// and then confiscate the rest of the parameter registers to ensure
1559/// this.
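///
/// For example (illustrative): if R0 already holds a preceding argument and a
/// 16-byte byval struct comes next, the first byval register is R1; at a call
/// site the struct's stack size is reduced by 12 (the R1-R3 portion), and
/// R1-R3 are marked allocated so no later parameter lands in them.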
1560void
1561llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
1562  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
1563  assert((State->getCallOrPrologue() == Prologue ||
1564          State->getCallOrPrologue() == Call) &&
1565         "unhandled ParmContext");
1566  if ((!State->isFirstByValRegValid()) &&
1567      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
1568    State->setFirstByValReg(reg);
1569    // At a call site, a byval parameter that is split between
1570    // registers and memory needs its size truncated here.  In a
1571    // function prologue, such byval parameters are reassembled in
1572    // memory, and are not truncated.
1573    if (State->getCallOrPrologue() == Call) {
1574      unsigned excess = 4 * (ARM::R4 - reg);
1575      assert(size >= excess && "expected larger existing stack allocation");
1576      size -= excess;
1577    }
1578  }
1579  // Confiscate any remaining parameter registers to preclude their
1580  // assignment to subsequent parameters.
1581  while (State->AllocateReg(GPRArgRegs, 4))
1582    ;
1583}
1584
1585/// MatchingStackOffset - Return true if the given stack call argument is
1586/// already available in the same position (relatively) of the caller's
1587/// incoming argument stack.
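///
/// When it is, a sibcall can simply reuse the caller's incoming slot and no
/// outgoing store is needed for that argument.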
1588static
1589bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1590                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1591                         const ARMInstrInfo *TII) {
1592  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1593  int FI = INT_MAX;
1594  if (Arg.getOpcode() == ISD::CopyFromReg) {
1595    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1596    if (!TargetRegisterInfo::isVirtualRegister(VR))
1597      return false;
1598    MachineInstr *Def = MRI->getVRegDef(VR);
1599    if (!Def)
1600      return false;
1601    if (!Flags.isByVal()) {
1602      if (!TII->isLoadFromStackSlot(Def, FI))
1603        return false;
1604    } else {
1605      return false;
1606    }
1607  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1608    if (Flags.isByVal())
1609      // ByVal argument is passed in as a pointer but it's now being
1610      // dereferenced. e.g.
1611      // define @foo(%struct.X* %A) {
1612      //   tail call @bar(%struct.X* byval %A)
1613      // }
1614      return false;
1615    SDValue Ptr = Ld->getBasePtr();
1616    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1617    if (!FINode)
1618      return false;
1619    FI = FINode->getIndex();
1620  } else
1621    return false;
1622
1623  assert(FI != INT_MAX);
1624  if (!MFI->isFixedObjectIndex(FI))
1625    return false;
1626  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1627}
1628
1629/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1630/// for tail call optimization. Targets which want to do tail call
1631/// optimization should implement this function.
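///
/// The checks below reject (in order): vararg calls that pass arguments,
/// struct-return on either side, Thumb1 callers, mismatched calling
/// conventions with incompatible result locations, and outgoing stack
/// arguments that are not already in the caller's matching stack slots.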
1632bool
1633ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1634                                                     CallingConv::ID CalleeCC,
1635                                                     bool isVarArg,
1636                                                     bool isCalleeStructRet,
1637                                                     bool isCallerStructRet,
1638                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1639                                    const SmallVectorImpl<SDValue> &OutVals,
1640                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1641                                                     SelectionDAG& DAG) const {
1642  const Function *CallerF = DAG.getMachineFunction().getFunction();
1643  CallingConv::ID CallerCC = CallerF->getCallingConv();
1644  bool CCMatch = CallerCC == CalleeCC;
1645
1646  // Look for obvious safe cases to perform tail call optimization that do not
1647  // require ABI changes. This is what gcc calls sibcall.
1648
1649  // Do not sibcall optimize vararg calls unless the call site passes no
1650  // arguments at all.
1651  if (isVarArg && !Outs.empty())
1652    return false;
1653
1654  // Also avoid sibcall optimization if either caller or callee uses struct
1655  // return semantics.
1656  if (isCalleeStructRet || isCallerStructRet)
1657    return false;
1658
1659  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1660  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
1661  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
1662  // support in the assembler and linker to be used. This would need to be
1663  // fixed to fully support tail calls in Thumb1.
1664  //
1665  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1666  // LR.  This means if we need to reload LR, it takes an extra instruction,
1667  // which outweighs the value of the tail call; but here we don't know yet
1668  // whether LR is going to be used.  Probably the right approach is to
1669  // generate the tail call here and turn it back into CALL/RET in
1670  // emitEpilogue if LR is used.
1671
1672  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
1673  // but we need to make sure there are enough registers; the only valid
1674  // registers are the 4 used for parameters.  We don't currently do this
1675  // case.
1676  if (Subtarget->isThumb1Only())
1677    return false;
1678
1679  // If the calling conventions do not match, then we'd better make sure the
1680  // results are returned in the same way as what the caller expects.
1681  if (!CCMatch) {
1682    SmallVector<CCValAssign, 16> RVLocs1;
1683    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
1684                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
1685    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1686
1687    SmallVector<CCValAssign, 16> RVLocs2;
1688    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
1689                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
1690    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1691
1692    if (RVLocs1.size() != RVLocs2.size())
1693      return false;
1694    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1695      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1696        return false;
1697      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1698        return false;
1699      if (RVLocs1[i].isRegLoc()) {
1700        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1701          return false;
1702      } else {
1703        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1704          return false;
1705      }
1706    }
1707  }
1708
1709  // If the callee takes no arguments then go on to check the results of the
1710  // call.
1711  if (!Outs.empty()) {
1712    // Check if stack adjustment is needed. For now, do not do this if any
1713    // argument is passed on the stack.
1714    SmallVector<CCValAssign, 16> ArgLocs;
1715    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
1716                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
1717    CCInfo.AnalyzeCallOperands(Outs,
1718                               CCAssignFnForNode(CalleeCC, false, isVarArg));
1719    if (CCInfo.getNextStackOffset()) {
1720      MachineFunction &MF = DAG.getMachineFunction();
1721
1722      // Check if the arguments are already laid out in the right way as
1723      // the caller's fixed stack objects.
1724      MachineFrameInfo *MFI = MF.getFrameInfo();
1725      const MachineRegisterInfo *MRI = &MF.getRegInfo();
1726      const ARMInstrInfo *TII =
1727        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
1728      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1729           i != e;
1730           ++i, ++realArgIdx) {
1731        CCValAssign &VA = ArgLocs[i];
1732        EVT RegVT = VA.getLocVT();
1733        SDValue Arg = OutVals[realArgIdx];
1734        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1735        if (VA.getLocInfo() == CCValAssign::Indirect)
1736          return false;
1737        if (VA.needsCustom()) {
1738          // f64 and vector types are split into multiple registers or
1739          // register/stack-slot combinations.  The types will not match
1740          // the registers; give up on memory f64 refs until we figure
1741          // out what to do about this.
1742          if (!VA.isRegLoc())
1743            return false;
1744          if (!ArgLocs[++i].isRegLoc())
1745            return false;
1746          if (RegVT == MVT::v2f64) {
1747            if (!ArgLocs[++i].isRegLoc())
1748              return false;
1749            if (!ArgLocs[++i].isRegLoc())
1750              return false;
1751          }
1752        } else if (!VA.isRegLoc()) {
1753          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1754                                   MFI, MRI, TII))
1755            return false;
1756        }
1757      }
1758    }
1759  }
1760
1761  return true;
1762}
1763
1764SDValue
1765ARMTargetLowering::LowerReturn(SDValue Chain,
1766                               CallingConv::ID CallConv, bool isVarArg,
1767                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1768                               const SmallVectorImpl<SDValue> &OutVals,
1769                               DebugLoc dl, SelectionDAG &DAG) const {
1770
1771  // CCValAssign - represent the assignment of the return value to a location.
1772  SmallVector<CCValAssign, 16> RVLocs;
1773
1774  // CCState - Info about the registers and stack slots.
1775  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1776                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
1777
1778  // Analyze outgoing return values.
1779  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1780                                               isVarArg));
1781
1782  // If this is the first return lowered for this function, add
1783  // the regs to the liveout set for the function.
1784  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1785    for (unsigned i = 0; i != RVLocs.size(); ++i)
1786      if (RVLocs[i].isRegLoc())
1787        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1788  }
1789
1790  SDValue Flag;
1791
1792  // Copy the result values into the output registers.
1793  for (unsigned i = 0, realRVLocIdx = 0;
1794       i != RVLocs.size();
1795       ++i, ++realRVLocIdx) {
1796    CCValAssign &VA = RVLocs[i];
1797    assert(VA.isRegLoc() && "Can only return in registers!");
1798
1799    SDValue Arg = OutVals[realRVLocIdx];
1800
1801    switch (VA.getLocInfo()) {
1802    default: llvm_unreachable("Unknown loc info!");
1803    case CCValAssign::Full: break;
1804    case CCValAssign::BCvt:
1805      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1806      break;
1807    }
1808
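    // f64 and v2f64 results are returned in GPR pairs when the calling
    // convention marks them as custom: each f64 is split with VMOVRRD and the
    // two (or four) i32 halves are copied into consecutive return registers.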
1809    if (VA.needsCustom()) {
1810      if (VA.getLocVT() == MVT::v2f64) {
1811        // Extract the first half and return it in two registers.
1812        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1813                                   DAG.getConstant(0, MVT::i32));
1814        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1815                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
1816
1817        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1818        Flag = Chain.getValue(1);
1819        VA = RVLocs[++i]; // skip ahead to next loc
1820        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1821                                 HalfGPRs.getValue(1), Flag);
1822        Flag = Chain.getValue(1);
1823        VA = RVLocs[++i]; // skip ahead to next loc
1824
1825        // Extract the 2nd half and fall through to handle it as an f64 value.
1826        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1827                          DAG.getConstant(1, MVT::i32));
1828      }
1829      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1830      // available.
1831      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1832                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1833      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1834      Flag = Chain.getValue(1);
1835      VA = RVLocs[++i]; // skip ahead to next loc
1836      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1837                               Flag);
1838    } else
1839      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1840
1841    // Guarantee that all emitted copies are glued together so that nothing
1842    // can be scheduled between them and the return.
1843    Flag = Chain.getValue(1);
1844  }
1845
1846  SDValue result;
1847  if (Flag.getNode())
1848    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1849  else // Return Void
1850    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1851
1852  return result;
1853}
1854
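// isUsedByReturnOnly - Return true if N's only use is a value copied straight
// into the return: either a single CopyToReg, an f64 split through VMOVRRD
// into two CopyToRegs, or an f32 bitcast feeding one CopyToReg, with every
// copy consumed solely by ARMISD::RET_FLAG.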
1855bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
1856  if (N->getNumValues() != 1)
1857    return false;
1858  if (!N->hasNUsesOfValue(1, 0))
1859    return false;
1860
1861  unsigned NumCopies = 0;
1862  SDNode* Copies[2];
1863  SDNode *Use = *N->use_begin();
1864  if (Use->getOpcode() == ISD::CopyToReg) {
1865    Copies[NumCopies++] = Use;
1866  } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
1867    // f64 returned in a pair of GPRs.
1868    for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
1869         UI != UE; ++UI) {
1870      if (UI->getOpcode() != ISD::CopyToReg)
1871        return false;
1872      Copies[UI.getUse().getResNo()] = *UI;
1873      ++NumCopies;
1874    }
1875  } else if (Use->getOpcode() == ISD::BITCAST) {
1876    // f32 returned in a single GPR.
1877    if (!Use->hasNUsesOfValue(1, 0))
1878      return false;
1879    Use = *Use->use_begin();
1880    if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
1881      return false;
1882    Copies[NumCopies++] = Use;
1883  } else {
1884    return false;
1885  }
1886
1887  if (NumCopies != 1 && NumCopies != 2)
1888    return false;
1889
1890  bool HasRet = false;
1891  for (unsigned i = 0; i < NumCopies; ++i) {
1892    SDNode *Copy = Copies[i];
1893    for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
1894         UI != UE; ++UI) {
1895      if (UI->getOpcode() == ISD::CopyToReg) {
1896        SDNode *Use = *UI;
1897        if (Use == Copies[0] || Use == Copies[1])
1898          continue;
1899        return false;
1900      }
1901      if (UI->getOpcode() != ARMISD::RET_FLAG)
1902        return false;
1903      HasRet = true;
1904    }
1905  }
1906
1907  return HasRet;
1908}
1909
1910bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
1911  if (!EnableARMTailCalls)
1912    return false;
1913
1914  if (!CI->isTailCall())
1915    return false;
1916
1917  return !Subtarget->isThumb1Only();
1918}
1919
1920// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1921// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1922// one of the above-mentioned nodes. It has to be wrapped because otherwise
1923// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1924// be used to form addressing modes. These wrapped nodes will be selected
1925// into MOVi.
1926static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1927  EVT PtrVT = Op.getValueType();
1928  // FIXME there is no actual debug info here
1929  DebugLoc dl = Op.getDebugLoc();
1930  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1931  SDValue Res;
1932  if (CP->isMachineConstantPoolEntry())
1933    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1934                                    CP->getAlignment());
1935  else
1936    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1937                                    CP->getAlignment());
1938  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1939}
1940
1941unsigned ARMTargetLowering::getJumpTableEncoding() const {
1942  return MachineJumpTableInfo::EK_Inline;
1943}
1944
1945SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1946                                             SelectionDAG &DAG) const {
1947  MachineFunction &MF = DAG.getMachineFunction();
1948  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1949  unsigned ARMPCLabelIndex = 0;
1950  DebugLoc DL = Op.getDebugLoc();
1951  EVT PtrVT = getPointerTy();
1952  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1953  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1954  SDValue CPAddr;
1955  if (RelocM == Reloc::Static) {
1956    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1957  } else {
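    // The PIC add below executes with the PC already advanced past the label:
    // reading PC yields the instruction address plus 8 in ARM mode and plus 4
    // in Thumb mode, hence the PCAdj correction folded into the constant pool
    // entry.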
1958    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1959    ARMPCLabelIndex = AFI->createPICLabelUId();
1960    ARMConstantPoolValue *CPV =
1961      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
1962                                      ARMCP::CPBlockAddress, PCAdj);
1963    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1964  }
1965  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1966  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1967                               MachinePointerInfo::getConstantPool(),
1968                               false, false, 0);
1969  if (RelocM == Reloc::Static)
1970    return Result;
1971  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1972  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1973}
1974
1975// Lower ISD::GlobalTLSAddress using the "general dynamic" model
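//
// Conceptually this emits the equivalent of (illustrative C, with a made-up
// descriptor name):
//   void *p = __tls_get_addr(&GA_tlsgd_descriptor);
// where the descriptor's address is itself computed PC-relatively through a
// constant-pool entry carrying a TLSGD relocation.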
1976SDValue
1977ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1978                                                 SelectionDAG &DAG) const {
1979  DebugLoc dl = GA->getDebugLoc();
1980  EVT PtrVT = getPointerTy();
1981  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1982  MachineFunction &MF = DAG.getMachineFunction();
1983  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1984  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1985  ARMConstantPoolValue *CPV =
1986    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
1987                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
1988  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1989  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1990  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1991                         MachinePointerInfo::getConstantPool(),
1992                         false, false, 0);
1993  SDValue Chain = Argument.getValue(1);
1994
1995  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1996  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1997
1998  // call __tls_get_addr.
1999  ArgListTy Args;
2000  ArgListEntry Entry;
2001  Entry.Node = Argument;
2002  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2003  Args.push_back(Entry);
2004  // FIXME: is there useful debug info available here?
2005  std::pair<SDValue, SDValue> CallResult =
2006    LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
2007                false, false, false, false,
2008                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
2009                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
2010  return CallResult.first;
2011}
2012
2013// Lower ISD::GlobalTLSAddress using the "initial exec" or
2014// "local exec" model.
2015SDValue
2016ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2017                                        SelectionDAG &DAG) const {
2018  const GlobalValue *GV = GA->getGlobal();
2019  DebugLoc dl = GA->getDebugLoc();
2020  SDValue Offset;
2021  SDValue Chain = DAG.getEntryNode();
2022  EVT PtrVT = getPointerTy();
2023  // Get the Thread Pointer
2024  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2025
2026  if (GV->isDeclaration()) {
2027    MachineFunction &MF = DAG.getMachineFunction();
2028    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2029    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2030    // Initial exec model.
2031    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2032    ARMConstantPoolValue *CPV =
2033      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2034                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2035                                      true);
2036    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2037    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2038    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2039                         MachinePointerInfo::getConstantPool(),
2040                         false, false, 0);
2041    Chain = Offset.getValue(1);
2042
2043    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2044    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2045
2046    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2047                         MachinePointerInfo::getConstantPool(),
2048                         false, false, 0);
2049  } else {
2050    // local exec model
2051    ARMConstantPoolValue *CPV =
2052      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2053    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2054    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2055    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2056                         MachinePointerInfo::getConstantPool(),
2057                         false, false, 0);
2058  }
2059
2060  // The address of the thread local variable is the add of the thread
2061  // pointer with the offset of the variable.
2062  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2063}
2064
2065SDValue
2066ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2067  // TODO: implement the "local dynamic" model
2068  assert(Subtarget->isTargetELF() &&
2069         "TLS not implemented for non-ELF targets");
2070  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2071  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
2072  // otherwise use the "Local Exec" TLS Model
2073  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
2074    return LowerToTLSGeneralDynamicModel(GA, DAG);
2075  else
2076    return LowerToTLSExecModels(GA, DAG);
2077}
2078
2079SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
2080                                                 SelectionDAG &DAG) const {
2081  EVT PtrVT = getPointerTy();
2082  DebugLoc dl = Op.getDebugLoc();
2083  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2084  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2085  if (RelocM == Reloc::PIC_) {
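    // Symbols that bind locally use a GOTOFF entry (GOT base + constant
    // offset); everything else goes through a GOT slot, which requires the
    // extra load emitted below when UseGOTOFF is false.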
2086    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
2087    ARMConstantPoolValue *CPV =
2088      ARMConstantPoolConstant::Create(GV,
2089                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
2090    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2091    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2092    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
2093                                 CPAddr,
2094                                 MachinePointerInfo::getConstantPool(),
2095                                 false, false, 0);
2096    SDValue Chain = Result.getValue(1);
2097    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
2098    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
2099    if (!UseGOTOFF)
2100      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
2101                           MachinePointerInfo::getGOT(), false, false, 0);
2102    return Result;
2103  }
2104
2105  // If we have T2 ops, we can materialize the address directly via movt/movw
2106  // pair. This is always cheaper in terms of performance, but uses at least 2
2107  // extra bytes.
2108  if (Subtarget->useMovt() &&
2109      !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
2110    ++NumMovwMovt;
2111    // FIXME: Once remat is capable of dealing with instructions with register
2112    // operands, expand this into two nodes.
2113    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2114                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2115  } else {
2116    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
2117    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2118    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2119                       MachinePointerInfo::getConstantPool(),
2120                       false, false, 0);
2121  }
2122}
2123
2124SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
2125                                                    SelectionDAG &DAG) const {
2126  EVT PtrVT = getPointerTy();
2127  DebugLoc dl = Op.getDebugLoc();
2128  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2129  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2130  MachineFunction &MF = DAG.getMachineFunction();
2131  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2132
2133  // FIXME: Enable this for static codegen when tool issues are fixed.
2134  if (Subtarget->useMovt() && RelocM != Reloc::Static &&
2135      !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
2136    ++NumMovwMovt;
2137    // FIXME: Once remat is capable of dealing with instructions with register
2138    // operands, expand this into two nodes.
2139    if (RelocM == Reloc::Static)
2140      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2141                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2142
2143    unsigned Wrapper = (RelocM == Reloc::PIC_)
2144      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
2145    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
2146                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2147    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
2148      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
2149                           MachinePointerInfo::getGOT(), false, false, 0);
2150    return Result;
2151  }
2152
2153  unsigned ARMPCLabelIndex = 0;
2154  SDValue CPAddr;
2155  if (RelocM == Reloc::Static) {
2156    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
2157  } else {
2158    ARMPCLabelIndex = AFI->createPICLabelUId();
2159    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
2160    ARMConstantPoolValue *CPV =
2161      ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
2162                                      PCAdj);
2163    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2164  }
2165  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2166
2167  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2168                               MachinePointerInfo::getConstantPool(),
2169                               false, false, 0);
2170  SDValue Chain = Result.getValue(1);
2171
2172  if (RelocM == Reloc::PIC_) {
2173    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2174    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2175  }
2176
2177  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
2178    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
2179                         false, false, 0);
2180
2181  return Result;
2182}
2183
2184SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
2185                                                    SelectionDAG &DAG) const {
2186  assert(Subtarget->isTargetELF() &&
2187         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
2188  MachineFunction &MF = DAG.getMachineFunction();
2189  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2190  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2191  EVT PtrVT = getPointerTy();
2192  DebugLoc dl = Op.getDebugLoc();
2193  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2194  ARMConstantPoolValue *CPV =
2195    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
2196                                  ARMPCLabelIndex, PCAdj);
2197  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2198  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2199  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2200                               MachinePointerInfo::getConstantPool(),
2201                               false, false, 0);
2202  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2203  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2204}
2205
2206SDValue
2207ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
2208  const {
2209  DebugLoc dl = Op.getDebugLoc();
2210  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
2211                     Op.getOperand(0), Op.getOperand(1));
2212}
2213
2214SDValue
2215ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
2216  DebugLoc dl = Op.getDebugLoc();
2217  SDValue Val = DAG.getConstant(0, MVT::i32);
2218  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
2219                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
2220                     Op.getOperand(1), Val);
2221}
2222
2223SDValue
2224ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
2225  DebugLoc dl = Op.getDebugLoc();
2226  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
2227                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
2228}
2229
2230SDValue
2231ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
2232                                          const ARMSubtarget *Subtarget) const {
2233  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2234  DebugLoc dl = Op.getDebugLoc();
2235  switch (IntNo) {
2236  default: return SDValue();    // Don't custom lower most intrinsics.
2237  case Intrinsic::arm_thread_pointer: {
2238    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2239    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2240  }
2241  case Intrinsic::eh_sjlj_lsda: {
2242    MachineFunction &MF = DAG.getMachineFunction();
2243    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2244    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2245    EVT PtrVT = getPointerTy();
2246    DebugLoc dl = Op.getDebugLoc();
2247    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2248    SDValue CPAddr;
2249    unsigned PCAdj = (RelocM != Reloc::PIC_)
2250      ? 0 : (Subtarget->isThumb() ? 4 : 8);
2251    ARMConstantPoolValue *CPV =
2252      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
2253                                      ARMCP::CPLSDA, PCAdj);
2254    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2255    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2256    SDValue Result =
2257      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2258                  MachinePointerInfo::getConstantPool(),
2259                  false, false, 0);
2260
2261    if (RelocM == Reloc::PIC_) {
2262      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2263      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2264    }
2265    return Result;
2266  }
2267  case Intrinsic::arm_neon_vmulls:
2268  case Intrinsic::arm_neon_vmullu: {
2269    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
2270      ? ARMISD::VMULLs : ARMISD::VMULLu;
2271    return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
2272                       Op.getOperand(1), Op.getOperand(2));
2273  }
2274  }
2275}
2276
2277static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
2278                               const ARMSubtarget *Subtarget) {
2279  DebugLoc dl = Op.getDebugLoc();
2280  if (!Subtarget->hasDataBarrier()) {
2281    // Some ARMv6 cpus can support data barriers with an mcr instruction.
2282    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2283    // here.
2284    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2285           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2286    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2287                       DAG.getConstant(0, MVT::i32));
2288  }
2289
2290  SDValue Op5 = Op.getOperand(5);
2291  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
2292  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2293  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2294  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
2295
2296  ARM_MB::MemBOpt DMBOpt;
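  // Map the barrier kind onto a DMB option: SY/ST order all accesses system
  // wide (ST stores only), while ISH/ISHST restrict the barrier to the inner
  // shareable domain.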
2297  if (isDeviceBarrier)
2298    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
2299  else
2300    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
2301  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
2302                     DAG.getConstant(DMBOpt, MVT::i32));
2303}
2304
2305
2306static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
2307                                 const ARMSubtarget *Subtarget) {
2308  // FIXME: handle "fence singlethread" more efficiently.
2309  DebugLoc dl = Op.getDebugLoc();
2310  if (!Subtarget->hasDataBarrier()) {
2311    // Some ARMv6 cpus can support data barriers with an mcr instruction.
2312    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2313    // here.
2314    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2315           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2316    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2317                       DAG.getConstant(0, MVT::i32));
2318  }
2319
2320  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
2321                     DAG.getConstant(ARM_MB::ISH, MVT::i32));
2322}
2323
2324static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2325                             const ARMSubtarget *Subtarget) {
2326  // ARM before v5TE and Thumb1 do not have preload instructions.
2327  if (!(Subtarget->isThumb2() ||
2328        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2329    // Just preserve the chain.
2330    return Op.getOperand(0);
2331
2332  DebugLoc dl = Op.getDebugLoc();
2333  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2334  if (!isRead &&
2335      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2336    // Only ARMv7 with the MP extension has PLDW; otherwise drop the prefetch.
2337    return Op.getOperand(0);
2338
2339  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2340  if (Subtarget->isThumb()) {
2341    // Invert the bits.
2342    isRead = ~isRead & 1;
2343    isData = ~isData & 1;
2344  }
2345
2346  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
2347                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
2348                     DAG.getConstant(isData, MVT::i32));
2349}
2350
2351static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2352  MachineFunction &MF = DAG.getMachineFunction();
2353  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2354
2355  // vastart just stores the address of the VarArgsFrameIndex slot into the
2356  // memory location argument.
2357  DebugLoc dl = Op.getDebugLoc();
2358  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2359  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2360  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2361  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2362                      MachinePointerInfo(SV), false, false, 0);
2363}
2364
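// GetF64FormalArgument - Reassemble an f64 formal argument that the calling
// convention split across i32 locations: the low word always arrives in VA's
// register, and the high word comes either from NextVA's register or from a
// fixed stack slot, after which the halves are joined with VMOVDRR.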
2365SDValue
2366ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2367                                        SDValue &Root, SelectionDAG &DAG,
2368                                        DebugLoc dl) const {
2369  MachineFunction &MF = DAG.getMachineFunction();
2370  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2371
2372  TargetRegisterClass *RC;
2373  if (AFI->isThumb1OnlyFunction())
2374    RC = ARM::tGPRRegisterClass;
2375  else
2376    RC = ARM::GPRRegisterClass;
2377
2378  // Transform the arguments stored in physical registers into virtual ones.
2379  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2380  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2381
2382  SDValue ArgValue2;
2383  if (NextVA.isMemLoc()) {
2384    MachineFrameInfo *MFI = MF.getFrameInfo();
2385    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2386
2387    // Create load node to retrieve arguments from the stack.
2388    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2389    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2390                            MachinePointerInfo::getFixedStack(FI),
2391                            false, false, 0);
2392  } else {
2393    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2394    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2395  }
2396
2397  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2398}
2399
2400void
2401ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
2402                                  unsigned &VARegSize, unsigned &VARegSaveSize)
2403  const {
2404  unsigned NumGPRs;
2405  if (CCInfo.isFirstByValRegValid())
2406    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
2407  else {
2408    unsigned firstUnalloced =
2409      CCInfo.getFirstUnallocated(GPRArgRegs,
2410                                 sizeof(GPRArgRegs) /
2411                                 sizeof(GPRArgRegs[0]));
2412    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
2413  }
2414
2415  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
2416  VARegSize = NumGPRs * 4;
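  // Round the raw register area up to the stack alignment, e.g. (illustrative)
  // 12 bytes of r1-r3 spill with 8-byte alignment reserves 16 bytes.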
2417  VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2418}
2419
2420// The remaining GPRs hold either the beginning of variable-argument
2421// data, or the beginning of an aggregate passed by value (usually
2422// byval).  Either way, we allocate stack slots adjacent to the data
2423// provided by our caller, and store the unallocated registers there.
2424// If this is a variadic function, the va_list pointer will begin with
2425// these values; otherwise, this reassembles a (byval) structure that
2426// was split between registers and memory.
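//
// For example (illustrative): in a variadic callee like printf, only R0 is
// taken by the fixed format argument, so R1-R3 are spilled here and the
// va_list starts at that spill area.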
2427void
2428ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
2429                                        DebugLoc dl, SDValue &Chain,
2430                                        unsigned ArgOffset) const {
2431  MachineFunction &MF = DAG.getMachineFunction();
2432  MachineFrameInfo *MFI = MF.getFrameInfo();
2433  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2434  unsigned firstRegToSaveIndex;
2435  if (CCInfo.isFirstByValRegValid())
2436    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
2437  else {
2438    firstRegToSaveIndex = CCInfo.getFirstUnallocated
2439      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2440  }
2441
2442  unsigned VARegSize, VARegSaveSize;
2443  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
2444  if (VARegSaveSize) {
2445    // If this function is vararg, store any remaining integer argument regs
2446    // to their spots on the stack so that they may be loaded by dereferencing
2447    // the result of va_next.
2448    AFI->setVarArgsRegSaveSize(VARegSaveSize);
2449    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
2450                                                     ArgOffset + VARegSaveSize
2451                                                     - VARegSize,
2452                                                     false));
2453    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2454                                    getPointerTy());
2455
2456    SmallVector<SDValue, 4> MemOps;
2457    for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
2458      TargetRegisterClass *RC;
2459      if (AFI->isThumb1OnlyFunction())
2460        RC = ARM::tGPRRegisterClass;
2461      else
2462        RC = ARM::GPRRegisterClass;
2463
2464      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
2465      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2466      SDValue Store =
2467        DAG.getStore(Val.getValue(1), dl, Val, FIN,
2468                 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
2469                     false, false, 0);
2470      MemOps.push_back(Store);
2471      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2472                        DAG.getConstant(4, getPointerTy()));
2473    }
2474    if (!MemOps.empty())
2475      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2476                          &MemOps[0], MemOps.size());
2477  } else
2478    // This will point to the next argument passed via stack.
2479    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
2480}
2481
2482SDValue
2483ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2484                                        CallingConv::ID CallConv, bool isVarArg,
2485                                        const SmallVectorImpl<ISD::InputArg>
2486                                          &Ins,
2487                                        DebugLoc dl, SelectionDAG &DAG,
2488                                        SmallVectorImpl<SDValue> &InVals)
2489                                          const {
2490  MachineFunction &MF = DAG.getMachineFunction();
2491  MachineFrameInfo *MFI = MF.getFrameInfo();
2492
2493  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2494
2495  // Assign locations to all of the incoming arguments.
2496  SmallVector<CCValAssign, 16> ArgLocs;
2497  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2498                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
2499  CCInfo.AnalyzeFormalArguments(Ins,
2500                                CCAssignFnForNode(CallConv, /* Return*/ false,
2501                                                  isVarArg));
2502
2503  SmallVector<SDValue, 16> ArgValues;
2504  int lastInsIndex = -1;
2505
2506  SDValue ArgValue;
2507  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2508    CCValAssign &VA = ArgLocs[i];
2509
2510    // Arguments stored in registers.
2511    if (VA.isRegLoc()) {
2512      EVT RegVT = VA.getLocVT();
2513
2514      if (VA.needsCustom()) {
2515        // f64 and vector types are split up into multiple registers or
2516        // combinations of registers and stack slots.
2517        if (VA.getLocVT() == MVT::v2f64) {
2518          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2519                                                   Chain, DAG, dl);
2520          VA = ArgLocs[++i]; // skip ahead to next loc
2521          SDValue ArgValue2;
2522          if (VA.isMemLoc()) {
2523            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
2524            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2525            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2526                                    MachinePointerInfo::getFixedStack(FI),
2527                                    false, false, 0);
2528          } else {
2529            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2530                                             Chain, DAG, dl);
2531          }
2532          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2533          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2534                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2535          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2536                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2537        } else
2538          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2539
2540      } else {
2541        TargetRegisterClass *RC;
2542
2543        if (RegVT == MVT::f32)
2544          RC = ARM::SPRRegisterClass;
2545        else if (RegVT == MVT::f64)
2546          RC = ARM::DPRRegisterClass;
2547        else if (RegVT == MVT::v2f64)
2548          RC = ARM::QPRRegisterClass;
2549        else if (RegVT == MVT::i32)
2550          RC = (AFI->isThumb1OnlyFunction() ?
2551                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2552        else
2553          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2554
2555        // Transform the arguments in physical registers into virtual ones.
2556        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2557        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2558      }
2559
2560      // If this is an 8 or 16-bit value, it is really passed promoted
2561      // to 32 bits.  Insert an assert[sz]ext to capture this, then
2562      // truncate to the right size.
2563      switch (VA.getLocInfo()) {
2564      default: llvm_unreachable("Unknown loc info!");
2565      case CCValAssign::Full: break;
2566      case CCValAssign::BCvt:
2567        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
2568        break;
2569      case CCValAssign::SExt:
2570        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2571                               DAG.getValueType(VA.getValVT()));
2572        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2573        break;
2574      case CCValAssign::ZExt:
2575        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2576                               DAG.getValueType(VA.getValVT()));
2577        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2578        break;
2579      }
2580
2581      InVals.push_back(ArgValue);
2582
2583    } else { // VA.isRegLoc()
2584
2585      // sanity check
2586      assert(VA.isMemLoc());
2587      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2588
2589      int index = ArgLocs[i].getValNo();
2590
2591      // Some Ins[] entries become multiple ArgLoc[] entries.
2592      // Process them only once.
2593      if (index != lastInsIndex)
2594        {
2595          ISD::ArgFlagsTy Flags = Ins[index].Flags;
2596          // FIXME: For now, all byval parameter objects are marked mutable.
2597          // This can be changed with more analysis.
2598          // In case of tail call optimization, mark all arguments mutable,
2599          // since they could be overwritten by the lowering of arguments for
2600          // a tail call.
2601          if (Flags.isByVal()) {
2602            unsigned VARegSize, VARegSaveSize;
2603            computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
2604            VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
2605            unsigned Bytes = Flags.getByValSize() - VARegSize;
2606            if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2607            int FI = MFI->CreateFixedObject(Bytes,
2608                                            VA.getLocMemOffset(), false);
2609            InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
2610          } else {
2611            int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
2612                                            VA.getLocMemOffset(), true);
2613
2614            // Create load nodes to retrieve arguments from the stack.
2615            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2616            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2617                                         MachinePointerInfo::getFixedStack(FI),
2618                                         false, false, 0));
2619          }
2620          lastInsIndex = index;
2621        }
2622    }
2623  }
2624
2625  // varargs
2626  if (isVarArg)
2627    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
2628
2629  return Chain;
2630}
2631
2632/// isFloatingPointZero - Return true if this is +0.0.
2633static bool isFloatingPointZero(SDValue Op) {
2634  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2635    return CFP->getValueAPF().isPosZero();
2636  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2637    // Maybe this has already been legalized into the constant pool?
2638    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2639      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2640      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2641        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2642          return CFP->getValueAPF().isPosZero();
2643    }
2644  }
2645  return false;
2646}
2647
2648/// Returns the appropriate ARM CMP (cmp) and the corresponding condition
2649/// code for the given operands.
2650SDValue
2651ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2652                             SDValue &ARMcc, SelectionDAG &DAG,
2653                             DebugLoc dl) const {
2654  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2655    unsigned C = RHSC->getZExtValue();
2656    if (!isLegalICmpImmediate(C)) {
2657      // Constant does not fit; try adjusting it by one?
2658      switch (CC) {
2659      default: break;
2660      case ISD::SETLT:
2661      case ISD::SETGE:
2662        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
2663          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2664          RHS = DAG.getConstant(C-1, MVT::i32);
2665        }
2666        break;
2667      case ISD::SETULT:
2668      case ISD::SETUGE:
2669        if (C != 0 && isLegalICmpImmediate(C-1)) {
2670          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2671          RHS = DAG.getConstant(C-1, MVT::i32);
2672        }
2673        break;
2674      case ISD::SETLE:
2675      case ISD::SETGT:
2676        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
2677          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2678          RHS = DAG.getConstant(C+1, MVT::i32);
2679        }
2680        break;
2681      case ISD::SETULE:
2682      case ISD::SETUGT:
2683        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
2684          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2685          RHS = DAG.getConstant(C+1, MVT::i32);
2686        }
2687        break;
2688      }
2689    }
2690  }
2691
2692  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2693  ARMISD::NodeType CompareType;
2694  switch (CondCode) {
2695  default:
2696    CompareType = ARMISD::CMP;
2697    break;
2698  case ARMCC::EQ:
2699  case ARMCC::NE:
2700    // Uses only Z Flag
2701    CompareType = ARMISD::CMPZ;
2702    break;
2703  }
2704  ARMcc = DAG.getConstant(CondCode, MVT::i32);
2705  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
2706}
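
// Illustrative example of the constant adjustment above, assuming 0x101 is
// not encodable as an ARM rotated 8-bit immediate while 0x100 is:
//
//   (x <  0x101)  becomes  (x <= 0x100)   // SETLT -> SETLE, RHS = C - 1
//   (x >= 0x101)  becomes  (x >  0x100)   // SETGE -> SETGT, RHS = C - 1
//
// The comparison result is unchanged; only the constant and condition code
// are rewritten so that the CMP immediate can be encoded directly.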
2707
2708/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2709SDValue
2710ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2711                             DebugLoc dl) const {
2712  SDValue Cmp;
2713  if (!isFloatingPointZero(RHS))
2714    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
2715  else
2716    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
2717  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
2718}
2719
2720/// duplicateCmp - Glue values can have only one use, so this function
2721/// duplicates a comparison node.
2722SDValue
2723ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
2724  unsigned Opc = Cmp.getOpcode();
2725  DebugLoc DL = Cmp.getDebugLoc();
2726  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
2727    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
2728
2729  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
2730  Cmp = Cmp.getOperand(0);
2731  Opc = Cmp.getOpcode();
2732  if (Opc == ARMISD::CMPFP)
2733    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
2734  else {
2735    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
2736    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
2737  }
2738  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
2739}
2740
2741SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2742  SDValue Cond = Op.getOperand(0);
2743  SDValue SelectTrue = Op.getOperand(1);
2744  SDValue SelectFalse = Op.getOperand(2);
2745  DebugLoc dl = Op.getDebugLoc();
2746
2747  // Convert:
2748  //
2749  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
2750  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
2751  //
2752  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
2753    const ConstantSDNode *CMOVTrue =
2754      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
2755    const ConstantSDNode *CMOVFalse =
2756      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
2757
2758    if (CMOVTrue && CMOVFalse) {
2759      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
2760      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
2761
2762      SDValue True;
2763      SDValue False;
2764      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
2765        True = SelectTrue;
2766        False = SelectFalse;
2767      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
2768        True = SelectFalse;
2769        False = SelectTrue;
2770      }
2771
2772      if (True.getNode() && False.getNode()) {
2773        EVT VT = Op.getValueType();
2774        SDValue ARMcc = Cond.getOperand(2);
2775        SDValue CCR = Cond.getOperand(3);
2776        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
2777        assert(True.getValueType() == VT);
2778        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
2779      }
2780    }
2781  }
2782
2783  return DAG.getSelectCC(dl, Cond,
2784                         DAG.getConstant(0, Cond.getValueType()),
2785                         SelectTrue, SelectFalse, ISD::SETNE);
2786}
2787
2788SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2789  EVT VT = Op.getValueType();
2790  SDValue LHS = Op.getOperand(0);
2791  SDValue RHS = Op.getOperand(1);
2792  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2793  SDValue TrueVal = Op.getOperand(2);
2794  SDValue FalseVal = Op.getOperand(3);
2795  DebugLoc dl = Op.getDebugLoc();
2796
2797  if (LHS.getValueType() == MVT::i32) {
2798    SDValue ARMcc;
2799    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2800    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2801    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
2802  }
2803
2804  ARMCC::CondCodes CondCode, CondCode2;
2805  FPCCToARMCC(CC, CondCode, CondCode2);
2806
2807  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2808  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2809  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2810  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2811                               ARMcc, CCR, Cmp);
2812  if (CondCode2 != ARMCC::AL) {
2813    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
2814    // FIXME: Needs another CMP because flag can have but one use.
2815    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2816    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2817                         Result, TrueVal, ARMcc2, CCR, Cmp2);
2818  }
2819  return Result;
2820}
2821
2822/// canChangeToInt - Given the fp compare operand, return true if it is suitable
2823/// to morph to an integer compare sequence.
2824static bool canChangeToInt(SDValue Op, bool &SeenZero,
2825                           const ARMSubtarget *Subtarget) {
2826  SDNode *N = Op.getNode();
2827  if (!N->hasOneUse())
2828    // Otherwise it requires moving the value from fp to integer registers.
2829    return false;
2830  if (!N->getNumValues())
2831    return false;
2832  EVT VT = Op.getValueType();
2833  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
2834    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
2835    // vmrs are very slow, e.g. cortex-a8.
2836    return false;
2837
2838  if (isFloatingPointZero(Op)) {
2839    SeenZero = true;
2840    return true;
2841  }
2842  return ISD::isNormalLoad(N);
2843}
2844
2845static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2846  if (isFloatingPointZero(Op))
2847    return DAG.getConstant(0, MVT::i32);
2848
2849  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
2850    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2851                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
2852                       Ld->isVolatile(), Ld->isNonTemporal(),
2853                       Ld->getAlignment());
2854
2855  llvm_unreachable("Unknown VFP cmp argument!");
2856}
2857
2858static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2859                           SDValue &RetVal1, SDValue &RetVal2) {
2860  if (isFloatingPointZero(Op)) {
2861    RetVal1 = DAG.getConstant(0, MVT::i32);
2862    RetVal2 = DAG.getConstant(0, MVT::i32);
2863    return;
2864  }
2865
2866  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
2867    SDValue Ptr = Ld->getBasePtr();
2868    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2869                          Ld->getChain(), Ptr,
2870                          Ld->getPointerInfo(),
2871                          Ld->isVolatile(), Ld->isNonTemporal(),
2872                          Ld->getAlignment());
2873
2874    EVT PtrType = Ptr.getValueType();
2875    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2876    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2877                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
2878    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2879                          Ld->getChain(), NewPtr,
2880                          Ld->getPointerInfo().getWithOffset(4),
2881                          Ld->isVolatile(), Ld->isNonTemporal(),
2882                          NewAlign);
2883    return;
2884  }
2885
2886  llvm_unreachable("Unknown VFP cmp argument!");
2887}
2888
2889/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2890/// f32 and even f64 comparisons to integer ones.
2891SDValue
2892ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2893  SDValue Chain = Op.getOperand(0);
2894  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2895  SDValue LHS = Op.getOperand(2);
2896  SDValue RHS = Op.getOperand(3);
2897  SDValue Dest = Op.getOperand(4);
2898  DebugLoc dl = Op.getDebugLoc();
2899
2900  bool SeenZero = false;
2901  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2902      canChangeToInt(RHS, SeenZero, Subtarget) &&
2903      // If one of the operands is zero, it's safe to ignore the NaN case since
2904      // we only care about equality comparisons.
2905      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
2906    // If unsafe fp math optimization is enabled and there are no other uses of
2907    // the CMP operands, and the condition code is EQ or NE, we can optimize it
2908    // to an integer comparison.
2909    if (CC == ISD::SETOEQ)
2910      CC = ISD::SETEQ;
2911    else if (CC == ISD::SETUNE)
2912      CC = ISD::SETNE;
2913
2914    SDValue ARMcc;
2915    if (LHS.getValueType() == MVT::f32) {
2916      LHS = bitcastf32Toi32(LHS, DAG);
2917      RHS = bitcastf32Toi32(RHS, DAG);
2918      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2919      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2920      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2921                         Chain, Dest, ARMcc, CCR, Cmp);
2922    }
2923
2924    SDValue LHS1, LHS2;
2925    SDValue RHS1, RHS2;
2926    expandf64Toi32(LHS, DAG, LHS1, LHS2);
2927    expandf64Toi32(RHS, DAG, RHS1, RHS2);
2928    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2929    ARMcc = DAG.getConstant(CondCode, MVT::i32);
2930    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
2931    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
2932    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
2933  }
2934
2935  return SDValue();
2936}
2937
2938SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2939  SDValue Chain = Op.getOperand(0);
2940  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2941  SDValue LHS = Op.getOperand(2);
2942  SDValue RHS = Op.getOperand(3);
2943  SDValue Dest = Op.getOperand(4);
2944  DebugLoc dl = Op.getDebugLoc();
2945
2946  if (LHS.getValueType() == MVT::i32) {
2947    SDValue ARMcc;
2948    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2949    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2950    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2951                       Chain, Dest, ARMcc, CCR, Cmp);
2952  }
2953
2954  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2955
2956  if (UnsafeFPMath &&
2957      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2958       CC == ISD::SETNE || CC == ISD::SETUNE)) {
2959    SDValue Result = OptimizeVFPBrcond(Op, DAG);
2960    if (Result.getNode())
2961      return Result;
2962  }
2963
2964  ARMCC::CondCodes CondCode, CondCode2;
2965  FPCCToARMCC(CC, CondCode, CondCode2);
2966
2967  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2968  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2969  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2970  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
2971  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
2972  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2973  if (CondCode2 != ARMCC::AL) {
2974    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
2975    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
2976    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2977  }
2978  return Res;
2979}
2980
2981SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2982  SDValue Chain = Op.getOperand(0);
2983  SDValue Table = Op.getOperand(1);
2984  SDValue Index = Op.getOperand(2);
2985  DebugLoc dl = Op.getDebugLoc();
2986
2987  EVT PTy = getPointerTy();
2988  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2989  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2990  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2991  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2992  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2993  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2994  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2995  if (Subtarget->isThumb2()) {
2996    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2997    // which does another jump to the destination. This also makes it easier
2998    // to translate it to TBB / TBH later.
2999    // FIXME: This might not work if the function is extremely large.
3000    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
3001                       Addr, Op.getOperand(2), JTI, UId);
3002  }
3003  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
3004    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
3005                       MachinePointerInfo::getJumpTable(),
3006                       false, false, 0);
3007    Chain = Addr.getValue(1);
3008    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
3009    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3010  } else {
3011    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
3012                       MachinePointerInfo::getJumpTable(), false, false, 0);
3013    Chain = Addr.getValue(1);
3014    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
3015  }
3016}
3017
3018static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
3019  DebugLoc dl = Op.getDebugLoc();
3020  unsigned Opc;
3021
3022  switch (Op.getOpcode()) {
3023  default:
3024    assert(0 && "Invalid opcode!");
3025  case ISD::FP_TO_SINT:
3026    Opc = ARMISD::FTOSI;
3027    break;
3028  case ISD::FP_TO_UINT:
3029    Opc = ARMISD::FTOUI;
3030    break;
3031  }
3032  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
3033  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
3034}
3035
3036static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3037  EVT VT = Op.getValueType();
3038  DebugLoc dl = Op.getDebugLoc();
3039
3040  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
3041         "Invalid type for custom lowering!");
3042  if (VT != MVT::v4f32)
3043    return DAG.UnrollVectorOp(Op.getNode());
3044
3045  unsigned CastOpc;
3046  unsigned Opc;
3047  switch (Op.getOpcode()) {
3048  default:
3049    assert(0 && "Invalid opcode!");
3050  case ISD::SINT_TO_FP:
3051    CastOpc = ISD::SIGN_EXTEND;
3052    Opc = ISD::SINT_TO_FP;
3053    break;
3054  case ISD::UINT_TO_FP:
3055    CastOpc = ISD::ZERO_EXTEND;
3056    Opc = ISD::UINT_TO_FP;
3057    break;
3058  }
3059
3060  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
3061  return DAG.getNode(Opc, dl, VT, Op);
3062}
3063
3064static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3065  EVT VT = Op.getValueType();
3066  if (VT.isVector())
3067    return LowerVectorINT_TO_FP(Op, DAG);
3068
3069  DebugLoc dl = Op.getDebugLoc();
3070  unsigned Opc;
3071
3072  switch (Op.getOpcode()) {
3073  default:
3074    assert(0 && "Invalid opcode!");
3075  case ISD::SINT_TO_FP:
3076    Opc = ARMISD::SITOF;
3077    break;
3078  case ISD::UINT_TO_FP:
3079    Opc = ARMISD::UITOF;
3080    break;
3081  }
3082
3083  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
3084  return DAG.getNode(Opc, dl, VT, Op);
3085}
3086
3087SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
3088  // Implement fcopysign with a fabs and a conditional fneg.
3089  SDValue Tmp0 = Op.getOperand(0);
3090  SDValue Tmp1 = Op.getOperand(1);
3091  DebugLoc dl = Op.getDebugLoc();
3092  EVT VT = Op.getValueType();
3093  EVT SrcVT = Tmp1.getValueType();
3094  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
3095    Tmp0.getOpcode() == ARMISD::VMOVDRR;
3096  bool UseNEON = !InGPR && Subtarget->hasNEON();
3097
3098  if (UseNEON) {
3099    // Use VBSL to copy the sign bit.
3100    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
3101    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
3102                               DAG.getTargetConstant(EncodedVal, MVT::i32));
3103    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
3104    if (VT == MVT::f64)
3105      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
3106                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
3107                         DAG.getConstant(32, MVT::i32));
3108    else /*if (VT == MVT::f32)*/
3109      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
3110    if (SrcVT == MVT::f32) {
3111      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
3112      if (VT == MVT::f64)
3113        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
3114                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
3115                           DAG.getConstant(32, MVT::i32));
3116    } else if (VT == MVT::f32)
3117      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
3118                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
3119                         DAG.getConstant(32, MVT::i32));
3120    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
3121    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
3122
3123    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
3124                                            MVT::i32);
3125    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
3126    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
3127                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
3128
3129    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
3130                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
3131                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
3132    if (VT == MVT::f32) {
3133      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
3134      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
3135                        DAG.getConstant(0, MVT::i32));
3136    } else {
3137      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
3138    }
3139
3140    return Res;
3141  }
3142
3143  // Bitcast operand 1 to i32.
3144  if (SrcVT == MVT::f64)
3145    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
3146                       &Tmp1, 1).getValue(1);
3147  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
3148
3149  // Or in the signbit with integer operations.
3150  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
3151  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
3152  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
3153  if (VT == MVT::f32) {
3154    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
3155                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
3156    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
3157                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
3158  }
3159
3160  // f64: Or the high part with signbit and then combine two parts.
3161  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
3162                     &Tmp0, 1);
3163  SDValue Lo = Tmp0.getValue(0);
3164  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
3165  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
3166  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
3167}
3168
3169SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
3170  MachineFunction &MF = DAG.getMachineFunction();
3171  MachineFrameInfo *MFI = MF.getFrameInfo();
3172  MFI->setReturnAddressIsTaken(true);
3173
3174  EVT VT = Op.getValueType();
3175  DebugLoc dl = Op.getDebugLoc();
3176  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3177  if (Depth) {
3178    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
3179    SDValue Offset = DAG.getConstant(4, MVT::i32);
3180    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
3181                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
3182                       MachinePointerInfo(), false, false, 0);
3183  }
3184
3185  // Return LR, which contains the return address. Mark it an implicit live-in.
3186  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3187  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
3188}
3189
3190SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
3191  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
3192  MFI->setFrameAddressIsTaken(true);
3193
3194  EVT VT = Op.getValueType();
3195  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
3196  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3197  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
3198    ? ARM::R7 : ARM::R11;
3199  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
3200  while (Depth--)
3201    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
3202                            MachinePointerInfo(),
3203                            false, false, 0);
3204  return FrameAddr;
3205}
3206
3207/// ExpandBITCAST - If the target supports VFP, this function is called to
3208/// expand a bit convert where either the source or destination type is i64 to
3209/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
3210/// operand type is illegal (e.g., v2f32 for a target that doesn't support
3211/// vectors), since the legalizer won't know what to do with that.
3212static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
3213  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3214  DebugLoc dl = N->getDebugLoc();
3215  SDValue Op = N->getOperand(0);
3216
3217  // This function is only supposed to be called for i64 types, either as the
3218  // source or destination of the bit convert.
3219  EVT SrcVT = Op.getValueType();
3220  EVT DstVT = N->getValueType(0);
3221  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
3222         "ExpandBITCAST called for non-i64 type");
3223
3224  // Turn i64->f64 into VMOVDRR.
3225  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
3226    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3227                             DAG.getConstant(0, MVT::i32));
3228    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3229                             DAG.getConstant(1, MVT::i32));
3230    return DAG.getNode(ISD::BITCAST, dl, DstVT,
3231                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
3232  }
3233
3234  // Turn f64->i64 into VMOVRRD.
3235  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
3236    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3237                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
3238    // Merge the pieces into a single i64 value.
3239    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
3240  }
3241
3242  return SDValue();
3243}
3244
3245/// getZeroVector - Returns a vector of specified type with all zero elements.
3246/// Zero vectors are used to represent vector negation and in those cases
3247/// will be implemented with the NEON VNEG instruction.  However, VNEG does
3248/// not support i64 elements, so sometimes the zero vectors will need to be
3249/// explicitly constructed.  Regardless, use a canonical VMOV to create the
3250/// zero vector.
3251static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
3252  assert(VT.isVector() && "Expected a vector type");
3253  // The canonical modified immediate encoding of a zero vector is....0!
3254  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
3255  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
3256  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
3257  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3258}
3259
3260/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
3261/// i32 values and take a 2 x i32 value to shift plus a shift amount.
3262SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
3263                                                SelectionDAG &DAG) const {
3264  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3265  EVT VT = Op.getValueType();
3266  unsigned VTBits = VT.getSizeInBits();
3267  DebugLoc dl = Op.getDebugLoc();
3268  SDValue ShOpLo = Op.getOperand(0);
3269  SDValue ShOpHi = Op.getOperand(1);
3270  SDValue ShAmt  = Op.getOperand(2);
3271  SDValue ARMcc;
3272  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
3273
3274  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
3275
3276  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3277                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3278  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
3279  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3280                                   DAG.getConstant(VTBits, MVT::i32));
3281  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
3282  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3283  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
3284
3285  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3286  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3287                          ARMcc, DAG, dl);
3288  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
3289  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
3290                           CCR, Cmp);
3291
3292  SDValue Ops[2] = { Lo, Hi };
3293  return DAG.getMergeValues(Ops, 2, dl);
3294}
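
// Worked example of the lowering above (SRL_PARTS, VTBits = 32, shift by 8):
//   Lo' = (Lo >>u 8) | (Hi << 24)   // FalseVal; ExtraShAmt = -24 is negative
//   Hi' =  Hi >>u 8
// For a shift amount of 40, ExtraShAmt = 8 is non-negative, so the CMOV
// selects Lo' = Hi >>u 8 instead; the high word is shifted by the full
// amount, relying on ARM register-specified shifts of 32 or more producing
// zero (or all sign bits for SRA_PARTS).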
3295
3296/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
3297/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
3298SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
3299                                               SelectionDAG &DAG) const {
3300  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3301  EVT VT = Op.getValueType();
3302  unsigned VTBits = VT.getSizeInBits();
3303  DebugLoc dl = Op.getDebugLoc();
3304  SDValue ShOpLo = Op.getOperand(0);
3305  SDValue ShOpHi = Op.getOperand(1);
3306  SDValue ShAmt  = Op.getOperand(2);
3307  SDValue ARMcc;
3308
3309  assert(Op.getOpcode() == ISD::SHL_PARTS);
3310  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3311                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3312  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
3313  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3314                                   DAG.getConstant(VTBits, MVT::i32));
3315  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
3316  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
3317
3318  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3319  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3320  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3321                          ARMcc, DAG, dl);
3322  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
3323  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
3324                           CCR, Cmp);
3325
3326  SDValue Ops[2] = { Lo, Hi };
3327  return DAG.getMergeValues(Ops, 2, dl);
3328}
3329
3330SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3331                                            SelectionDAG &DAG) const {
3332  // The rounding mode is in bits 23:22 of the FPSCR.
3333  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
3334  // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
3335  // so that the shift + and get folded into a bitfield extract.
3336  DebugLoc dl = Op.getDebugLoc();
3337  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
3338                              DAG.getConstant(Intrinsic::arm_get_fpscr,
3339                                              MVT::i32));
3340  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
3341                                  DAG.getConstant(1U << 22, MVT::i32));
3342  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3343                              DAG.getConstant(22, MVT::i32));
3344  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3345                     DAG.getConstant(3, MVT::i32));
3346}
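
// Worked example of the formula above: if FPSCR[23:22] == 0b11 (round toward
// zero), adding 1 << 22 carries out of the two-bit field and
// ((FPSCR + (1 << 22)) >> 22) & 3 evaluates to 0; if FPSCR[23:22] == 0b00
// (round to nearest) it evaluates to 1.  This gives the required
// 0->1, 1->2, 2->3, 3->0 mapping onto the FLT_ROUNDS values.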
3347
3348static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
3349                         const ARMSubtarget *ST) {
3350  EVT VT = N->getValueType(0);
3351  DebugLoc dl = N->getDebugLoc();
3352
3353  if (!ST->hasV6T2Ops())
3354    return SDValue();
3355
3356  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
3357  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
3358}
3359
3360static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
3361                          const ARMSubtarget *ST) {
3362  EVT VT = N->getValueType(0);
3363  DebugLoc dl = N->getDebugLoc();
3364
3365  if (!VT.isVector())
3366    return SDValue();
3367
3368  // Lower vector shifts on NEON to use VSHL.
3369  assert(ST->hasNEON() && "unexpected vector shift");
3370
3371  // Left shifts translate directly to the vshiftu intrinsic.
3372  if (N->getOpcode() == ISD::SHL)
3373    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3374                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
3375                       N->getOperand(0), N->getOperand(1));
3376
3377  assert((N->getOpcode() == ISD::SRA ||
3378          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
3379
3380  // NEON uses the same intrinsics for both left and right shifts.  For
3381  // right shifts, the shift amounts are negative, so negate the vector of
3382  // shift amounts.
3383  EVT ShiftVT = N->getOperand(1).getValueType();
3384  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
3385                                     getZeroVector(ShiftVT, DAG, dl),
3386                                     N->getOperand(1));
3387  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
3388                             Intrinsic::arm_neon_vshifts :
3389                             Intrinsic::arm_neon_vshiftu);
3390  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3391                     DAG.getConstant(vshiftInt, MVT::i32),
3392                     N->getOperand(0), NegatedCount);
3393}
3394
3395static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
3396                                const ARMSubtarget *ST) {
3397  EVT VT = N->getValueType(0);
3398  DebugLoc dl = N->getDebugLoc();
3399
3400  // We can get here for a node like i32 = ISD::SHL i32, i64
3401  if (VT != MVT::i64)
3402    return SDValue();
3403
3404  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
3405         "Unknown shift to lower!");
3406
3407  // We only lower SRA and SRL by 1 here; all others use generic lowering.
3408  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
3409      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
3410    return SDValue();
3411
3412  // If we are in thumb mode, we don't have RRX.
3413  if (ST->isThumb1Only()) return SDValue();
3414
3415  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
3416  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
3417                           DAG.getConstant(0, MVT::i32));
3418  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
3419                           DAG.getConstant(1, MVT::i32));
3420
3421  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
3422  // captures the result into a carry flag.
3423  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
3424  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
3425
3426  // The low part is an ARMISD::RRX operand, which shifts the carry in.
3427  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
3428
3429  // Merge the pieces into a single i64 value.
3430  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
3431}
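
// Illustrative sketch of the RRX lowering above for "srl i64 %x, 1":
//   Hi' = SRL_FLAG Hi    // high word shifted right by one; its old bit 0
//                        // is captured in the carry flag
//   Lo' = RRX Lo         // low word shifted right by one, with the carry
//                        // rotated into bit 31
// The bit that crosses between the two halves travels through the carry
// flag, avoiding an extra OR of cross-shifted words.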
3432
3433static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
3434  SDValue TmpOp0, TmpOp1;
3435  bool Invert = false;
3436  bool Swap = false;
3437  unsigned Opc = 0;
3438
3439  SDValue Op0 = Op.getOperand(0);
3440  SDValue Op1 = Op.getOperand(1);
3441  SDValue CC = Op.getOperand(2);
3442  EVT VT = Op.getValueType();
3443  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3444  DebugLoc dl = Op.getDebugLoc();
3445
3446  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
3447    switch (SetCCOpcode) {
3448    default: llvm_unreachable("Illegal FP comparison"); break;
3449    case ISD::SETUNE:
3450    case ISD::SETNE:  Invert = true; // Fallthrough
3451    case ISD::SETOEQ:
3452    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3453    case ISD::SETOLT:
3454    case ISD::SETLT: Swap = true; // Fallthrough
3455    case ISD::SETOGT:
3456    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3457    case ISD::SETOLE:
3458    case ISD::SETLE:  Swap = true; // Fallthrough
3459    case ISD::SETOGE:
3460    case ISD::SETGE: Opc = ARMISD::VCGE; break;
3461    case ISD::SETUGE: Swap = true; // Fallthrough
3462    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
3463    case ISD::SETUGT: Swap = true; // Fallthrough
3464    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
3465    case ISD::SETUEQ: Invert = true; // Fallthrough
3466    case ISD::SETONE:
3467      // Expand this to (OLT | OGT).
3468      TmpOp0 = Op0;
3469      TmpOp1 = Op1;
3470      Opc = ISD::OR;
3471      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3472      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
3473      break;
3474    case ISD::SETUO: Invert = true; // Fallthrough
3475    case ISD::SETO:
3476      // Expand this to (OLT | OGE).
3477      TmpOp0 = Op0;
3478      TmpOp1 = Op1;
3479      Opc = ISD::OR;
3480      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3481      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
3482      break;
3483    }
3484  } else {
3485    // Integer comparisons.
3486    switch (SetCCOpcode) {
3487    default: llvm_unreachable("Illegal integer comparison"); break;
3488    case ISD::SETNE:  Invert = true;
3489    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3490    case ISD::SETLT:  Swap = true;
3491    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3492    case ISD::SETLE:  Swap = true;
3493    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
3494    case ISD::SETULT: Swap = true;
3495    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
3496    case ISD::SETULE: Swap = true;
3497    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
3498    }
3499
3500    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
3501    if (Opc == ARMISD::VCEQ) {
3502
3503      SDValue AndOp;
3504      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3505        AndOp = Op0;
3506      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
3507        AndOp = Op1;
3508
3509      // Ignore bitconvert.
3510      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
3511        AndOp = AndOp.getOperand(0);
3512
3513      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
3514        Opc = ARMISD::VTST;
3515        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
3516        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
3517        Invert = !Invert;
3518      }
3519    }
3520  }
3521
3522  if (Swap)
3523    std::swap(Op0, Op1);
3524
3525  // If one of the operands is a constant vector zero, attempt to fold the
3526  // comparison to a specialized compare-against-zero form.
3527  SDValue SingleOp;
3528  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3529    SingleOp = Op0;
3530  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
3531    if (Opc == ARMISD::VCGE)
3532      Opc = ARMISD::VCLEZ;
3533    else if (Opc == ARMISD::VCGT)
3534      Opc = ARMISD::VCLTZ;
3535    SingleOp = Op1;
3536  }
3537
3538  SDValue Result;
3539  if (SingleOp.getNode()) {
3540    switch (Opc) {
3541    case ARMISD::VCEQ:
3542      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
3543    case ARMISD::VCGE:
3544      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
3545    case ARMISD::VCLEZ:
3546      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
3547    case ARMISD::VCGT:
3548      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
3549    case ARMISD::VCLTZ:
3550      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
3551    default:
3552      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3553    }
3554  } else {
3555     Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3556  }
3557
3558  if (Invert)
3559    Result = DAG.getNOT(dl, Result, VT);
3560
3561  return Result;
3562}
3563
3564/// isNEONModifiedImm - Check if the specified splat value corresponds to a
3565/// valid vector constant for a NEON instruction with a "modified immediate"
3566/// operand (e.g., VMOV).  If so, return the encoded value.
3567static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3568                                 unsigned SplatBitSize, SelectionDAG &DAG,
3569                                 EVT &VT, bool is128Bits, NEONModImmType type) {
3570  unsigned OpCmode, Imm;
3571
3572  // SplatBitSize is set to the smallest size that splats the vector, so a
3573  // zero vector will always have SplatBitSize == 8.  However, NEON modified
3574  // immediate instructions other than VMOV do not support the 8-bit encoding
3575  // of a zero vector, and the default encoding of zero is supposed to be the
3576  // 32-bit version.
3577  if (SplatBits == 0)
3578    SplatBitSize = 32;
3579
3580  switch (SplatBitSize) {
3581  case 8:
3582    if (type != VMOVModImm)
3583      return SDValue();
3584    // Any 1-byte value is OK.  Op=0, Cmode=1110.
3585    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3586    OpCmode = 0xe;
3587    Imm = SplatBits;
3588    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3589    break;
3590
3591  case 16:
3592    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3593    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3594    if ((SplatBits & ~0xff) == 0) {
3595      // Value = 0x00nn: Op=x, Cmode=100x.
3596      OpCmode = 0x8;
3597      Imm = SplatBits;
3598      break;
3599    }
3600    if ((SplatBits & ~0xff00) == 0) {
3601      // Value = 0xnn00: Op=x, Cmode=101x.
3602      OpCmode = 0xa;
3603      Imm = SplatBits >> 8;
3604      break;
3605    }
3606    return SDValue();
3607
3608  case 32:
3609    // NEON's 32-bit VMOV supports splat values where:
3610    // * only one byte is nonzero, or
3611    // * the least significant byte is 0xff and the second byte is nonzero, or
3612    // * the least significant 2 bytes are 0xff and the third is nonzero.
3613    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3614    if ((SplatBits & ~0xff) == 0) {
3615      // Value = 0x000000nn: Op=x, Cmode=000x.
3616      OpCmode = 0;
3617      Imm = SplatBits;
3618      break;
3619    }
3620    if ((SplatBits & ~0xff00) == 0) {
3621      // Value = 0x0000nn00: Op=x, Cmode=001x.
3622      OpCmode = 0x2;
3623      Imm = SplatBits >> 8;
3624      break;
3625    }
3626    if ((SplatBits & ~0xff0000) == 0) {
3627      // Value = 0x00nn0000: Op=x, Cmode=010x.
3628      OpCmode = 0x4;
3629      Imm = SplatBits >> 16;
3630      break;
3631    }
3632    if ((SplatBits & ~0xff000000) == 0) {
3633      // Value = 0xnn000000: Op=x, Cmode=011x.
3634      OpCmode = 0x6;
3635      Imm = SplatBits >> 24;
3636      break;
3637    }
3638
3639    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
3640    if (type == OtherModImm) return SDValue();
3641
3642    if ((SplatBits & ~0xffff) == 0 &&
3643        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3644      // Value = 0x0000nnff: Op=x, Cmode=1100.
3645      OpCmode = 0xc;
3646      Imm = SplatBits >> 8;
3647      SplatBits |= 0xff;
3648      break;
3649    }
3650
3651    if ((SplatBits & ~0xffffff) == 0 &&
3652        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3653      // Value = 0x00nnffff: Op=x, Cmode=1101.
3654      OpCmode = 0xd;
3655      Imm = SplatBits >> 16;
3656      SplatBits |= 0xffff;
3657      break;
3658    }
3659
3660    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3661    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3662    // VMOV.I32.  A (very) minor optimization would be to replicate the value
3663    // and fall through here to test for a valid 64-bit splat.  But, then the
3664    // caller would also need to check and handle the change in size.
3665    return SDValue();
3666
3667  case 64: {
3668    if (type != VMOVModImm)
3669      return SDValue();
3670    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
3671    uint64_t BitMask = 0xff;
3672    uint64_t Val = 0;
3673    unsigned ImmMask = 1;
3674    Imm = 0;
3675    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
3676      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
3677        Val |= BitMask;
3678        Imm |= ImmMask;
3679      } else if ((SplatBits & BitMask) != 0) {
3680        return SDValue();
3681      }
3682      BitMask <<= 8;
3683      ImmMask <<= 1;
3684    }
3685    // Op=1, Cmode=1110.
3686    OpCmode = 0x1e;
3687    SplatBits = Val;
3688    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3689    break;
3690  }
3691
3692  default:
3693    llvm_unreachable("unexpected size for isNEONModifiedImm");
3694    return SDValue();
3695  }
3696
3697  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
3698  return DAG.getTargetConstant(EncodedVal, MVT::i32);
3699}
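
// Worked encoding example (illustrative): a v2i32 splat of 0x00004000 reaches
// this function with SplatBits == 0x4000 and SplatBitSize == 32.  Only byte 1
// is nonzero, so it matches the "Value = 0x0000nn00: Op=x, Cmode=001x" case
// with OpCmode = 0x2 and Imm = 0x40, i.e. a VMOV.I32 that materializes
// 0x00004000 in every 32-bit lane.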
3700
3701static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
3702                       bool &ReverseVEXT, unsigned &Imm) {
3703  unsigned NumElts = VT.getVectorNumElements();
3704  ReverseVEXT = false;
3705
3706  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
3707  if (M[0] < 0)
3708    return false;
3709
3710  Imm = M[0];
3711
3712  // If this is a VEXT shuffle, the immediate value is the index of the first
3713  // element.  The other shuffle indices must be the successive elements after
3714  // the first one.
3715  unsigned ExpectedElt = Imm;
3716  for (unsigned i = 1; i < NumElts; ++i) {
3717    // Increment the expected index.  If it wraps around, it may still be
3718    // a VEXT but the source vectors must be swapped.
3719    ExpectedElt += 1;
3720    if (ExpectedElt == NumElts * 2) {
3721      ExpectedElt = 0;
3722      ReverseVEXT = true;
3723    }
3724
3725    if (M[i] < 0) continue; // ignore UNDEF indices
3726    if (ExpectedElt != static_cast<unsigned>(M[i]))
3727      return false;
3728  }
3729
3730  // Adjust the index value if the source operands will be swapped.
3731  if (ReverseVEXT)
3732    Imm -= NumElts;
3733
3734  return true;
3735}
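
// Illustrative masks accepted by isVEXTMask for a v8i8 shuffle of (V1, V2):
//   <1, 2, 3, 4, 5, 6, 7, 8>    -> VEXT with Imm = 1, operands in order
//   <15, 0, 1, 2, 3, 4, 5, 6>   -> wraps past the end of the concatenated
//                                  vector, so ReverseVEXT is set, the operands
//                                  are swapped, and Imm = 15 - 8 = 7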
3736
3737/// isVREVMask - Check if a vector shuffle corresponds to a VREV
3738/// instruction with the specified blocksize.  (The order of the elements
3739/// within each block of the vector is reversed.)
3740static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
3741                       unsigned BlockSize) {
3742  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3743         "Only possible block sizes for VREV are: 16, 32, 64");
3744
3745  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3746  if (EltSz == 64)
3747    return false;
3748
3749  unsigned NumElts = VT.getVectorNumElements();
3750  unsigned BlockElts = M[0] + 1;
3751  // If the first shuffle index is UNDEF, be optimistic.
3752  if (M[0] < 0)
3753    BlockElts = BlockSize / EltSz;
3754
3755  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3756    return false;
3757
3758  for (unsigned i = 0; i < NumElts; ++i) {
3759    if (M[i] < 0) continue; // ignore UNDEF indices
3760    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3761      return false;
3762  }
3763
3764  return true;
3765}
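
// Illustrative VREV masks for a v8i8 shuffle (EltSz = 8):
//   BlockSize 16: <1, 0, 3, 2, 5, 4, 7, 6>    (VREV16.8)
//   BlockSize 32: <3, 2, 1, 0, 7, 6, 5, 4>    (VREV32.8)
//   BlockSize 64: <7, 6, 5, 4, 3, 2, 1, 0>    (VREV64.8)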
3766
3767static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
3768  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
3769  // range, then 0 is placed into the resulting vector. So pretty much any mask
3770  // of 8 elements can work here.
3771  return VT == MVT::v8i8 && M.size() == 8;
3772}
3773
3774static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
3775                       unsigned &WhichResult) {
3776  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3777  if (EltSz == 64)
3778    return false;
3779
3780  unsigned NumElts = VT.getVectorNumElements();
3781  WhichResult = (M[0] == 0 ? 0 : 1);
3782  for (unsigned i = 0; i < NumElts; i += 2) {
3783    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3784        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
3785      return false;
3786  }
3787  return true;
3788}
3789
3790/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3791/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3792/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3793static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3794                                unsigned &WhichResult) {
3795  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3796  if (EltSz == 64)
3797    return false;
3798
3799  unsigned NumElts = VT.getVectorNumElements();
3800  WhichResult = (M[0] == 0 ? 0 : 1);
3801  for (unsigned i = 0; i < NumElts; i += 2) {
3802    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3803        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
3804      return false;
3805  }
3806  return true;
3807}
3808
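    /// isVUZPMask - Check if a vector shuffle corresponds to one result of a
    /// VUZP (unzip) operation.  For <4 x i16>, the mask <0, 2, 4, 6> matches the
    /// first result (the even elements, WhichResult == 0) and <1, 3, 5, 7> the
    /// second.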
3809static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
3810                       unsigned &WhichResult) {
3811  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3812  if (EltSz == 64)
3813    return false;
3814
3815  unsigned NumElts = VT.getVectorNumElements();
3816  WhichResult = (M[0] == 0 ? 0 : 1);
3817  for (unsigned i = 0; i != NumElts; ++i) {
3818    if (M[i] < 0) continue; // ignore UNDEF indices
3819    if ((unsigned) M[i] != 2 * i + WhichResult)
3820      return false;
3821  }
3822
3823  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3824  if (VT.is64BitVector() && EltSz == 32)
3825    return false;
3826
3827  return true;
3828}
3829
3830/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
3831/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3832/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
3833static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3834                                unsigned &WhichResult) {
3835  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3836  if (EltSz == 64)
3837    return false;
3838
3839  unsigned Half = VT.getVectorNumElements() / 2;
3840  WhichResult = (M[0] == 0 ? 0 : 1);
3841  for (unsigned j = 0; j != 2; ++j) {
3842    unsigned Idx = WhichResult;
3843    for (unsigned i = 0; i != Half; ++i) {
3844      int MIdx = M[i + j * Half];
3845      if (MIdx >= 0 && (unsigned) MIdx != Idx)
3846        return false;
3847      Idx += 2;
3848    }
3849  }
3850
3851  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3852  if (VT.is64BitVector() && EltSz == 32)
3853    return false;
3854
3855  return true;
3856}
3857
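    /// isVZIPMask - Check if a vector shuffle corresponds to one result of a
    /// VZIP (interleave) operation.  For <4 x i16>, the mask <0, 4, 1, 5>
    /// matches the first result (WhichResult == 0) and <2, 6, 3, 7> the second.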
3858static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
3859                       unsigned &WhichResult) {
3860  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3861  if (EltSz == 64)
3862    return false;
3863
3864  unsigned NumElts = VT.getVectorNumElements();
3865  WhichResult = (M[0] == 0 ? 0 : 1);
3866  unsigned Idx = WhichResult * NumElts / 2;
3867  for (unsigned i = 0; i != NumElts; i += 2) {
3868    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3869        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
3870      return false;
3871    Idx += 1;
3872  }
3873
3874  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3875  if (VT.is64BitVector() && EltSz == 32)
3876    return false;
3877
3878  return true;
3879}
3880
3881/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
3882/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3883/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
3884static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3885                                unsigned &WhichResult) {
3886  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3887  if (EltSz == 64)
3888    return false;
3889
3890  unsigned NumElts = VT.getVectorNumElements();
3891  WhichResult = (M[0] == 0 ? 0 : 1);
3892  unsigned Idx = WhichResult * NumElts / 2;
3893  for (unsigned i = 0; i != NumElts; i += 2) {
3894    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3895        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
3896      return false;
3897    Idx += 1;
3898  }
3899
3900  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3901  if (VT.is64BitVector() && EltSz == 32)
3902    return false;
3903
3904  return true;
3905}
3906
3907// If N is an integer constant that can be moved into a register in one
3908// instruction, return an SDValue of such a constant (will become a MOV
3909// instruction).  Otherwise return null.
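    // For example, 0x00ab0000 is encodable as an ARM modified immediate
    // (0xab rotated right by 16), while Thumb1 MOV only accepts 0-255.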
3910static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
3911                                     const ARMSubtarget *ST, DebugLoc dl) {
3912  uint64_t Val;
3913  if (!isa<ConstantSDNode>(N))
3914    return SDValue();
3915  Val = cast<ConstantSDNode>(N)->getZExtValue();
3916
3917  if (ST->isThumb1Only()) {
3918    if (Val <= 255 || ~Val <= 255)
3919      return DAG.getConstant(Val, MVT::i32);
3920  } else {
3921    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
3922      return DAG.getConstant(Val, MVT::i32);
3923  }
3924  return SDValue();
3925}
3926
3927// If this is a case we can't handle, return null and let the default
3928// expansion code take care of it.
3929SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3930                                             const ARMSubtarget *ST) const {
3931  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3932  DebugLoc dl = Op.getDebugLoc();
3933  EVT VT = Op.getValueType();
3934
3935  APInt SplatBits, SplatUndef;
3936  unsigned SplatBitSize;
3937  bool HasAnyUndefs;
3938  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3939    if (SplatBitSize <= 64) {
3940      // Check if an immediate VMOV works.
3941      EVT VmovVT;
3942      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
3943                                      SplatUndef.getZExtValue(), SplatBitSize,
3944                                      DAG, VmovVT, VT.is128BitVector(),
3945                                      VMOVModImm);
3946      if (Val.getNode()) {
3947        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
3948        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3949      }
3950
3951      // Try an immediate VMVN.
3952      uint64_t NegatedImm = (~SplatBits).getZExtValue();
3953      Val = isNEONModifiedImm(NegatedImm,
3954                                      SplatUndef.getZExtValue(), SplatBitSize,
3955                                      DAG, VmovVT, VT.is128BitVector(),
3956                                      VMVNModImm);
3957      if (Val.getNode()) {
3958        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
3959        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3960      }
3961    }
3962  }
3963
3964  // Scan through the operands to see if only one value is used.
3965  unsigned NumElts = VT.getVectorNumElements();
3966  bool isOnlyLowElement = true;
3967  bool usesOnlyOneValue = true;
3968  bool isConstant = true;
3969  SDValue Value;
3970  for (unsigned i = 0; i < NumElts; ++i) {
3971    SDValue V = Op.getOperand(i);
3972    if (V.getOpcode() == ISD::UNDEF)
3973      continue;
3974    if (i > 0)
3975      isOnlyLowElement = false;
3976    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3977      isConstant = false;
3978
3979    if (!Value.getNode())
3980      Value = V;
3981    else if (V != Value)
3982      usesOnlyOneValue = false;
3983  }
3984
3985  if (!Value.getNode())
3986    return DAG.getUNDEF(VT);
3987
3988  if (isOnlyLowElement)
3989    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
3990
3991  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3992
3993  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
3994  // i32 and try again.
3995  if (usesOnlyOneValue && EltSize <= 32) {
3996    if (!isConstant)
3997      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3998    if (VT.getVectorElementType().isFloatingPoint()) {
3999      SmallVector<SDValue, 8> Ops;
4000      for (unsigned i = 0; i < NumElts; ++i)
4001        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
4002                                  Op.getOperand(i)));
4003      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
4004      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
4005      Val = LowerBUILD_VECTOR(Val, DAG, ST);
4006      if (Val.getNode())
4007        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4008    }
4009    SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
4010    if (Val.getNode())
4011      return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
4012  }
4013
4014  // If all elements are constants and the case above didn't get hit, fall back
4015  // to the default expansion, which will generate a load from the constant
4016  // pool.
4017  if (isConstant)
4018    return SDValue();
4019
4020  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
4021  if (NumElts >= 4) {
4022    SDValue shuffle = ReconstructShuffle(Op, DAG);
4023    if (shuffle != SDValue())
4024      return shuffle;
4025  }
4026
4027  // Vectors with 32- or 64-bit elements can be built by directly assigning
4028  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
4029  // will be legalized.
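      // For example, a v2i64 or v2f64 BUILD_VECTOR becomes an ARMISD::BUILD_VECTOR
      // of two f64 values, which map directly onto the D subregisters of a
      // Q register.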
4030  if (EltSize >= 32) {
4031    // Do the expansion with floating-point types, since that is what the VFP
4032    // registers are defined to use, and since i64 is not legal.
4033    EVT EltVT = EVT::getFloatingPointVT(EltSize);
4034    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
4035    SmallVector<SDValue, 8> Ops;
4036    for (unsigned i = 0; i < NumElts; ++i)
4037      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
4038    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
4039    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4040  }
4041
4042  return SDValue();
4043}
4044
4045// Gather data to see if the operation can be modelled as a
4046// shuffle in combination with VEXTs.
4047SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
4048                                              SelectionDAG &DAG) const {
4049  DebugLoc dl = Op.getDebugLoc();
4050  EVT VT = Op.getValueType();
4051  unsigned NumElts = VT.getVectorNumElements();
4052
4053  SmallVector<SDValue, 2> SourceVecs;
4054  SmallVector<unsigned, 2> MinElts;
4055  SmallVector<unsigned, 2> MaxElts;
4056
4057  for (unsigned i = 0; i < NumElts; ++i) {
4058    SDValue V = Op.getOperand(i);
4059    if (V.getOpcode() == ISD::UNDEF)
4060      continue;
4061    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
4062      // A shuffle can only come from building a vector from various
4063      // elements of other vectors.
4064      return SDValue();
4065    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
4066               VT.getVectorElementType()) {
4067      // This code doesn't know how to handle shuffles where the vector
4068      // element types do not match (this happens because type legalization
4069      // promotes the return type of EXTRACT_VECTOR_ELT).
4070      // FIXME: It might be appropriate to extend this code to handle
4071      // mismatched types.
4072      return SDValue();
4073    }
4074
4075    // Record this extraction against the appropriate vector if possible...
4076    SDValue SourceVec = V.getOperand(0);
4077    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
4078    bool FoundSource = false;
4079    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
4080      if (SourceVecs[j] == SourceVec) {
4081        if (MinElts[j] > EltNo)
4082          MinElts[j] = EltNo;
4083        if (MaxElts[j] < EltNo)
4084          MaxElts[j] = EltNo;
4085        FoundSource = true;
4086        break;
4087      }
4088    }
4089
4090    // Or record a new source if not...
4091    if (!FoundSource) {
4092      SourceVecs.push_back(SourceVec);
4093      MinElts.push_back(EltNo);
4094      MaxElts.push_back(EltNo);
4095    }
4096  }
4097
4098  // Currently we only do something sane when at most two source vectors
4099  // are involved.
4100  if (SourceVecs.size() > 2)
4101    return SDValue();
4102
4103  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
4104  int VEXTOffsets[2] = {0, 0};
4105
4106  // This loop extracts the usage patterns of the source vectors
4107  // and prepares appropriate SDValues for a shuffle if possible.
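      // If all used elements come from one half of a double-width source, just
      // take that half with EXTRACT_SUBVECTOR; if they straddle the midpoint,
      // slide the needed window down to offset 0 with a VEXT.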
4108  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
4109    if (SourceVecs[i].getValueType() == VT) {
4110      // No VEXT necessary
4111      ShuffleSrcs[i] = SourceVecs[i];
4112      VEXTOffsets[i] = 0;
4113      continue;
4114    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
4115      // It probably isn't worth padding out a smaller vector just to
4116      // break it down again in a shuffle.
4117      return SDValue();
4118    }
4119
4120    // Since only 64-bit and 128-bit vectors are legal on ARM and
4121    // we've eliminated the other cases...
4122    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
4123           "unexpected vector sizes in ReconstructShuffle");
4124
4125    if (MaxElts[i] - MinElts[i] >= NumElts) {
4126      // Span too large for a VEXT to cope with
4127      return SDValue();
4128    }
4129
4130    if (MinElts[i] >= NumElts) {
4131      // The extraction can just take the second half
4132      VEXTOffsets[i] = NumElts;
4133      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4134                                   SourceVecs[i],
4135                                   DAG.getIntPtrConstant(NumElts));
4136    } else if (MaxElts[i] < NumElts) {
4137      // The extraction can just take the first half
4138      VEXTOffsets[i] = 0;
4139      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4140                                   SourceVecs[i],
4141                                   DAG.getIntPtrConstant(0));
4142    } else {
4143      // An actual VEXT is needed
4144      VEXTOffsets[i] = MinElts[i];
4145      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4146                                     SourceVecs[i],
4147                                     DAG.getIntPtrConstant(0));
4148      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4149                                     SourceVecs[i],
4150                                     DAG.getIntPtrConstant(NumElts));
4151      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
4152                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
4153    }
4154  }
4155
4156  SmallVector<int, 8> Mask;
4157
4158  for (unsigned i = 0; i < NumElts; ++i) {
4159    SDValue Entry = Op.getOperand(i);
4160    if (Entry.getOpcode() == ISD::UNDEF) {
4161      Mask.push_back(-1);
4162      continue;
4163    }
4164
4165    SDValue ExtractVec = Entry.getOperand(0);
4166    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
4167                                          .getOperand(1))->getSExtValue();
4168    if (ExtractVec == SourceVecs[0]) {
4169      Mask.push_back(ExtractElt - VEXTOffsets[0]);
4170    } else {
4171      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
4172    }
4173  }
4174
4175  // Final check before we try to produce nonsense...
4176  if (isShuffleMaskLegal(Mask, VT))
4177    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
4178                                &Mask[0]);
4179
4180  return SDValue();
4181}
4182
4183/// isShuffleMaskLegal - Targets can use this to indicate that they only
4184/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4185/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4186/// are assumed to be legal.
4187bool
4188ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
4189                                      EVT VT) const {
4190  if (VT.getVectorNumElements() == 4 &&
4191      (VT.is128BitVector() || VT.is64BitVector())) {
4192    unsigned PFIndexes[4];
4193    for (unsigned i = 0; i != 4; ++i) {
4194      if (M[i] < 0)
4195        PFIndexes[i] = 8;
4196      else
4197        PFIndexes[i] = M[i];
4198    }
4199
4200    // Compute the index in the perfect shuffle table.
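        // Each mask element is encoded in base 9 (0-7 for a lane, 8 for undef),
        // so the table has 9*9*9*9 entries.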
4201    unsigned PFTableIndex =
4202      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
4203    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
4204    unsigned Cost = (PFEntry >> 30);
4205
4206    if (Cost <= 4)
4207      return true;
4208  }
4209
4210  bool ReverseVEXT;
4211  unsigned Imm, WhichResult;
4212
4213  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4214  return (EltSize >= 32 ||
4215          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
4216          isVREVMask(M, VT, 64) ||
4217          isVREVMask(M, VT, 32) ||
4218          isVREVMask(M, VT, 16) ||
4219          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
4220          isVTBLMask(M, VT) ||
4221          isVTRNMask(M, VT, WhichResult) ||
4222          isVUZPMask(M, VT, WhichResult) ||
4223          isVZIPMask(M, VT, WhichResult) ||
4224          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
4225          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
4226          isVZIP_v_undef_Mask(M, VT, WhichResult));
4227}
4228
4229/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4230/// the specified operations to build the shuffle.
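    /// Each 32-bit table entry packs a cost in bits 31-30, an opcode (one of
    /// the OP_* values below) in bits 29-26, and the 13-bit table IDs of the
    /// two operand shuffles to build first in bits 25-13 and 12-0.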
4231static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
4232                                      SDValue RHS, SelectionDAG &DAG,
4233                                      DebugLoc dl) {
4234  unsigned OpNum = (PFEntry >> 26) & 0x0F;
4235  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
4236  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
4237
4238  enum {
4239    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
4240    OP_VREV,
4241    OP_VDUP0,
4242    OP_VDUP1,
4243    OP_VDUP2,
4244    OP_VDUP3,
4245    OP_VEXT1,
4246    OP_VEXT2,
4247    OP_VEXT3,
4248    OP_VUZPL, // VUZP, left result
4249    OP_VUZPR, // VUZP, right result
4250    OP_VZIPL, // VZIP, left result
4251    OP_VZIPR, // VZIP, right result
4252    OP_VTRNL, // VTRN, left result
4253    OP_VTRNR  // VTRN, right result
4254  };
4255
4256  if (OpNum == OP_COPY) {
4257    if (LHSID == (1*9+2)*9+3) return LHS;
4258    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
4259    return RHS;
4260  }
4261
4262  SDValue OpLHS, OpRHS;
4263  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
4264  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
4265  EVT VT = OpLHS.getValueType();
4266
4267  switch (OpNum) {
4268  default: llvm_unreachable("Unknown shuffle opcode!");
4269  case OP_VREV:
4270    // VREV divides the vector in half and swaps within the half.
4271    if (VT.getVectorElementType() == MVT::i32 ||
4272        VT.getVectorElementType() == MVT::f32)
4273      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
4274    // vrev <4 x i16> -> VREV32
4275    if (VT.getVectorElementType() == MVT::i16)
4276      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
4277    // vrev <4 x i8> -> VREV16
4278    assert(VT.getVectorElementType() == MVT::i8);
4279    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
4280  case OP_VDUP0:
4281  case OP_VDUP1:
4282  case OP_VDUP2:
4283  case OP_VDUP3:
4284    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
4285                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
4286  case OP_VEXT1:
4287  case OP_VEXT2:
4288  case OP_VEXT3:
4289    return DAG.getNode(ARMISD::VEXT, dl, VT,
4290                       OpLHS, OpRHS,
4291                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
4292  case OP_VUZPL:
4293  case OP_VUZPR:
4294    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4295                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
4296  case OP_VZIPL:
4297  case OP_VZIPR:
4298    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4299                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
4300  case OP_VTRNL:
4301  case OP_VTRNR:
4302    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4303                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
4304  }
4305}
4306
4307static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
4308                                       SmallVectorImpl<int> &ShuffleMask,
4309                                       SelectionDAG &DAG) {
4310  // Check to see if we can use the VTBL instruction.
4311  SDValue V1 = Op.getOperand(0);
4312  SDValue V2 = Op.getOperand(1);
4313  DebugLoc DL = Op.getDebugLoc();
4314
4315  SmallVector<SDValue, 8> VTBLMask;
4316  for (SmallVectorImpl<int>::iterator
4317         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
4318    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
4319
4320  if (V2.getNode()->getOpcode() == ISD::UNDEF)
4321    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
4322                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4323                                   &VTBLMask[0], 8));
4324
4325  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
4326                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4327                                 &VTBLMask[0], 8));
4328}
4329
4330static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
4331  SDValue V1 = Op.getOperand(0);
4332  SDValue V2 = Op.getOperand(1);
4333  DebugLoc dl = Op.getDebugLoc();
4334  EVT VT = Op.getValueType();
4335  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4336  SmallVector<int, 8> ShuffleMask;
4337
4338  // Convert shuffles that are directly supported on NEON to target-specific
4339  // DAG nodes, instead of keeping them as shuffles and matching them again
4340  // during code selection.  This is more efficient and avoids the possibility
4341  // of inconsistencies between legalization and selection.
4342  // FIXME: floating-point vectors should be canonicalized to integer vectors
4343  // of the same size so that they get CSEd properly.
4344  SVN->getMask(ShuffleMask);
4345
4346  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4347  if (EltSize <= 32) {
4348    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
4349      int Lane = SVN->getSplatIndex();
4350      // Treat an undef splat index as lane 0 (a plain VDUP when possible).
4351      if (Lane == -1) Lane = 0;
4352
4353      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4354        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
4355      }
4356      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
4357                         DAG.getConstant(Lane, MVT::i32));
4358    }
4359
4360    bool ReverseVEXT;
4361    unsigned Imm;
4362    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
4363      if (ReverseVEXT)
4364        std::swap(V1, V2);
4365      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
4366                         DAG.getConstant(Imm, MVT::i32));
4367    }
4368
4369    if (isVREVMask(ShuffleMask, VT, 64))
4370      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
4371    if (isVREVMask(ShuffleMask, VT, 32))
4372      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
4373    if (isVREVMask(ShuffleMask, VT, 16))
4374      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
4375
4376    // Check for Neon shuffles that modify both input vectors in place.
4377    // If both results are used, i.e., if there are two shuffles with the same
4378    // source operands and with masks corresponding to both results of one of
4379    // these operations, DAG memoization will ensure that a single node is
4380    // used for both shuffles.
4381    unsigned WhichResult;
4382    if (isVTRNMask(ShuffleMask, VT, WhichResult))
4383      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4384                         V1, V2).getValue(WhichResult);
4385    if (isVUZPMask(ShuffleMask, VT, WhichResult))
4386      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4387                         V1, V2).getValue(WhichResult);
4388    if (isVZIPMask(ShuffleMask, VT, WhichResult))
4389      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4390                         V1, V2).getValue(WhichResult);
4391
4392    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
4393      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4394                         V1, V1).getValue(WhichResult);
4395    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
4396      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4397                         V1, V1).getValue(WhichResult);
4398    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
4399      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4400                         V1, V1).getValue(WhichResult);
4401  }
4402
4403  // If the shuffle is not directly supported and it has 4 elements, use
4404  // the PerfectShuffle-generated table to synthesize it from other shuffles.
4405  unsigned NumElts = VT.getVectorNumElements();
4406  if (NumElts == 4) {
4407    unsigned PFIndexes[4];
4408    for (unsigned i = 0; i != 4; ++i) {
4409      if (ShuffleMask[i] < 0)
4410        PFIndexes[i] = 8;
4411      else
4412        PFIndexes[i] = ShuffleMask[i];
4413    }
4414
4415    // Compute the index in the perfect shuffle table.
4416    unsigned PFTableIndex =
4417      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
4418    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
4419    unsigned Cost = (PFEntry >> 30);
4420
4421    if (Cost <= 4)
4422      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
4423  }
4424
4425  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
4426  if (EltSize >= 32) {
4427    // Do the expansion with floating-point types, since that is what the VFP
4428    // registers are defined to use, and since i64 is not legal.
4429    EVT EltVT = EVT::getFloatingPointVT(EltSize);
4430    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
4431    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
4432    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
4433    SmallVector<SDValue, 8> Ops;
4434    for (unsigned i = 0; i < NumElts; ++i) {
4435      if (ShuffleMask[i] < 0)
4436        Ops.push_back(DAG.getUNDEF(EltVT));
4437      else
4438        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
4439                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
4440                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
4441                                                  MVT::i32)));
4442    }
4443    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
4444    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4445  }
4446
4447  if (VT == MVT::v8i8) {
4448    SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
4449    if (NewOp.getNode())
4450      return NewOp;
4451  }
4452
4453  return SDValue();
4454}
4455
4456static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
4457  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
4458  SDValue Lane = Op.getOperand(1);
4459  if (!isa<ConstantSDNode>(Lane))
4460    return SDValue();
4461
4462  SDValue Vec = Op.getOperand(0);
4463  if (Op.getValueType() == MVT::i32 &&
4464      Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
4465    DebugLoc dl = Op.getDebugLoc();
4466    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
4467  }
4468
4469  return Op;
4470}
4471
4472static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
4473  // The only time a CONCAT_VECTORS operation can have legal types is when
4474  // two 64-bit vectors are concatenated to a 128-bit vector.
4475  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
4476         "unexpected CONCAT_VECTORS");
4477  DebugLoc dl = Op.getDebugLoc();
4478  SDValue Val = DAG.getUNDEF(MVT::v2f64);
4479  SDValue Op0 = Op.getOperand(0);
4480  SDValue Op1 = Op.getOperand(1);
4481  if (Op0.getOpcode() != ISD::UNDEF)
4482    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
4483                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
4484                      DAG.getIntPtrConstant(0));
4485  if (Op1.getOpcode() != ISD::UNDEF)
4486    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
4487                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
4488                      DAG.getIntPtrConstant(1));
4489  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
4490}
4491
4492/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
4493/// element has been zero/sign-extended, depending on the isSigned parameter,
4494/// from an integer type half its size.
4495static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
4496                                   bool isSigned) {
4497  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
4498  EVT VT = N->getValueType(0);
4499  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
4500    SDNode *BVN = N->getOperand(0).getNode();
4501    if (BVN->getValueType(0) != MVT::v4i32 ||
4502        BVN->getOpcode() != ISD::BUILD_VECTOR)
4503      return false;
4504    unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
4505    unsigned HiElt = 1 - LoElt;
4506    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
4507    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
4508    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
4509    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
4510    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
4511      return false;
4512    if (isSigned) {
4513      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
4514          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
4515        return true;
4516    } else {
4517      if (Hi0->isNullValue() && Hi1->isNullValue())
4518        return true;
4519    }
4520    return false;
4521  }
4522
4523  if (N->getOpcode() != ISD::BUILD_VECTOR)
4524    return false;
4525
4526  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
4527    SDNode *Elt = N->getOperand(i).getNode();
4528    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
4529      unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4530      unsigned HalfSize = EltSize / 2;
4531      if (isSigned) {
4532        if (!isIntN(HalfSize, C->getSExtValue()))
4533          return false;
4534      } else {
4535        if (!isUIntN(HalfSize, C->getZExtValue()))
4536          return false;
4537      }
4538      continue;
4539    }
4540    return false;
4541  }
4542
4543  return true;
4544}
4545
4546/// isSignExtended - Check if a node is a vector value that is sign-extended
4547/// or a constant BUILD_VECTOR with sign-extended elements.
4548static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
4549  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
4550    return true;
4551  if (isExtendedBUILD_VECTOR(N, DAG, true))
4552    return true;
4553  return false;
4554}
4555
4556/// isZeroExtended - Check if a node is a vector value that is zero-extended
4557/// or a constant BUILD_VECTOR with zero-extended elements.
4558static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
4559  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
4560    return true;
4561  if (isExtendedBUILD_VECTOR(N, DAG, false))
4562    return true;
4563  return false;
4564}
4565
4566/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
4567/// load, or BUILD_VECTOR with extended elements, return the unextended value.
4568static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
4569  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
4570    return N->getOperand(0);
4571  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
4572    return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
4573                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
4574                       LD->isNonTemporal(), LD->getAlignment());
4575  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
4576  // have been legalized as a BITCAST from v4i32.
4577  if (N->getOpcode() == ISD::BITCAST) {
4578    SDNode *BVN = N->getOperand(0).getNode();
4579    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
4580           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
4581    unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
4582    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
4583                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
4584  }
4585  // Construct a new BUILD_VECTOR with elements truncated to half the size.
4586  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
4587  EVT VT = N->getValueType(0);
4588  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
4589  unsigned NumElts = VT.getVectorNumElements();
4590  MVT TruncVT = MVT::getIntegerVT(EltSize);
4591  SmallVector<SDValue, 8> Ops;
4592  for (unsigned i = 0; i != NumElts; ++i) {
4593    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
4594    const APInt &CInt = C->getAPIntValue();
4595    Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
4596  }
4597  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
4598                     MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
4599}
4600
4601static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
4602  unsigned Opcode = N->getOpcode();
4603  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4604    SDNode *N0 = N->getOperand(0).getNode();
4605    SDNode *N1 = N->getOperand(1).getNode();
4606    return N0->hasOneUse() && N1->hasOneUse() &&
4607      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
4608  }
4609  return false;
4610}
4611
4612static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
4613  unsigned Opcode = N->getOpcode();
4614  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4615    SDNode *N0 = N->getOperand(0).getNode();
4616    SDNode *N1 = N->getOperand(1).getNode();
4617    return N0->hasOneUse() && N1->hasOneUse() &&
4618      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
4619  }
4620  return false;
4621}
4622
4623static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
4624  // Multiplications are only custom-lowered for 128-bit vectors so that
4625  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
4626  EVT VT = Op.getValueType();
4627  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
4628  SDNode *N0 = Op.getOperand(0).getNode();
4629  SDNode *N1 = Op.getOperand(1).getNode();
4630  unsigned NewOpc = 0;
4631  bool isMLA = false;
4632  bool isN0SExt = isSignExtended(N0, DAG);
4633  bool isN1SExt = isSignExtended(N1, DAG);
4634  if (isN0SExt && isN1SExt)
4635    NewOpc = ARMISD::VMULLs;
4636  else {
4637    bool isN0ZExt = isZeroExtended(N0, DAG);
4638    bool isN1ZExt = isZeroExtended(N1, DAG);
4639    if (isN0ZExt && isN1ZExt)
4640      NewOpc = ARMISD::VMULLu;
4641    else if (isN1SExt || isN1ZExt) {
4642      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
4643      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
4644      if (isN1SExt && isAddSubSExt(N0, DAG)) {
4645        NewOpc = ARMISD::VMULLs;
4646        isMLA = true;
4647      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
4648        NewOpc = ARMISD::VMULLu;
4649        isMLA = true;
4650      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
4651        std::swap(N0, N1);
4652        NewOpc = ARMISD::VMULLu;
4653        isMLA = true;
4654      }
4655    }
4656
4657    if (!NewOpc) {
4658      if (VT == MVT::v2i64)
4659        // Fall through to expand this.  It is not legal.
4660        return SDValue();
4661      else
4662        // Other vector multiplications are legal.
4663        return Op;
4664    }
4665  }
4666
4667  // Legalize to a VMULL instruction.
4668  DebugLoc DL = Op.getDebugLoc();
4669  SDValue Op0;
4670  SDValue Op1 = SkipExtension(N1, DAG);
4671  if (!isMLA) {
4672    Op0 = SkipExtension(N0, DAG);
4673    assert(Op0.getValueType().is64BitVector() &&
4674           Op1.getValueType().is64BitVector() &&
4675           "unexpected types for extended operands to VMULL");
4676    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
4677  }
4678
4679  // Optimize (zext A + zext B) * C into (VMULL A, C) + (VMULL B, C) during
4680  // isel lowering to take advantage of the no-stall back-to-back vmul + vmla.
4681  //   vmull q0, d4, d6
4682  //   vmlal q0, d5, d6
4683  // is faster than
4684  //   vaddl q0, d4, d5
4685  //   vmovl q1, d6
4686  //   vmul  q0, q0, q1
4687  SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
4688  SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
4689  EVT Op1VT = Op1.getValueType();
4690  return DAG.getNode(N0->getOpcode(), DL, VT,
4691                     DAG.getNode(NewOpc, DL, VT,
4692                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
4693                     DAG.getNode(NewOpc, DL, VT,
4694                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
4695}
4696
4697static SDValue
4698LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
4699  // Convert to float
4700  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
4701  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
4702  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
4703  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
4704  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
4705  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
4706  // Get reciprocal estimate.
4707  // float4 recip = vrecpeq_f32(yf);
4708  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4709                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
4710  // Because char has a smaller range than uchar, we can actually get away
4711  // without any Newton steps.  This requires that we use a weird bias
4712  // of 0xb000, however (again, this has been exhaustively tested).
4713  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
4714  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
4715  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
4716  Y = DAG.getConstant(0xb000, MVT::i32);
4717  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
4718  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
4719  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
4720  // Convert back to short.
4721  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
4722  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
4723  return X;
4724}
4725
4726static SDValue
4727LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
4728  SDValue N2;
4729  // Convert to float.
4730  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
4731  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
4732  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
4733  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
4734  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
4735  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
4736
4737  // Use reciprocal estimate and one refinement step.
4738  // float4 recip = vrecpeq_f32(yf);
4739  // recip *= vrecpsq_f32(yf, recip);
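      // (VRECPS computes 2.0 - a*b, so multiplying the estimate by its result
      // performs one Newton-Raphson refinement of the reciprocal.)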
4740  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4741                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
4742  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4743                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
4744                   N1, N2);
4745  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
4746  // Because short has a smaller range than ushort, we can actually get away
4747  // with only a single Newton step.  This requires that we use a weird bias
4748  // of 0x89, however (again, this has been exhaustively tested).
4749  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
4750  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
4751  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
4752  N1 = DAG.getConstant(0x89, MVT::i32);
4753  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
4754  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
4755  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
4756  // Convert back to integer and return.
4757  // return vmovn_s32(vcvt_s32_f32(result));
4758  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
4759  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
4760  return N0;
4761}
4762
4763static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
4764  EVT VT = Op.getValueType();
4765  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
4766         "unexpected type for custom-lowering ISD::SDIV");
4767
4768  DebugLoc dl = Op.getDebugLoc();
4769  SDValue N0 = Op.getOperand(0);
4770  SDValue N1 = Op.getOperand(1);
4771  SDValue N2, N3;
4772
4773  if (VT == MVT::v8i8) {
4774    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
4775    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
4776
4777    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4778                     DAG.getIntPtrConstant(4));
4779    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4780                     DAG.getIntPtrConstant(4));
4781    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4782                     DAG.getIntPtrConstant(0));
4783    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4784                     DAG.getIntPtrConstant(0));
4785
4786    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
4787    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
4788
4789    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
4790    N0 = LowerCONCAT_VECTORS(N0, DAG);
4791
4792    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
4793    return N0;
4794  }
4795  return LowerSDIV_v4i16(N0, N1, dl, DAG);
4796}
4797
4798static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
4799  EVT VT = Op.getValueType();
4800  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
4801         "unexpected type for custom-lowering ISD::UDIV");
4802
4803  DebugLoc dl = Op.getDebugLoc();
4804  SDValue N0 = Op.getOperand(0);
4805  SDValue N1 = Op.getOperand(1);
4806  SDValue N2, N3;
4807
4808  if (VT == MVT::v8i8) {
4809    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
4810    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
4811
4812    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4813                     DAG.getIntPtrConstant(4));
4814    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4815                     DAG.getIntPtrConstant(4));
4816    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4817                     DAG.getIntPtrConstant(0));
4818    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4819                     DAG.getIntPtrConstant(0));
4820
4821    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
4822    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
4823
4824    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
4825    N0 = LowerCONCAT_VECTORS(N0, DAG);
4826
4827    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
4828                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
4829                     N0);
4830    return N0;
4831  }
4832
4833  // v4i16 udiv ... Convert to float.
4834  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
4835  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
4836  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
4837  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
4838  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
4839  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
4840
4841  // Use reciprocal estimate and two refinement steps.
4842  // float4 recip = vrecpeq_f32(yf);
4843  // recip *= vrecpsq_f32(yf, recip);
4844  // recip *= vrecpsq_f32(yf, recip);
4845  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4846                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
4847  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4848                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
4849                   BN1, N2);
4850  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
4851  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4852                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
4853                   BN1, N2);
4854  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
4855  // Simply multiplying by the reciprocal estimate can leave us a few ulps
4856  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
4857  // and that it will never cause us to return an answer too large).
4858  // float4 result = as_float4(as_int4(xf*recip) + 2);
4859  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
4860  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
4861  N1 = DAG.getConstant(2, MVT::i32);
4862  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
4863  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
4864  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
4865  // Convert back to integer and return.
4866  // return vmovn_u32(vcvt_s32_f32(result));
4867  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
4868  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
4869  return N0;
4870}
4871
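    // Carry-setting and carry-using arithmetic maps 1-1 onto the corresponding
    // ARMISD nodes; the extra MVT::i32 result in the VT list is the carry/flags
    // output consumed by the ADDE/SUBE forms.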
4872static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
4873  EVT VT = Op.getNode()->getValueType(0);
4874  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4875
4876  unsigned Opc;
4877  bool ExtraOp = false;
4878  switch (Op.getOpcode()) {
4879  default: llvm_unreachable("Invalid code");
4880  case ISD::ADDC: Opc = ARMISD::ADDC; break;
4881  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
4882  case ISD::SUBC: Opc = ARMISD::SUBC; break;
4883  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
4884  }
4885
4886  if (!ExtraOp)
4887    return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
4888                       Op.getOperand(1));
4889  return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
4890                     Op.getOperand(1), Op.getOperand(2));
4891}
4892
4893static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
4894  // Monotonic load/store is legal for all targets
4895  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
4896    return Op;
4897
4898  // Acquire/Release load/store is not legal for targets without a
4899  // dmb or equivalent available.
4900  return SDValue();
4901}
4902
4903
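    // ReplaceATOMIC_OP_64 - Expand a 64-bit atomic operation into a target
    // memory-intrinsic node: the i64 value operands are split into 32-bit halves
    // with EXTRACT_ELEMENT, and the two i32 results are reassembled into an i64
    // with BUILD_PAIR.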
4904static void
4905ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
4906                    SelectionDAG &DAG, unsigned NewOp) {
4907  DebugLoc dl = Node->getDebugLoc();
4908  assert (Node->getValueType(0) == MVT::i64 &&
4909          "Only know how to expand i64 atomics");
4910
4911  SmallVector<SDValue, 6> Ops;
4912  Ops.push_back(Node->getOperand(0)); // Chain
4913  Ops.push_back(Node->getOperand(1)); // Ptr
4914  // Low part of Val1
4915  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
4916                            Node->getOperand(2), DAG.getIntPtrConstant(0)));
4917  // High part of Val1
4918  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
4919                            Node->getOperand(2), DAG.getIntPtrConstant(1)));
4920  if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
4921    // Low part of Val2
4922    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
4923                              Node->getOperand(3), DAG.getIntPtrConstant(0)));
4924    // High part of Val2
4925    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
4926                              Node->getOperand(3), DAG.getIntPtrConstant(1)));
4927  }
4928  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
4929  SDValue Result =
4930    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
4931                            cast<MemSDNode>(Node)->getMemOperand());
4932  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
4933  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
4934  Results.push_back(Result.getValue(2));
4935}
4936
4937SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
4938  switch (Op.getOpcode()) {
4939  default: llvm_unreachable("Don't know how to custom lower this!");
4940  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
4941  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
4942  case ISD::GlobalAddress:
4943    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
4944      LowerGlobalAddressELF(Op, DAG);
4945  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
4946  case ISD::SELECT:        return LowerSELECT(Op, DAG);
4947  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
4948  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
4949  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
4950  case ISD::VASTART:       return LowerVASTART(Op, DAG);
4951  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
4952  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
4953  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
4954  case ISD::SINT_TO_FP:
4955  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
4956  case ISD::FP_TO_SINT:
4957  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
4958  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
4959  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
4960  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
4961  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
4962  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
4963  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
4964  case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
4965  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
4966                                                               Subtarget);
4967  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
4968  case ISD::SHL:
4969  case ISD::SRL:
4970  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
4971  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
4972  case ISD::SRL_PARTS:
4973  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
4974  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
4975  case ISD::SETCC:         return LowerVSETCC(Op, DAG);
4976  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
4977  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
4978  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4979  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
4980  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
4981  case ISD::MUL:           return LowerMUL(Op, DAG);
4982  case ISD::SDIV:          return LowerSDIV(Op, DAG);
4983  case ISD::UDIV:          return LowerUDIV(Op, DAG);
4984  case ISD::ADDC:
4985  case ISD::ADDE:
4986  case ISD::SUBC:
4987  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4988  case ISD::ATOMIC_LOAD:
4989  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
4990  }
4991  return SDValue();
4992}
4993
4994/// ReplaceNodeResults - Replace the results of node with an illegal result
4995/// type with new values built out of custom code.
4996void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
4997                                           SmallVectorImpl<SDValue>&Results,
4998                                           SelectionDAG &DAG) const {
4999  SDValue Res;
5000  switch (N->getOpcode()) {
5001  default:
5002    llvm_unreachable("Don't know how to custom expand this!");
5003    break;
5004  case ISD::BITCAST:
5005    Res = ExpandBITCAST(N, DAG);
5006    break;
5007  case ISD::SRL:
5008  case ISD::SRA:
5009    Res = Expand64BitShift(N, DAG, Subtarget);
5010    break;
5011  case ISD::ATOMIC_LOAD_ADD:
5012    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
5013    return;
5014  case ISD::ATOMIC_LOAD_AND:
5015    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
5016    return;
5017  case ISD::ATOMIC_LOAD_NAND:
5018    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
5019    return;
5020  case ISD::ATOMIC_LOAD_OR:
5021    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
5022    return;
5023  case ISD::ATOMIC_LOAD_SUB:
5024    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
5025    return;
5026  case ISD::ATOMIC_LOAD_XOR:
5027    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
5028    return;
5029  case ISD::ATOMIC_SWAP:
5030    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
5031    return;
5032  case ISD::ATOMIC_CMP_SWAP:
5033    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
5034    return;
5035  }
5036  if (Res.getNode())
5037    Results.push_back(Res);
5038}
5039
5040//===----------------------------------------------------------------------===//
5041//                           ARM Scheduler Hooks
5042//===----------------------------------------------------------------------===//
5043
5044MachineBasicBlock *
5045ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
5046                                     MachineBasicBlock *BB,
5047                                     unsigned Size) const {
5048  unsigned dest    = MI->getOperand(0).getReg();
5049  unsigned ptr     = MI->getOperand(1).getReg();
5050  unsigned oldval  = MI->getOperand(2).getReg();
5051  unsigned newval  = MI->getOperand(3).getReg();
5052  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5053  DebugLoc dl = MI->getDebugLoc();
5054  bool isThumb2 = Subtarget->isThumb2();
5055
5056  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5057  unsigned scratch =
5058    MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass
5059                                       : ARM::GPRRegisterClass);
5060
5061  if (isThumb2) {
5062    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
5063    MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass);
5064    MRI.constrainRegClass(newval, ARM::rGPRRegisterClass);
5065  }
5066
5067  unsigned ldrOpc, strOpc;
5068  switch (Size) {
5069  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
5070  case 1:
5071    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5072    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5073    break;
5074  case 2:
5075    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5076    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5077    break;
5078  case 4:
5079    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5080    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5081    break;
5082  }
5083
5084  MachineFunction *MF = BB->getParent();
5085  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5086  MachineFunction::iterator It = BB;
5087  ++It; // insert the new blocks after the current block
5088
5089  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
5090  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
5091  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5092  MF->insert(It, loop1MBB);
5093  MF->insert(It, loop2MBB);
5094  MF->insert(It, exitMBB);
5095
5096  // Transfer the remainder of BB and its successor edges to exitMBB.
5097  exitMBB->splice(exitMBB->begin(), BB,
5098                  llvm::next(MachineBasicBlock::iterator(MI)),
5099                  BB->end());
5100  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5101
5102  //  thisMBB:
5103  //   ...
5104  //   fallthrough --> loop1MBB
5105  BB->addSuccessor(loop1MBB);
5106
5107  // loop1MBB:
5108  //   ldrex dest, [ptr]
5109  //   cmp dest, oldval
5110  //   bne exitMBB
5111  BB = loop1MBB;
5112  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
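      // Thumb2 LDREX takes an extra immediate offset operand; use an offset of 0.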
5113  if (ldrOpc == ARM::t2LDREX)
5114    MIB.addImm(0);
5115  AddDefaultPred(MIB);
5116  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
5117                 .addReg(dest).addReg(oldval));
5118  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5119    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5120  BB->addSuccessor(loop2MBB);
5121  BB->addSuccessor(exitMBB);
5122
5123  // loop2MBB:
5124  //   strex scratch, newval, [ptr]
5125  //   cmp scratch, #0
5126  //   bne loop1MBB
5127  BB = loop2MBB;
5128  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
5129  if (strOpc == ARM::t2STREX)
5130    MIB.addImm(0);
5131  AddDefaultPred(MIB);
5132  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5133                 .addReg(scratch).addImm(0));
5134  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5135    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5136  BB->addSuccessor(loop1MBB);
5137  BB->addSuccessor(exitMBB);
5138
5139  //  exitMBB:
5140  //   ...
5141  BB = exitMBB;
5142
5143  MI->eraseFromParent();   // The instruction is gone now.
5144
5145  return BB;
5146}
5147
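/// EmitAtomicBinary - Expand an atomic read-modify-write pseudo into a
/// ldrex / <binop> / strex retry loop. A BinOpcode of 0 means ATOMIC_SWAP,
/// in which case the incoming value is stored back unmodified.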
5148MachineBasicBlock *
5149ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
5150                                    unsigned Size, unsigned BinOpcode) const {
5151  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
5152  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5153
5154  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5155  MachineFunction *MF = BB->getParent();
5156  MachineFunction::iterator It = BB;
5157  ++It;
5158
5159  unsigned dest = MI->getOperand(0).getReg();
5160  unsigned ptr = MI->getOperand(1).getReg();
5161  unsigned incr = MI->getOperand(2).getReg();
5162  DebugLoc dl = MI->getDebugLoc();
5163  bool isThumb2 = Subtarget->isThumb2();
5164
5165  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5166  if (isThumb2) {
5167    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
5168    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
5169  }
5170
5171  unsigned ldrOpc, strOpc;
5172  switch (Size) {
5173  default: llvm_unreachable("unsupported size for AtomicBinary!");
5174  case 1:
5175    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5176    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5177    break;
5178  case 2:
5179    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5180    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5181    break;
5182  case 4:
5183    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5184    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5185    break;
5186  }
5187
5188  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5189  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5190  MF->insert(It, loopMBB);
5191  MF->insert(It, exitMBB);
5192
5193  // Transfer the remainder of BB and its successor edges to exitMBB.
5194  exitMBB->splice(exitMBB->begin(), BB,
5195                  llvm::next(MachineBasicBlock::iterator(MI)),
5196                  BB->end());
5197  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5198
5199  TargetRegisterClass *TRC =
5200    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5201  unsigned scratch = MRI.createVirtualRegister(TRC);
5202  unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
5203
5204  //  thisMBB:
5205  //   ...
5206  //   fallthrough --> loopMBB
5207  BB->addSuccessor(loopMBB);
5208
5209  //  loopMBB:
5210  //   ldrex dest, ptr
5211  //   <binop> scratch2, dest, incr
5212  //   strex scratch, scratch2, ptr
5213  //   cmp scratch, #0
5214  //   bne- loopMBB
5215  //   fallthrough --> exitMBB
5216  BB = loopMBB;
5217  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5218  if (ldrOpc == ARM::t2LDREX)
5219    MIB.addImm(0);
5220  AddDefaultPred(MIB);
5221  if (BinOpcode) {
5222    // The operand order is reversed for NAND, which is lowered to BIC.
5223    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
5224      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
5225                     addReg(incr).addReg(dest)).addReg(0);
5226    else
5227      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
5228                     addReg(dest).addReg(incr)).addReg(0);
5229  }
5230
5231  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
5232  if (strOpc == ARM::t2STREX)
5233    MIB.addImm(0);
5234  AddDefaultPred(MIB);
5235  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5236                 .addReg(scratch).addImm(0));
5237  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5238    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5239
5240  BB->addSuccessor(loopMBB);
5241  BB->addSuccessor(exitMBB);
5242
5243  //  exitMBB:
5244  //   ...
5245  BB = exitMBB;
5246
5247  MI->eraseFromParent();   // The instruction is gone now.
5248
5249  return BB;
5250}
5251
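/// EmitAtomicBinaryMinMax - Expand an atomic min/max pseudo into a
/// ldrex / cmp / movcc / strex retry loop, sign extending the loaded value
/// first when a signed comparison on a sub-word size is required.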
5252MachineBasicBlock *
5253ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
5254                                          MachineBasicBlock *BB,
5255                                          unsigned Size,
5256                                          bool signExtend,
5257                                          ARMCC::CondCodes Cond) const {
5258  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5259
5260  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5261  MachineFunction *MF = BB->getParent();
5262  MachineFunction::iterator It = BB;
5263  ++It;
5264
5265  unsigned dest = MI->getOperand(0).getReg();
5266  unsigned ptr = MI->getOperand(1).getReg();
5267  unsigned incr = MI->getOperand(2).getReg();
5268  unsigned oldval = dest;
5269  DebugLoc dl = MI->getDebugLoc();
5270  bool isThumb2 = Subtarget->isThumb2();
5271
5272  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5273  if (isThumb2) {
5274    MRI.constrainRegClass(dest, ARM::rGPRRegisterClass);
5275    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
5276  }
5277
5278  unsigned ldrOpc, strOpc, extendOpc;
5279  switch (Size) {
5280  default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
5281  case 1:
5282    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5283    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5284    extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
5285    break;
5286  case 2:
5287    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5288    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5289    extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
5290    break;
5291  case 4:
5292    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5293    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5294    extendOpc = 0;
5295    break;
5296  }
5297
5298  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5299  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5300  MF->insert(It, loopMBB);
5301  MF->insert(It, exitMBB);
5302
5303  // Transfer the remainder of BB and its successor edges to exitMBB.
5304  exitMBB->splice(exitMBB->begin(), BB,
5305                  llvm::next(MachineBasicBlock::iterator(MI)),
5306                  BB->end());
5307  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5308
5309  TargetRegisterClass *TRC =
5310    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5311  unsigned scratch = MRI.createVirtualRegister(TRC);
5312  unsigned scratch2 = MRI.createVirtualRegister(TRC);
5313
5314  //  thisMBB:
5315  //   ...
5316  //   fallthrough --> loopMBB
5317  BB->addSuccessor(loopMBB);
5318
5319  //  loopMBB:
5320  //   ldrex dest, ptr
5321  //   (sign extend dest, if required)
5322  //   cmp dest, incr
5323  //   cmov.cond scratch2, dest, incr
5324  //   strex scratch, scratch2, ptr
5325  //   cmp scratch, #0
5326  //   bne- loopMBB
5327  //   fallthrough --> exitMBB
5328  BB = loopMBB;
5329  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5330  if (ldrOpc == ARM::t2LDREX)
5331    MIB.addImm(0);
5332  AddDefaultPred(MIB);
5333
5334  // Sign extend the value, if necessary.
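  // ldrexb/ldrexh zero-extend into the register, so signed min/max needs an
  // explicit sxtb/sxth before the comparison.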
5335  if (signExtend && extendOpc) {
5336    oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
5337    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
5338                     .addReg(dest)
5339                     .addImm(0));
5340  }
5341
5342  // Build compare and cmov instructions.
5343  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
5344                 .addReg(oldval).addReg(incr));
5345  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
5346         .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
5347
5348  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
5349  if (strOpc == ARM::t2STREX)
5350    MIB.addImm(0);
5351  AddDefaultPred(MIB);
5352  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5353                 .addReg(scratch).addImm(0));
5354  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5355    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5356
5357  BB->addSuccessor(loopMBB);
5358  BB->addSuccessor(exitMBB);
5359
5360  //  exitMBB:
5361  //   ...
5362  BB = exitMBB;
5363
5364  MI->eraseFromParent();   // The instruction is gone now.
5365
5366  return BB;
5367}
5368
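/// EmitAtomicBinary64 - Expand a 64-bit atomic pseudo (binary op, swap or
/// cmpxchg) into a ldrexd/strexd retry loop. The values are shuffled through
/// the fixed registers R0/R1 and R2/R3 because there is no way to ask the
/// register allocator for a register pair (see the note and FIXME in the
/// body).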
5369MachineBasicBlock *
5370ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
5371                                      unsigned Op1, unsigned Op2,
5372                                      bool NeedsCarry, bool IsCmpxchg) const {
5373  // This also handles ATOMIC_SWAP, indicated by Op1==0.
5374  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5375
5376  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5377  MachineFunction *MF = BB->getParent();
5378  MachineFunction::iterator It = BB;
5379  ++It;
5380
5381  unsigned destlo = MI->getOperand(0).getReg();
5382  unsigned desthi = MI->getOperand(1).getReg();
5383  unsigned ptr = MI->getOperand(2).getReg();
5384  unsigned vallo = MI->getOperand(3).getReg();
5385  unsigned valhi = MI->getOperand(4).getReg();
5386  DebugLoc dl = MI->getDebugLoc();
5387  bool isThumb2 = Subtarget->isThumb2();
5388
5389  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5390  if (isThumb2) {
5391    MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
5392    MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
5393    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
5394  }
5395
5396  unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
5397  unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
5398
5399  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5400  MachineBasicBlock *contBB = 0, *cont2BB = 0;
5401  if (IsCmpxchg) {
5402    contBB = MF->CreateMachineBasicBlock(LLVM_BB);
5403    cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
5404  }
5405  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5406  MF->insert(It, loopMBB);
5407  if (IsCmpxchg) {
5408    MF->insert(It, contBB);
5409    MF->insert(It, cont2BB);
5410  }
5411  MF->insert(It, exitMBB);
5412
5413  // Transfer the remainder of BB and its successor edges to exitMBB.
5414  exitMBB->splice(exitMBB->begin(), BB,
5415                  llvm::next(MachineBasicBlock::iterator(MI)),
5416                  BB->end());
5417  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5418
5419  TargetRegisterClass *TRC =
5420    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5421  unsigned storesuccess = MRI.createVirtualRegister(TRC);
5422
5423  //  thisMBB:
5424  //   ...
5425  //   fallthrough --> loopMBB
5426  BB->addSuccessor(loopMBB);
5427
5428  //  loopMBB:
5429  //   ldrexd r2, r3, ptr
5430  //   <binopa> r0, r2, incr
5431  //   <binopb> r1, r3, incr
5432  //   strexd storesuccess, r0, r1, ptr
5433  //   cmp storesuccess, #0
5434  //   bne- loopMBB
5435  //   fallthrough --> exitMBB
5436  //
5437  // Note that the registers are explicitly specified because there is no way
5438  // to force the register allocator to allocate a register pair.
5439  //
5440  // FIXME: The hardcoded registers are not necessary for Thumb2, but we
5441  // need to properly enforce the restriction that the two output registers
5442  // for ldrexd must be different.
5443  BB = loopMBB;
5444  // Load
5445  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
5446                 .addReg(ARM::R2, RegState::Define)
5447                 .addReg(ARM::R3, RegState::Define).addReg(ptr));
5448  // Copy r2/r3 into dest.  (This copy will normally be coalesced.)
5449  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
5450  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
5451
5452  if (IsCmpxchg) {
5453    // Add early exit
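    //   cmp destlo, vallo
    //   bne exitMBB
    //   cmp desthi, valhi
    //   bne exitMBB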
5454    for (unsigned i = 0; i < 2; i++) {
5455      AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
5456                                                         ARM::CMPrr))
5457                     .addReg(i == 0 ? destlo : desthi)
5458                     .addReg(i == 0 ? vallo : valhi));
5459      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5460        .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5461      BB->addSuccessor(exitMBB);
5462      BB->addSuccessor(i == 0 ? contBB : cont2BB);
5463      BB = (i == 0 ? contBB : cont2BB);
5464    }
5465
5466    // Copy to physregs for strexd
5467    unsigned setlo = MI->getOperand(5).getReg();
5468    unsigned sethi = MI->getOperand(6).getReg();
5469    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
5470    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
5471  } else if (Op1) {
5472    // Perform binary operation
5473    AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
5474                   .addReg(destlo).addReg(vallo))
5475        .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
5476    AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
5477                   .addReg(desthi).addReg(valhi)).addReg(0);
5478  } else {
5479    // Copy to physregs for strexd
5480    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
5481    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
5482  }
5483
5484  // Store
5485  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
5486                 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
5487  // Cmp+jump
5488  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5489                 .addReg(storesuccess).addImm(0));
5490  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5491    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5492
5493  BB->addSuccessor(loopMBB);
5494  BB->addSuccessor(exitMBB);
5495
5496  //  exitMBB:
5497  //   ...
5498  BB = exitMBB;
5499
5500  MI->eraseFromParent();   // The instruction is gone now.
5501
5502  return BB;
5503}
5504
5505/// EmitBasePointerRecalculation - For functions using a base pointer, we
5506/// rematerialize it (via the frame pointer).
5507void ARMTargetLowering::
5508EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
5509                             MachineBasicBlock *DispatchBB) const {
5510  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5511  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
5512  MachineFunction &MF = *MI->getParent()->getParent();
5513  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
5514  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
5515
5516  if (!RI.hasBasePointer(MF)) return;
5517
5518  MachineBasicBlock::iterator MBBI = MI;
5519
5520  int32_t NumBytes = AFI->getFramePtrSpillOffset();
5521  unsigned FramePtr = RI.getFrameRegister(MF);
5522  assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
5523         "Base pointer without frame pointer?");
5524
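  // Recompute the base pointer from the frame pointer:
  //   r6 = fp - FramePtrSpillOffset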
5525  if (AFI->isThumb2Function())
5526    llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
5527                                 FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
5528  else if (AFI->isThumbFunction())
5529    llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
5530                                    FramePtr, -NumBytes, *AII, RI);
5531  else
5532    llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
5533                                  FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
5534
5535  if (!RI.needsStackRealignment(MF)) return;
5536
5537  // If there's dynamic realignment, adjust for it.
5538  MachineFrameInfo *MFI = MF.getFrameInfo();
5539  unsigned MaxAlign = MFI->getMaxAlignment();
5540  assert(!AFI->isThumb1OnlyFunction());
5541
5542  // Emit bic r6, r6, #(MaxAlign - 1) to re-align r6.
5543  unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
5544  AddDefaultCC(
5545    AddDefaultPred(
5546      BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
5547      .addReg(ARM::R6, RegState::Kill)
5548      .addImm(MaxAlign - 1)));
5549}
5550
5551/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
5552/// registers the function context.
5553void ARMTargetLowering::
5554SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
5555                       MachineBasicBlock *DispatchBB, int FI) const {
5556  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5557  DebugLoc dl = MI->getDebugLoc();
5558  MachineFunction *MF = MBB->getParent();
5559  MachineRegisterInfo *MRI = &MF->getRegInfo();
5560  MachineConstantPool *MCP = MF->getConstantPool();
5561  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
5562  const Function *F = MF->getFunction();
5563
5564  bool isThumb = Subtarget->isThumb();
5565  bool isThumb2 = Subtarget->isThumb2();
5566
5567  unsigned PCLabelId = AFI->createPICLabelUId();
5568  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
5569  ARMConstantPoolValue *CPV =
5570    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
5571  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
5572
5573  const TargetRegisterClass *TRC =
5574    isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5575
5576  // Grab constant pool and fixed stack memory operands.
5577  MachineMemOperand *CPMMO =
5578    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
5579                             MachineMemOperand::MOLoad, 4, 4);
5580
5581  MachineMemOperand *FIMMOSt =
5582    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
5583                             MachineMemOperand::MOStore, 4, 4);
5584
5585  EmitBasePointerRecalculation(MI, MBB, DispatchBB);
5586
5587  // Load the address of the dispatch MBB into the jump buffer.
5588  if (isThumb2) {
5589    // Incoming value: jbuf
5590    //   ldr.n  r5, LCPI1_1
5591    //   orr    r5, r5, #1
5592    //   add    r5, pc
5593    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
5594    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5595    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
5596                   .addConstantPoolIndex(CPI)
5597                   .addMemOperand(CPMMO));
5598    // Set the low bit because of Thumb mode.
5599    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5600    AddDefaultCC(
5601      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
5602                     .addReg(NewVReg1, RegState::Kill)
5603                     .addImm(0x01)));
5604    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5605    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
5606      .addReg(NewVReg2, RegState::Kill)
5607      .addImm(PCLabelId);
5608    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
5609                   .addReg(NewVReg3, RegState::Kill)
5610                   .addFrameIndex(FI)
5611                   .addImm(36)  // &jbuf[1] :: pc
5612                   .addMemOperand(FIMMOSt));
5613  } else if (isThumb) {
5614    // Incoming value: jbuf
5615    //   ldr.n  r1, LCPI1_4
5616    //   add    r1, pc
5617    //   mov    r2, #1
5618    //   orrs   r1, r2
5619    //   add    r2, $jbuf, #+4 ; &jbuf[1]
5620    //   str    r1, [r2]
5621    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5622    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
5623                   .addConstantPoolIndex(CPI)
5624                   .addMemOperand(CPMMO));
5625    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5626    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
5627      .addReg(NewVReg1, RegState::Kill)
5628      .addImm(PCLabelId);
5629    // Set the low bit because of Thumb mode.
5630    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5631    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
5632                   .addReg(ARM::CPSR, RegState::Define)
5633                   .addImm(1));
5634    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
5635    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
5636                   .addReg(ARM::CPSR, RegState::Define)
5637                   .addReg(NewVReg2, RegState::Kill)
5638                   .addReg(NewVReg3, RegState::Kill));
5639    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
5640    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
5641                   .addFrameIndex(FI)
5642                   .addImm(36)); // &jbuf[1] :: pc
5643    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
5644                   .addReg(NewVReg4, RegState::Kill)
5645                   .addReg(NewVReg5, RegState::Kill)
5646                   .addImm(0)
5647                   .addMemOperand(FIMMOSt));
5648  } else {
5649    // Incoming value: jbuf
5650    //   ldr  r1, LCPI1_1
5651    //   add  r1, pc, r1
5652    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
5653    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5654    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12),  NewVReg1)
5655                   .addConstantPoolIndex(CPI)
5656                   .addImm(0)
5657                   .addMemOperand(CPMMO));
5658    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5659    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
5660                   .addReg(NewVReg1, RegState::Kill)
5661                   .addImm(PCLabelId));
5662    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
5663                   .addReg(NewVReg2, RegState::Kill)
5664                   .addFrameIndex(FI)
5665                   .addImm(36)  // &jbuf[1] :: pc
5666                   .addMemOperand(FIMMOSt));
5667  }
5668}
5669
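/// EmitSjLjDispatchBlock - Build the landing pad dispatch block used by
/// setjmp/longjmp based exception handling: collect the landing pads for each
/// call site, emit an inline jump table over them, and rewire every invoke
/// block to branch to the new dispatch block instead of its landing pad.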
5670MachineBasicBlock *ARMTargetLowering::
5671EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
5672  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5673  DebugLoc dl = MI->getDebugLoc();
5674  MachineFunction *MF = MBB->getParent();
5675  MachineRegisterInfo *MRI = &MF->getRegInfo();
5676  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
5677  MachineFrameInfo *MFI = MF->getFrameInfo();
5678  int FI = MFI->getFunctionContextIndex();
5679
5680  const TargetRegisterClass *TRC =
5681    Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
5682
5683  // Get a mapping of the call site numbers to all of the landing pads they're
5684  // associated with.
5685  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
5686  unsigned MaxCSNum = 0;
5687  MachineModuleInfo &MMI = MF->getMMI();
5688  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
5689    if (!BB->isLandingPad()) continue;
5690
5691    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
5692    // pad.
5693    for (MachineBasicBlock::iterator
5694           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
5695      if (!II->isEHLabel()) continue;
5696
5697      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
5698      if (!MMI.hasCallSiteLandingPad(Sym)) continue;
5699
5700      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
5701      for (SmallVectorImpl<unsigned>::iterator
5702             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
5703           CSI != CSE; ++CSI) {
5704        CallSiteNumToLPad[*CSI].push_back(BB);
5705        MaxCSNum = std::max(MaxCSNum, *CSI);
5706      }
5707      break;
5708    }
5709  }
5710
5711  // Get an ordered list of the machine basic blocks for the jump table.
5712  std::vector<MachineBasicBlock*> LPadList;
5713  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
5714  LPadList.reserve(CallSiteNumToLPad.size());
5715  for (unsigned I = 1; I <= MaxCSNum; ++I) {
5716    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
5717    for (SmallVectorImpl<MachineBasicBlock*>::iterator
5718           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
5719      LPadList.push_back(*II);
5720      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
5721    }
5722  }
5723
5724  assert(!LPadList.empty() &&
5725         "No landing pad destinations for the dispatch jump table!");
5726
5727  // Create the jump table and associated information.
5728  MachineJumpTableInfo *JTI =
5729    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
5730  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
5731  unsigned UId = AFI->createJumpTableUId();
5732
5733  // Create the MBBs for the dispatch code.
5734
5735  // Shove the dispatch's address into the return slot in the function context.
5736  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
5737  DispatchBB->setIsLandingPad();
5738
5739  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
5740  BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
5741  DispatchBB->addSuccessor(TrapBB);
5742
5743  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
5744  DispatchBB->addSuccessor(DispContBB);
5745
5746  // Insert the new MBBs into the function.
5747  MF->insert(MF->end(), DispatchBB);
5748  MF->insert(MF->end(), DispContBB);
5749  MF->insert(MF->end(), TrapBB);
5750
5751  // Insert code into the entry block that creates and registers the function
5752  // context.
5753  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
5754
5755  MachineMemOperand *FIMMOLd =
5756    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
5757                             MachineMemOperand::MOLoad |
5758                             MachineMemOperand::MOVolatile, 4, 4);
5759
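  // Emit the dispatch code: load the call site index from the function
  // context, range-check it against the number of landing pads (trapping if
  // it is out of range), and then index into the inline jump table to reach
  // the corresponding landing pad.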
5760  unsigned NumLPads = LPadList.size();
5761  if (Subtarget->isThumb2()) {
5762    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5763    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
5764                   .addFrameIndex(FI)
5765                   .addImm(4)
5766                   .addMemOperand(FIMMOLd));
5767
5768    if (NumLPads < 256) {
5769      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
5770                     .addReg(NewVReg1)
5771                     .addImm(NumLPads));
5772    } else {
5773      unsigned VReg1 = MRI->createVirtualRegister(TRC);
5774      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
5775                     .addImm(NumLPads & 0xFFFF));
5776
5777      unsigned VReg2 = VReg1;
5778      if ((NumLPads & 0xFFFF0000) != 0) {
5779        VReg2 = MRI->createVirtualRegister(TRC);
5780        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
5781                       .addReg(VReg1)
5782                       .addImm(NumLPads >> 16));
5783      }
5784
5785      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
5786                     .addReg(NewVReg1)
5787                     .addReg(VReg2));
5788    }
5789
5790    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
5791      .addMBB(TrapBB)
5792      .addImm(ARMCC::HI)
5793      .addReg(ARM::CPSR);
5794
5795    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5796    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
5797                   .addJumpTableIndex(MJTI)
5798                   .addImm(UId));
5799
5800    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
5801    AddDefaultCC(
5802      AddDefaultPred(
5803        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
5804        .addReg(NewVReg3, RegState::Kill)
5805        .addReg(NewVReg1)
5806        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
5807
5808    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
5809      .addReg(NewVReg4, RegState::Kill)
5810      .addReg(NewVReg1)
5811      .addJumpTableIndex(MJTI)
5812      .addImm(UId);
5813  } else if (Subtarget->isThumb()) {
5814    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
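    // Load the call site index. tLDRspi scales its immediate by 4, so an
    // immediate of 1 addresses offset 4 of the function context.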
5815    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
5816                   .addFrameIndex(FI)
5817                   .addImm(1)
5818                   .addMemOperand(FIMMOLd));
5819
5820    if (NumLPads < 256) {
5821      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
5822                     .addReg(NewVReg1)
5823                     .addImm(NumLPads));
5824    } else {
5825      MachineConstantPool *ConstantPool = MF->getConstantPool();
5826      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
5827      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
5828
5829      // MachineConstantPool wants an explicit alignment.
5830      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
5831      if (Align == 0)
5832        Align = getTargetData()->getTypeAllocSize(C->getType());
5833      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
5834
5835      unsigned VReg1 = MRI->createVirtualRegister(TRC);
5836      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
5837                     .addReg(VReg1, RegState::Define)
5838                     .addConstantPoolIndex(Idx));
5839      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
5840                     .addReg(NewVReg1)
5841                     .addReg(VReg1));
5842    }
5843
5844    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
5845      .addMBB(TrapBB)
5846      .addImm(ARMCC::HI)
5847      .addReg(ARM::CPSR);
5848
5849    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5850    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
5851                   .addReg(ARM::CPSR, RegState::Define)
5852                   .addReg(NewVReg1)
5853                   .addImm(2));
5854
5855    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5856    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
5857                   .addJumpTableIndex(MJTI)
5858                   .addImm(UId));
5859
5860    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
5861    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
5862                   .addReg(ARM::CPSR, RegState::Define)
5863                   .addReg(NewVReg2, RegState::Kill)
5864                   .addReg(NewVReg3));
5865
5866    MachineMemOperand *JTMMOLd =
5867      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
5868                               MachineMemOperand::MOLoad, 4, 4);
5869
5870    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
5871    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
5872                   .addReg(NewVReg4, RegState::Kill)
5873                   .addImm(0)
5874                   .addMemOperand(JTMMOLd));
5875
5876    unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
5877    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
5878                   .addReg(ARM::CPSR, RegState::Define)
5879                   .addReg(NewVReg5, RegState::Kill)
5880                   .addReg(NewVReg3));
5881
5882    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
5883      .addReg(NewVReg6, RegState::Kill)
5884      .addJumpTableIndex(MJTI)
5885      .addImm(UId);
5886  } else {
5887    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5888    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
5889                   .addFrameIndex(FI)
5890                   .addImm(4)
5891                   .addMemOperand(FIMMOLd));
5892
5893    if (NumLPads < 256) {
5894      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
5895                     .addReg(NewVReg1)
5896                     .addImm(NumLPads));
5897    } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
5898      unsigned VReg1 = MRI->createVirtualRegister(TRC);
5899      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
5900                     .addImm(NumLPads & 0xFFFF));
5901
5902      unsigned VReg2 = VReg1;
5903      if ((NumLPads & 0xFFFF0000) != 0) {
5904        VReg2 = MRI->createVirtualRegister(TRC);
5905        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
5906                       .addReg(VReg1)
5907                       .addImm(NumLPads >> 16));
5908      }
5909
5910      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
5911                     .addReg(NewVReg1)
5912                     .addReg(VReg2));
5913    } else {
5914      MachineConstantPool *ConstantPool = MF->getConstantPool();
5915      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
5916      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
5917
5918      // MachineConstantPool wants an explicit alignment.
5919      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
5920      if (Align == 0)
5921        Align = getTargetData()->getTypeAllocSize(C->getType());
5922      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
5923
5924      unsigned VReg1 = MRI->createVirtualRegister(TRC);
5925      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
5926                     .addReg(VReg1, RegState::Define)
5927                     .addConstantPoolIndex(Idx));
5928      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
5929                     .addReg(NewVReg1)
5930                     .addReg(VReg1, RegState::Kill));
5931    }
5932
5933    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
5934      .addMBB(TrapBB)
5935      .addImm(ARMCC::HI)
5936      .addReg(ARM::CPSR);
5937
5938    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5939    AddDefaultCC(
5940      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
5941                     .addReg(NewVReg1)
5942                     .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
5943    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
5944    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
5945                   .addJumpTableIndex(MJTI)
5946                   .addImm(UId));
5947
5948    MachineMemOperand *JTMMOLd =
5949      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
5950                               MachineMemOperand::MOLoad, 4, 4);
5951    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
5952    AddDefaultPred(
5953      BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
5954      .addReg(NewVReg3, RegState::Kill)
5955      .addReg(NewVReg4)
5956      .addImm(0)
5957      .addMemOperand(JTMMOLd));
5958
5959    BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
5960      .addReg(NewVReg5, RegState::Kill)
5961      .addReg(NewVReg4)
5962      .addJumpTableIndex(MJTI)
5963      .addImm(UId);
5964  }
5965
5966  // Add the jump table entries as successors to the MBB.
5967  MachineBasicBlock *PrevMBB = 0;
5968  for (std::vector<MachineBasicBlock*>::iterator
5969         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
5970    MachineBasicBlock *CurMBB = *I;
5971    if (PrevMBB != CurMBB)
5972      DispContBB->addSuccessor(CurMBB);
5973    PrevMBB = CurMBB;
5974  }
5975
5976  // N.B. the order the invoke BBs are processed in doesn't matter here.
5977  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
5978  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
5979  const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
5980  SmallVector<MachineBasicBlock*, 64> MBBLPads;
5981  for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
5982         I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
5983    MachineBasicBlock *BB = *I;
5984
5985    // Remove the landing pad successor from the invoke block and replace it
5986    // with the new dispatch block.
5987    for (MachineBasicBlock::succ_iterator
5988           SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
5989      MachineBasicBlock *SMBB = *SI;
5990      if (SMBB->isLandingPad()) {
5991        BB->removeSuccessor(SMBB);
5992        MBBLPads.push_back(SMBB);
5993      }
5994    }
5995
5996    BB->addSuccessor(DispatchBB);
5997
5998    // Find the invoke call and mark all of the callee-saved registers as
5999    // 'implicit defined' so that they're spilled. This prevents later passes
6000    // from moving instructions to before the EH block, where they would never
6001    // be executed.
6002    for (MachineBasicBlock::reverse_iterator
6003           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
6004      if (!II->getDesc().isCall()) continue;
6005
6006      DenseMap<unsigned, bool> DefRegs;
6007      for (MachineInstr::mop_iterator
6008             OI = II->operands_begin(), OE = II->operands_end();
6009           OI != OE; ++OI) {
6010        if (!OI->isReg()) continue;
6011        DefRegs[OI->getReg()] = true;
6012      }
6013
6014      MachineInstrBuilder MIB(&*II);
6015
6016      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
6017        if (!TRC->contains(SavedRegs[i])) continue;
6018        if (!DefRegs[SavedRegs[i]])
6019          MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
6020      }
6021
6022      break;
6023    }
6024  }
6025
6026  // Mark all former landing pads as non-landing pads. The dispatch is the only
6027  // landing pad now.
6028  for (SmallVectorImpl<MachineBasicBlock*>::iterator
6029         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
6030    (*I)->setIsLandingPad(false);
6031
6032  // The instruction is gone now.
6033  MI->eraseFromParent();
6034
6035  return MBB;
6036}
6037
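/// OtherSucc - Return the successor of MBB that is not Succ. MBB is expected
/// to have exactly two successors.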
6038static
6039MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
6040  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
6041       E = MBB->succ_end(); I != E; ++I)
6042    if (*I != Succ)
6043      return *I;
6044  llvm_unreachable("Expecting a BB with two successors!");
6045}
6046
6047MachineBasicBlock *
6048ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
6049                                               MachineBasicBlock *BB) const {
6050  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6051  DebugLoc dl = MI->getDebugLoc();
6052  bool isThumb2 = Subtarget->isThumb2();
6053  switch (MI->getOpcode()) {
6054  default: {
6055    MI->dump();
6056    llvm_unreachable("Unexpected instr type to insert");
6057  }
6058  // The Thumb2 pre-indexed stores have the same MI operands as the final
6059  // instructions; they are just defined differently in the .td files than the
6060  // isel patterns expect, so they need pseudos.
6061  case ARM::t2STR_preidx:
6062    MI->setDesc(TII->get(ARM::t2STR_PRE));
6063    return BB;
6064  case ARM::t2STRB_preidx:
6065    MI->setDesc(TII->get(ARM::t2STRB_PRE));
6066    return BB;
6067  case ARM::t2STRH_preidx:
6068    MI->setDesc(TII->get(ARM::t2STRH_PRE));
6069    return BB;
6070
6071  case ARM::STRi_preidx:
6072  case ARM::STRBi_preidx: {
6073    unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
6074      ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
6075    // Decode the offset.
6076    unsigned Offset = MI->getOperand(4).getImm();
6077    bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
6078    Offset = ARM_AM::getAM2Offset(Offset);
6079    if (isSub)
6080      Offset = -Offset;
6081
6082    MachineMemOperand *MMO = *MI->memoperands_begin();
6083    BuildMI(*BB, MI, dl, TII->get(NewOpc))
6084      .addOperand(MI->getOperand(0))  // Rn_wb
6085      .addOperand(MI->getOperand(1))  // Rt
6086      .addOperand(MI->getOperand(2))  // Rn
6087      .addImm(Offset)                 // offset (skip GPR==zero_reg)
6088      .addOperand(MI->getOperand(5))  // pred
6089      .addOperand(MI->getOperand(6))
6090      .addMemOperand(MMO);
6091    MI->eraseFromParent();
6092    return BB;
6093  }
6094  case ARM::STRr_preidx:
6095  case ARM::STRBr_preidx:
6096  case ARM::STRH_preidx: {
6097    unsigned NewOpc;
6098    switch (MI->getOpcode()) {
6099    default: llvm_unreachable("unexpected opcode!");
6100    case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
6101    case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
6102    case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
6103    }
6104    MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
6105    for (unsigned i = 0; i < MI->getNumOperands(); ++i)
6106      MIB.addOperand(MI->getOperand(i));
6107    MI->eraseFromParent();
6108    return BB;
6109  }
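  // The atomic pseudos below are expanded into ldrex/strex loops by the
  // EmitAtomic* helpers above.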
6110  case ARM::ATOMIC_LOAD_ADD_I8:
6111     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6112  case ARM::ATOMIC_LOAD_ADD_I16:
6113     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6114  case ARM::ATOMIC_LOAD_ADD_I32:
6115     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6116
6117  case ARM::ATOMIC_LOAD_AND_I8:
6118     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6119  case ARM::ATOMIC_LOAD_AND_I16:
6120     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6121  case ARM::ATOMIC_LOAD_AND_I32:
6122     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6123
6124  case ARM::ATOMIC_LOAD_OR_I8:
6125     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6126  case ARM::ATOMIC_LOAD_OR_I16:
6127     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6128  case ARM::ATOMIC_LOAD_OR_I32:
6129     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6130
6131  case ARM::ATOMIC_LOAD_XOR_I8:
6132     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6133  case ARM::ATOMIC_LOAD_XOR_I16:
6134     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6135  case ARM::ATOMIC_LOAD_XOR_I32:
6136     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6137
6138  case ARM::ATOMIC_LOAD_NAND_I8:
6139     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6140  case ARM::ATOMIC_LOAD_NAND_I16:
6141     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6142  case ARM::ATOMIC_LOAD_NAND_I32:
6143     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6144
6145  case ARM::ATOMIC_LOAD_SUB_I8:
6146     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6147  case ARM::ATOMIC_LOAD_SUB_I16:
6148     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6149  case ARM::ATOMIC_LOAD_SUB_I32:
6150     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6151
6152  case ARM::ATOMIC_LOAD_MIN_I8:
6153     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
6154  case ARM::ATOMIC_LOAD_MIN_I16:
6155     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
6156  case ARM::ATOMIC_LOAD_MIN_I32:
6157     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
6158
6159  case ARM::ATOMIC_LOAD_MAX_I8:
6160     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
6161  case ARM::ATOMIC_LOAD_MAX_I16:
6162     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
6163  case ARM::ATOMIC_LOAD_MAX_I32:
6164     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
6165
6166  case ARM::ATOMIC_LOAD_UMIN_I8:
6167     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
6168  case ARM::ATOMIC_LOAD_UMIN_I16:
6169     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
6170  case ARM::ATOMIC_LOAD_UMIN_I32:
6171     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
6172
6173  case ARM::ATOMIC_LOAD_UMAX_I8:
6174     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
6175  case ARM::ATOMIC_LOAD_UMAX_I16:
6176     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
6177  case ARM::ATOMIC_LOAD_UMAX_I32:
6178     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
6179
6180  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
6181  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
6182  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
6183
6184  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
6185  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
6186  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
6187
6188
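  // 64-bit atomic pseudos are expanded into ldrexd/strexd loops. Add and
  // subtract pass ADC/SBC as the second opcode so the carry propagates into
  // the high half.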
6189  case ARM::ATOMADD6432:
6190    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
6191                              isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
6192                              /*NeedsCarry*/ true);
6193  case ARM::ATOMSUB6432:
6194    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
6195                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
6196                              /*NeedsCarry*/ true);
6197  case ARM::ATOMOR6432:
6198    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
6199                              isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6200  case ARM::ATOMXOR6432:
6201    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
6202                              isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6203  case ARM::ATOMAND6432:
6204    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
6205                              isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6206  case ARM::ATOMSWAP6432:
6207    return EmitAtomicBinary64(MI, BB, 0, 0, false);
6208  case ARM::ATOMCMPXCHG6432:
6209    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
6210                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
6211                              /*NeedsCarry*/ false, /*IsCmpxchg*/true);
6212
6213  case ARM::tMOVCCr_pseudo: {
6214    // To "insert" a SELECT_CC instruction, we actually have to insert the
6215    // diamond control-flow pattern.  The incoming instruction knows the
6216    // destination vreg to set, the condition code register to branch on, the
6217    // true/false values to select between, and a branch opcode to use.
6218    const BasicBlock *LLVM_BB = BB->getBasicBlock();
6219    MachineFunction::iterator It = BB;
6220    ++It;
6221
6222    //  thisMBB:
6223    //  ...
6224    //   TrueVal = ...
6225    //   cmpTY ccX, r1, r2
6226    //   bCC copy1MBB
6227    //   fallthrough --> copy0MBB
6228    MachineBasicBlock *thisMBB  = BB;
6229    MachineFunction *F = BB->getParent();
6230    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
6231    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
6232    F->insert(It, copy0MBB);
6233    F->insert(It, sinkMBB);
6234
6235    // Transfer the remainder of BB and its successor edges to sinkMBB.
6236    sinkMBB->splice(sinkMBB->begin(), BB,
6237                    llvm::next(MachineBasicBlock::iterator(MI)),
6238                    BB->end());
6239    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
6240
6241    BB->addSuccessor(copy0MBB);
6242    BB->addSuccessor(sinkMBB);
6243
6244    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
6245      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
6246
6247    //  copy0MBB:
6248    //   %FalseValue = ...
6249    //   # fallthrough to sinkMBB
6250    BB = copy0MBB;
6251
6252    // Update machine-CFG edges
6253    BB->addSuccessor(sinkMBB);
6254
6255    //  sinkMBB:
6256    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
6257    //  ...
6258    BB = sinkMBB;
6259    BuildMI(*BB, BB->begin(), dl,
6260            TII->get(ARM::PHI), MI->getOperand(0).getReg())
6261      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
6262      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
6263
6264    MI->eraseFromParent();   // The pseudo instruction is gone now.
6265    return BB;
6266  }
6267
6268  case ARM::BCCi64:
6269  case ARM::BCCZi64: {
6270    // If there is an unconditional branch to the other successor, remove it.
6271    BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
6272
6273    // Compare both parts that make up the double comparison separately for
6274    // equality.
6275    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
6276
6277    unsigned LHS1 = MI->getOperand(1).getReg();
6278    unsigned LHS2 = MI->getOperand(2).getReg();
6279    if (RHSisZero) {
6280      AddDefaultPred(BuildMI(BB, dl,
6281                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
6282                     .addReg(LHS1).addImm(0));
6283      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
6284        .addReg(LHS2).addImm(0)
6285        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
6286    } else {
6287      unsigned RHS1 = MI->getOperand(3).getReg();
6288      unsigned RHS2 = MI->getOperand(4).getReg();
6289      AddDefaultPred(BuildMI(BB, dl,
6290                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
6291                     .addReg(LHS1).addReg(RHS1));
6292      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
6293        .addReg(LHS2).addReg(RHS2)
6294        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
6295    }
6296
6297    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
6298    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
6299    if (MI->getOperand(0).getImm() == ARMCC::NE)
6300      std::swap(destMBB, exitMBB);
6301
6302    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
6303      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
6304    if (isThumb2)
6305      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
6306    else
6307      BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
6308
6309    MI->eraseFromParent();   // The pseudo instruction is gone now.
6310    return BB;
6311  }
6312
6313  case ARM::Int_eh_sjlj_setjmp:
6314  case ARM::Int_eh_sjlj_setjmp_nofp:
6315  case ARM::tInt_eh_sjlj_setjmp:
6316  case ARM::t2Int_eh_sjlj_setjmp:
6317  case ARM::t2Int_eh_sjlj_setjmp_nofp:
6318    EmitSjLjDispatchBlock(MI, BB);
6319    return BB;
6320
6321  case ARM::ABS:
6322  case ARM::t2ABS: {
6323    // To insert an ABS instruction, we have to insert the
6324    // diamond control-flow pattern.  The incoming instruction knows the
6325    // source vreg to test against 0, the destination vreg to set,
6326    // the condition code register to branch on, the
6327    // true/false values to select between, and a branch opcode to use.
6328    // It transforms
6329    //     V1 = ABS V0
6330    // into
6331    //     V2 = MOVS V0
6332    //     BCC                      (branch to SinkBB if V0 >= 0)
6333    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
6334    //     SinkBB: V1 = PHI(V2, V3)
6335    const BasicBlock *LLVM_BB = BB->getBasicBlock();
6336    MachineFunction::iterator BBI = BB;
6337    ++BBI;
6338    MachineFunction *Fn = BB->getParent();
6339    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
6340    MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
6341    Fn->insert(BBI, RSBBB);
6342    Fn->insert(BBI, SinkBB);
6343
6344    unsigned ABSSrcReg = MI->getOperand(1).getReg();
6345    unsigned ABSDstReg = MI->getOperand(0).getReg();
6346    bool isThumb2 = Subtarget->isThumb2();
6347    MachineRegisterInfo &MRI = Fn->getRegInfo();
6348    // In Thumb2 mode the S bit must not be set if the source register is SP or
6349    // PC, or if the destination register is SP, so restrict the register class.
6350    unsigned NewMovDstReg = MRI.createVirtualRegister(
6351      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
6352    unsigned NewRsbDstReg = MRI.createVirtualRegister(
6353      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
6354
6355    // Transfer the remainder of BB and its successor edges to sinkMBB.
6356    SinkBB->splice(SinkBB->begin(), BB,
6357      llvm::next(MachineBasicBlock::iterator(MI)),
6358      BB->end());
6359    SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6360
6361    BB->addSuccessor(RSBBB);
6362    BB->addSuccessor(SinkBB);
6363
6364    // fall through to SinkMBB
6365    RSBBB->addSuccessor(SinkBB);
6366
6367    // insert a movs at the end of BB
6368    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
6369      NewMovDstReg)
6370      .addReg(ABSSrcReg, RegState::Kill)
6371      .addImm((unsigned)ARMCC::AL).addReg(0)
6372      .addReg(ARM::CPSR, RegState::Define);
6373
6374    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
6375    BuildMI(BB, dl,
6376      TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
6377      .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
6378
6379    // insert rsbri in RSBBB
6380    // Note: BCC and rsbri will be converted into predicated rsbmi
6381    // by if-conversion pass
6382    BuildMI(*RSBBB, RSBBB->begin(), dl,
6383      TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
6384      .addReg(NewMovDstReg, RegState::Kill)
6385      .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
6386
6387    // insert PHI in SinkBB,
6388    // reuse ABSDstReg to not change uses of ABS instruction
6389    BuildMI(*SinkBB, SinkBB->begin(), dl,
6390      TII->get(ARM::PHI), ABSDstReg)
6391      .addReg(NewRsbDstReg).addMBB(RSBBB)
6392      .addReg(NewMovDstReg).addMBB(BB);
6393
6394    // remove ABS instruction
6395    MI->eraseFromParent();
6396
6397    // return last added BB
6398    return SinkBB;
6399  }
6400  }
6401}
6402
6403void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
6404                                                      SDNode *Node) const {
6405  const MCInstrDesc *MCID = &MI->getDesc();
6406  if (!MCID->hasPostISelHook()) {
6407    assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
6408           "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
6409    return;
6410  }
6411
6412  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
6413  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
6414  // operand is still set to noreg. If needed, set the optional operand's
6415  // register to CPSR, and remove the redundant implicit def.
6416  //
6417  // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
6418
6419  // Rename pseudo opcodes.
6420  unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
6421  if (NewOpc) {
6422    const ARMBaseInstrInfo *TII =
6423      static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
6424    MCID = &TII->get(NewOpc);
6425
6426    assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
6427           "converted opcode should be the same except for cc_out");
6428
6429    MI->setDesc(*MCID);
6430
6431    // Add the optional cc_out operand
6432    MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
6433  }
6434  unsigned ccOutIdx = MCID->getNumOperands() - 1;
6435
6436  // Any ARM instruction that sets the 's' bit should specify an optional
6437  // "cc_out" operand in the last operand position.
6438  if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
6439    assert(!NewOpc && "Optional cc_out operand required");
6440    return;
6441  }
6442  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
6443  // since we already have an optional CPSR def.
6444  bool definesCPSR = false;
6445  bool deadCPSR = false;
6446  for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
6447       i != e; ++i) {
6448    const MachineOperand &MO = MI->getOperand(i);
6449    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
6450      definesCPSR = true;
6451      if (MO.isDead())
6452        deadCPSR = true;
6453      MI->RemoveOperand(i);
6454      break;
6455    }
6456  }
6457  if (!definesCPSR) {
6458    assert(!NewOpc && "Optional cc_out operand required");
6459    return;
6460  }
6461  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
6462  if (deadCPSR) {
6463    assert(!MI->getOperand(ccOutIdx).getReg() &&
6464           "expect uninitialized optional cc_out operand");
6465    return;
6466  }
6467
6468  // If this instruction was defined with an optional CPSR def and its dag node
6469  // had a live implicit CPSR def, then activate the optional CPSR def.
6470  MachineOperand &MO = MI->getOperand(ccOutIdx);
6471  MO.setReg(ARM::CPSR);
6472  MO.setIsDef(true);
6473}
6474
6475//===----------------------------------------------------------------------===//
6476//                           ARM Optimization Hooks
6477//===----------------------------------------------------------------------===//
6478
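/// combineSelectAndUse - Fold (add (select cc, 0, c), x) into
/// (select cc, x, (add x, c)) and the analogous sub pattern, so the select
/// chooses between x itself and the combined value.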
6479static
6480SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
6481                            TargetLowering::DAGCombinerInfo &DCI) {
6482  SelectionDAG &DAG = DCI.DAG;
6483  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6484  EVT VT = N->getValueType(0);
6485  unsigned Opc = N->getOpcode();
6486  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
6487  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
6488  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
6489  ISD::CondCode CC = ISD::SETCC_INVALID;
6490
6491  if (isSlctCC) {
6492    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
6493  } else {
6494    SDValue CCOp = Slct.getOperand(0);
6495    if (CCOp.getOpcode() == ISD::SETCC)
6496      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
6497  }
6498
6499  bool DoXform = false;
6500  bool InvCC = false;
6501  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
6502          "Bad input!");
6503
6504  if (LHS.getOpcode() == ISD::Constant &&
6505      cast<ConstantSDNode>(LHS)->isNullValue()) {
6506    DoXform = true;
6507  } else if (CC != ISD::SETCC_INVALID &&
6508             RHS.getOpcode() == ISD::Constant &&
6509             cast<ConstantSDNode>(RHS)->isNullValue()) {
6510    std::swap(LHS, RHS);
6511    SDValue Op0 = Slct.getOperand(0);
6512    EVT OpVT = isSlctCC ? Op0.getValueType() :
6513                          Op0.getOperand(0).getValueType();
6514    bool isInt = OpVT.isInteger();
6515    CC = ISD::getSetCCInverse(CC, isInt);
6516
6517    if (!TLI.isCondCodeLegal(CC, OpVT))
6518      return SDValue();         // Inverse operator isn't legal.
6519
6520    DoXform = true;
6521    InvCC = true;
6522  }
6523
6524  if (DoXform) {
6525    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
6526    if (isSlctCC)
6527      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
6528                             Slct.getOperand(0), Slct.getOperand(1), CC);
6529    SDValue CCOp = Slct.getOperand(0);
6530    if (InvCC)
6531      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
6532                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
6533    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
6534                       CCOp, OtherOp, Result);
6535  }
6536  return SDValue();
6537}
6538
/// AddCombineToVPADDL - For a pair-wise add on NEON, use the vpaddl
/// instruction (only after legalization).
6541static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
6542                                 TargetLowering::DAGCombinerInfo &DCI,
6543                                 const ARMSubtarget *Subtarget) {
6544
  // Only perform the optimization after legalization and if NEON is
  // available. We also expect both operands to be BUILD_VECTORs.
6547  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
6548      || N0.getOpcode() != ISD::BUILD_VECTOR
6549      || N1.getOpcode() != ISD::BUILD_VECTOR)
6550    return SDValue();
6551
6552  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
6553  EVT VT = N->getValueType(0);
6554  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
6555    return SDValue();
6556
6557  // Check that the vector operands are of the right form.
  // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR_ELT operands,
  // where N is the number of elements in the formed vector.
  // Each EXTRACT_VECTOR_ELT should reference the same input vector, with N0
  // taking the even indices and N1 the odd ones, giving a pair-wise add
  // pattern.
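  // For example, for a v4i16 add the expected pattern is:
  //   N0 = BUILD_VECTOR (extract_elt Vec, 0), (extract_elt Vec, 2), ...
  //   N1 = BUILD_VECTOR (extract_elt Vec, 1), (extract_elt Vec, 3), ...
  // which is folded into a single vpaddl of Vec, truncated back to the
  // result type.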
6562
  // Grab the vector that all EXTRACT_VECTOR_ELT nodes should be referencing.
6564  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
6565    return SDValue();
6566  SDValue Vec = N0->getOperand(0)->getOperand(0);
6567  SDNode *V = Vec.getNode();
6568  unsigned nextIndex = 0;
6569
  // For each operand of the ADD (both are BUILD_VECTORs), check that each of
  // its operands is an EXTRACT_VECTOR_ELT of the same vector with the
  // appropriate index.
6573  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
6574    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
6575        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
6576
6577      SDValue ExtVec0 = N0->getOperand(i);
6578      SDValue ExtVec1 = N1->getOperand(i);
6579
      // The first operand is the vector; verify it is the same.
6581      if (V != ExtVec0->getOperand(0).getNode() ||
6582          V != ExtVec1->getOperand(0).getNode())
6583        return SDValue();
6584
      // The second operand is the lane index constant; verify it is correct.
6586      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
6587      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
6588
      // The indices must be consecutive: N0 supplies the even indices and N1
      // the odd ones.
6590      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
6591          || C1->getZExtValue() != nextIndex+1)
6592        return SDValue();
6593
6594      // Increment index.
6595      nextIndex+=2;
6596    } else
6597      return SDValue();
6598  }
6599
6600  // Create VPADDL node.
6601  SelectionDAG &DAG = DCI.DAG;
6602  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6603
6604  // Build operand list.
6605  SmallVector<SDValue, 8> Ops;
6606  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
6607                                TLI.getPointerTy()));
6608
6609  // Input is the vector.
6610  Ops.push_back(Vec);
6611
  // Get the widened vector type.
6613  MVT widenType;
6614  unsigned numElem = VT.getVectorNumElements();
6615  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
6616    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
6617    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
6618    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
6619    default:
      assert(0 && "Invalid vector element type for vpaddl optimization.");
6621  }
6622
6623  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
6624                            widenType, &Ops[0], Ops.size());
6625  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
6626}
6627
6628/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
6629/// operands N0 and N1.  This is a helper for PerformADDCombine that is
6630/// called with the default operands, and if that fails, with commuted
6631/// operands.
6632static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
6633                                          TargetLowering::DAGCombinerInfo &DCI,
6634                                          const ARMSubtarget *Subtarget){
6635
6636  // Attempt to create vpaddl for this add.
6637  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
6638  if (Result.getNode())
6639    return Result;
6640
  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
6642  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
6643    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
6644    if (Result.getNode()) return Result;
6645  }
6646  return SDValue();
6647}
6648
6649/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
6650///
6651static SDValue PerformADDCombine(SDNode *N,
6652                                 TargetLowering::DAGCombinerInfo &DCI,
6653                                 const ARMSubtarget *Subtarget) {
6654  SDValue N0 = N->getOperand(0);
6655  SDValue N1 = N->getOperand(1);
6656
6657  // First try with the default operand order.
6658  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
6659  if (Result.getNode())
6660    return Result;
6661
6662  // If that didn't work, try again with the operands commuted.
6663  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
6664}
6665
6666/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
6667///
6668static SDValue PerformSUBCombine(SDNode *N,
6669                                 TargetLowering::DAGCombinerInfo &DCI) {
6670  SDValue N0 = N->getOperand(0);
6671  SDValue N1 = N->getOperand(1);
6672
  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
6674  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
6675    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
6676    if (Result.getNode()) return Result;
6677  }
6678
6679  return SDValue();
6680}
6681
6682/// PerformVMULCombine
6683/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
6684/// special multiplier accumulator forwarding.
6685///   vmul d3, d0, d2
6686///   vmla d3, d1, d2
6687/// is faster than
6688///   vadd d3, d0, d1
6689///   vmul d3, d3, d2
6690static SDValue PerformVMULCombine(SDNode *N,
6691                                  TargetLowering::DAGCombinerInfo &DCI,
6692                                  const ARMSubtarget *Subtarget) {
6693  if (!Subtarget->hasVMLxForwarding())
6694    return SDValue();
6695
6696  SelectionDAG &DAG = DCI.DAG;
6697  SDValue N0 = N->getOperand(0);
6698  SDValue N1 = N->getOperand(1);
6699  unsigned Opcode = N0.getOpcode();
6700  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
6701      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
6702    Opcode = N1.getOpcode();
6703    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
6704        Opcode != ISD::FADD && Opcode != ISD::FSUB)
6705      return SDValue();
6706    std::swap(N0, N1);
6707  }
6708
6709  EVT VT = N->getValueType(0);
6710  DebugLoc DL = N->getDebugLoc();
6711  SDValue N00 = N0->getOperand(0);
6712  SDValue N01 = N0->getOperand(1);
6713  return DAG.getNode(Opcode, DL, VT,
6714                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
6715                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
6716}
6717
6718static SDValue PerformMULCombine(SDNode *N,
6719                                 TargetLowering::DAGCombinerInfo &DCI,
6720                                 const ARMSubtarget *Subtarget) {
6721  SelectionDAG &DAG = DCI.DAG;
6722
6723  if (Subtarget->isThumb1Only())
6724    return SDValue();
6725
6726  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
6727    return SDValue();
6728
6729  EVT VT = N->getValueType(0);
6730  if (VT.is64BitVector() || VT.is128BitVector())
6731    return PerformVMULCombine(N, DCI, Subtarget);
6732  if (VT != MVT::i32)
6733    return SDValue();
6734
6735  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
6736  if (!C)
6737    return SDValue();
6738
6739  uint64_t MulAmt = C->getZExtValue();
6740  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
6741  ShiftAmt = ShiftAmt & (32 - 1);
6742  SDValue V = N->getOperand(0);
6743  DebugLoc DL = N->getDebugLoc();
6744
6745  SDValue Res;
6746  MulAmt >>= ShiftAmt;
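  // For example, MulAmt == 20 gives ShiftAmt == 2 and a shifted multiplier
  // of 5 == 2^2 + 1, so (mul x, 20) becomes (shl (add x, (shl x, 2)), 2).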
6747  if (isPowerOf2_32(MulAmt - 1)) {
6748    // (mul x, 2^N + 1) => (add (shl x, N), x)
6749    Res = DAG.getNode(ISD::ADD, DL, VT,
6750                      V, DAG.getNode(ISD::SHL, DL, VT,
6751                                     V, DAG.getConstant(Log2_32(MulAmt-1),
6752                                                        MVT::i32)));
6753  } else if (isPowerOf2_32(MulAmt + 1)) {
6754    // (mul x, 2^N - 1) => (sub (shl x, N), x)
6755    Res = DAG.getNode(ISD::SUB, DL, VT,
6756                      DAG.getNode(ISD::SHL, DL, VT,
6757                                  V, DAG.getConstant(Log2_32(MulAmt+1),
6758                                                     MVT::i32)),
6759                                                     V);
6760  } else
6761    return SDValue();
6762
6763  if (ShiftAmt != 0)
6764    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
6765                      DAG.getConstant(ShiftAmt, MVT::i32));
6766
6767  // Do not add new nodes to DAG combiner worklist.
6768  DCI.CombineTo(N, Res, false);
6769  return SDValue();
6770}
6771
6772static SDValue PerformANDCombine(SDNode *N,
6773                                TargetLowering::DAGCombinerInfo &DCI) {
6774
6775  // Attempt to use immediate-form VBIC
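  // (and x, C) is rewritten as (VBICIMM x, ~C) when ~C can be encoded as a
  // NEON modified immediate, since VBIC clears exactly the bits set in its
  // immediate operand.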
6776  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
6777  DebugLoc dl = N->getDebugLoc();
6778  EVT VT = N->getValueType(0);
6779  SelectionDAG &DAG = DCI.DAG;
6780
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
6782    return SDValue();
6783
6784  APInt SplatBits, SplatUndef;
6785  unsigned SplatBitSize;
6786  bool HasAnyUndefs;
6787  if (BVN &&
6788      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6789    if (SplatBitSize <= 64) {
6790      EVT VbicVT;
6791      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
6792                                      SplatUndef.getZExtValue(), SplatBitSize,
6793                                      DAG, VbicVT, VT.is128BitVector(),
6794                                      OtherModImm);
6795      if (Val.getNode()) {
6796        SDValue Input =
6797          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
6798        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
6799        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
6800      }
6801    }
6802  }
6803
6804  return SDValue();
6805}
6806
6807/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
6808static SDValue PerformORCombine(SDNode *N,
6809                                TargetLowering::DAGCombinerInfo &DCI,
6810                                const ARMSubtarget *Subtarget) {
6811  // Attempt to use immediate-form VORR
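  // (or x, C) is rewritten as (VORRIMM x, C) when C can be encoded as a NEON
  // modified immediate.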
6812  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
6813  DebugLoc dl = N->getDebugLoc();
6814  EVT VT = N->getValueType(0);
6815  SelectionDAG &DAG = DCI.DAG;
6816
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
6818    return SDValue();
6819
6820  APInt SplatBits, SplatUndef;
6821  unsigned SplatBitSize;
6822  bool HasAnyUndefs;
6823  if (BVN && Subtarget->hasNEON() &&
6824      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6825    if (SplatBitSize <= 64) {
6826      EVT VorrVT;
6827      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6828                                      SplatUndef.getZExtValue(), SplatBitSize,
6829                                      DAG, VorrVT, VT.is128BitVector(),
6830                                      OtherModImm);
6831      if (Val.getNode()) {
6832        SDValue Input =
6833          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
6834        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
6835        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
6836      }
6837    }
6838  }
6839
6840  SDValue N0 = N->getOperand(0);
6841  if (N0.getOpcode() != ISD::AND)
6842    return SDValue();
6843  SDValue N1 = N->getOperand(1);
6844
6845  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
6846  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
6847      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
6848    APInt SplatUndef;
6849    unsigned SplatBitSize;
6850    bool HasAnyUndefs;
6851
6852    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
6853    APInt SplatBits0;
6854    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
6855                                  HasAnyUndefs) && !HasAnyUndefs) {
6856      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
6857      APInt SplatBits1;
6858      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
6859                                    HasAnyUndefs) && !HasAnyUndefs &&
6860          SplatBits0 == ~SplatBits1) {
6861        // Canonicalize the vector type to make instruction selection simpler.
6862        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6863        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
6864                                     N0->getOperand(1), N0->getOperand(0),
6865                                     N1->getOperand(0));
6866        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
6867      }
6868    }
6869  }
6870
6871  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
6872  // reasonable.
6873
6874  // BFI is only available on V6T2+
6875  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
6876    return SDValue();
6877
6878  DebugLoc DL = N->getDebugLoc();
6879  // 1) or (and A, mask), val => ARMbfi A, val, mask
6880  //      iff (val & mask) == val
6881  //
6882  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
6883  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
6884  //          && mask == ~mask2
6885  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
6886  //          && ~mask == mask2
6887  //  (i.e., copy a bitfield value into another bitfield of the same width)
6888
6889  if (VT != MVT::i32)
6890    return SDValue();
6891
6892  SDValue N00 = N0.getOperand(0);
6893
6894  // The value and the mask need to be constants so we can verify this is
6895  // actually a bitfield set. If the mask is 0xffff, we can do better
6896  // via a movt instruction, so don't use BFI in that case.
6897  SDValue MaskOp = N0.getOperand(1);
6898  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
6899  if (!MaskC)
6900    return SDValue();
6901  unsigned Mask = MaskC->getZExtValue();
6902  if (Mask == 0xffff)
6903    return SDValue();
6904  SDValue Res;
6905  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
6906  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6907  if (N1C) {
6908    unsigned Val = N1C->getZExtValue();
6909    if ((Val & ~Mask) != Val)
6910      return SDValue();
6911
6912    if (ARM::isBitFieldInvertedMask(Mask)) {
6913      Val >>= CountTrailingZeros_32(~Mask);
6914
6915      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
6916                        DAG.getConstant(Val, MVT::i32),
6917                        DAG.getConstant(Mask, MVT::i32));
6918
6919      // Do not add new nodes to DAG combiner worklist.
6920      DCI.CombineTo(N, Res, false);
6921      return SDValue();
6922    }
6923  } else if (N1.getOpcode() == ISD::AND) {
6924    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
6925    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6926    if (!N11C)
6927      return SDValue();
6928    unsigned Mask2 = N11C->getZExtValue();
6929
6930    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
6931    // as is to match.
6932    if (ARM::isBitFieldInvertedMask(Mask) &&
6933        (Mask == ~Mask2)) {
6934      // The pack halfword instruction works better for masks that fit it,
6935      // so use that when it's available.
6936      if (Subtarget->hasT2ExtractPack() &&
6937          (Mask == 0xffff || Mask == 0xffff0000))
6938        return SDValue();
6939      // 2a
6940      unsigned amt = CountTrailingZeros_32(Mask2);
6941      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
6942                        DAG.getConstant(amt, MVT::i32));
6943      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
6944                        DAG.getConstant(Mask, MVT::i32));
6945      // Do not add new nodes to DAG combiner worklist.
6946      DCI.CombineTo(N, Res, false);
6947      return SDValue();
6948    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
6949               (~Mask == Mask2)) {
6950      // The pack halfword instruction works better for masks that fit it,
6951      // so use that when it's available.
6952      if (Subtarget->hasT2ExtractPack() &&
6953          (Mask2 == 0xffff || Mask2 == 0xffff0000))
6954        return SDValue();
6955      // 2b
6956      unsigned lsb = CountTrailingZeros_32(Mask);
6957      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
6958                        DAG.getConstant(lsb, MVT::i32));
6959      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
6960                        DAG.getConstant(Mask2, MVT::i32));
6961      // Do not add new nodes to DAG combiner worklist.
6962      DCI.CombineTo(N, Res, false);
6963      return SDValue();
6964    }
6965  }
6966
6967  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
6968      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
6969      ARM::isBitFieldInvertedMask(~Mask)) {
6970    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
6971    // where lsb(mask) == #shamt and masked bits of B are known zero.
6972    SDValue ShAmt = N00.getOperand(1);
6973    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
6974    unsigned LSB = CountTrailingZeros_32(Mask);
6975    if (ShAmtC != LSB)
6976      return SDValue();
6977
6978    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
6979                      DAG.getConstant(~Mask, MVT::i32));
6980
6981    // Do not add new nodes to DAG combiner worklist.
6982    DCI.CombineTo(N, Res, false);
6983  }
6984
6985  return SDValue();
6986}
6987
6988/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
6989/// the bits being cleared by the AND are not demanded by the BFI.
6990static SDValue PerformBFICombine(SDNode *N,
6991                                 TargetLowering::DAGCombinerInfo &DCI) {
6992  SDValue N1 = N->getOperand(1);
6993  if (N1.getOpcode() == ISD::AND) {
6994    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6995    if (!N11C)
6996      return SDValue();
6997    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
6998    unsigned LSB = CountTrailingZeros_32(~InvMask);
6999    unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
7000    unsigned Mask = (1 << Width)-1;
7001    unsigned Mask2 = N11C->getZExtValue();
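    // For example, InvMask == 0xFFFF00FF describes an 8-bit field at bit 8,
    // so LSB == 8, Width == 8 and Mask == 0xFF covers the low bits of B that
    // the BFI actually inserts.  If the AND's constant Mask2 keeps all of
    // those bits (Mask & ~Mask2 == 0), the AND is redundant.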
7002    if ((Mask & (~Mask2)) == 0)
7003      return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
7004                             N->getOperand(0), N1.getOperand(0),
7005                             N->getOperand(2));
7006  }
7007  return SDValue();
7008}
7009
7010/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
7011/// ARMISD::VMOVRRD.
7012static SDValue PerformVMOVRRDCombine(SDNode *N,
7013                                     TargetLowering::DAGCombinerInfo &DCI) {
7014  // vmovrrd(vmovdrr x, y) -> x,y
7015  SDValue InDouble = N->getOperand(0);
7016  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
7017    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
7018
7019  // vmovrrd(load f64) -> (load i32), (load i32)
7020  SDNode *InNode = InDouble.getNode();
7021  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
7022      InNode->getValueType(0) == MVT::f64 &&
7023      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
7024      !cast<LoadSDNode>(InNode)->isVolatile()) {
7025    // TODO: Should this be done for non-FrameIndex operands?
7026    LoadSDNode *LD = cast<LoadSDNode>(InNode);
7027
7028    SelectionDAG &DAG = DCI.DAG;
7029    DebugLoc DL = LD->getDebugLoc();
7030    SDValue BasePtr = LD->getBasePtr();
7031    SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
7032                                 LD->getPointerInfo(), LD->isVolatile(),
7033                                 LD->isNonTemporal(), LD->getAlignment());
7034
7035    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
7036                                    DAG.getConstant(4, MVT::i32));
7037    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
7038                                 LD->getPointerInfo(), LD->isVolatile(),
7039                                 LD->isNonTemporal(),
7040                                 std::min(4U, LD->getAlignment() / 2));
7041
7042    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
7043    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
7044    DCI.RemoveFromWorklist(LD);
7045    DAG.DeleteNode(LD);
7046    return Result;
7047  }
7048
7049  return SDValue();
7050}
7051
7052/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
7053/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
7054static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
7055  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
7056  SDValue Op0 = N->getOperand(0);
7057  SDValue Op1 = N->getOperand(1);
7058  if (Op0.getOpcode() == ISD::BITCAST)
7059    Op0 = Op0.getOperand(0);
7060  if (Op1.getOpcode() == ISD::BITCAST)
7061    Op1 = Op1.getOperand(0);
7062  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
7063      Op0.getNode() == Op1.getNode() &&
7064      Op0.getResNo() == 0 && Op1.getResNo() == 1)
7065    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
7066                       N->getValueType(0), Op0.getOperand(0));
7067  return SDValue();
7068}
7069
7070/// PerformSTORECombine - Target-specific dag combine xforms for
7071/// ISD::STORE.
7072static SDValue PerformSTORECombine(SDNode *N,
7073                                   TargetLowering::DAGCombinerInfo &DCI) {
7074  // Bitcast an i64 store extracted from a vector to f64.
7075  // Otherwise, the i64 value will be legalized to a pair of i32 values.
7076  StoreSDNode *St = cast<StoreSDNode>(N);
7077  SDValue StVal = St->getValue();
7078  if (!ISD::isNormalStore(St) || St->isVolatile())
7079    return SDValue();
7080
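  // Split a store of an ARMISD::VMOVDRR (an f64 assembled from a pair of
  // GPRs) into two i32 stores, so the value never needs to be moved into a
  // VFP register just to be stored.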
7081  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
7082      StVal.getNode()->hasOneUse() && !St->isVolatile()) {
7083    SelectionDAG  &DAG = DCI.DAG;
7084    DebugLoc DL = St->getDebugLoc();
7085    SDValue BasePtr = St->getBasePtr();
7086    SDValue NewST1 = DAG.getStore(St->getChain(), DL,
7087                                  StVal.getNode()->getOperand(0), BasePtr,
7088                                  St->getPointerInfo(), St->isVolatile(),
7089                                  St->isNonTemporal(), St->getAlignment());
7090
7091    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
7092                                    DAG.getConstant(4, MVT::i32));
7093    return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
7094                        OffsetPtr, St->getPointerInfo(), St->isVolatile(),
7095                        St->isNonTemporal(),
7096                        std::min(4U, St->getAlignment() / 2));
7097  }
7098
7099  if (StVal.getValueType() != MVT::i64 ||
7100      StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7101    return SDValue();
7102
7103  SelectionDAG &DAG = DCI.DAG;
7104  DebugLoc dl = StVal.getDebugLoc();
7105  SDValue IntVec = StVal.getOperand(0);
7106  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
7107                                 IntVec.getValueType().getVectorNumElements());
7108  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
7109  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
7110                               Vec, StVal.getOperand(1));
7111  dl = N->getDebugLoc();
7112  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
7113  // Make the DAGCombiner fold the bitcasts.
7114  DCI.AddToWorklist(Vec.getNode());
7115  DCI.AddToWorklist(ExtElt.getNode());
7116  DCI.AddToWorklist(V.getNode());
7117  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
7118                      St->getPointerInfo(), St->isVolatile(),
7119                      St->isNonTemporal(), St->getAlignment(),
7120                      St->getTBAAInfo());
7121}
7122
7123/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
7124/// are normal, non-volatile loads.  If so, it is profitable to bitcast an
7125/// i64 vector to have f64 elements, since the value can then be loaded
7126/// directly into a VFP register.
7127static bool hasNormalLoadOperand(SDNode *N) {
7128  unsigned NumElts = N->getValueType(0).getVectorNumElements();
7129  for (unsigned i = 0; i < NumElts; ++i) {
7130    SDNode *Elt = N->getOperand(i).getNode();
7131    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
7132      return true;
7133  }
7134  return false;
7135}
7136
7137/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
7138/// ISD::BUILD_VECTOR.
7139static SDValue PerformBUILD_VECTORCombine(SDNode *N,
7140                                          TargetLowering::DAGCombinerInfo &DCI){
7141  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
7142  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
7143  // into a pair of GPRs, which is fine when the value is used as a scalar,
7144  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
7145  SelectionDAG &DAG = DCI.DAG;
7146  if (N->getNumOperands() == 2) {
7147    SDValue RV = PerformVMOVDRRCombine(N, DAG);
7148    if (RV.getNode())
7149      return RV;
7150  }
7151
7152  // Load i64 elements as f64 values so that type legalization does not split
7153  // them up into i32 values.
7154  EVT VT = N->getValueType(0);
7155  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
7156    return SDValue();
7157  DebugLoc dl = N->getDebugLoc();
7158  SmallVector<SDValue, 8> Ops;
7159  unsigned NumElts = VT.getVectorNumElements();
7160  for (unsigned i = 0; i < NumElts; ++i) {
7161    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
7162    Ops.push_back(V);
7163    // Make the DAGCombiner fold the bitcast.
7164    DCI.AddToWorklist(V.getNode());
7165  }
7166  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
7167  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
7168  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
7169}
7170
7171/// PerformInsertEltCombine - Target-specific dag combine xforms for
7172/// ISD::INSERT_VECTOR_ELT.
7173static SDValue PerformInsertEltCombine(SDNode *N,
7174                                       TargetLowering::DAGCombinerInfo &DCI) {
7175  // Bitcast an i64 load inserted into a vector to f64.
7176  // Otherwise, the i64 value will be legalized to a pair of i32 values.
7177  EVT VT = N->getValueType(0);
7178  SDNode *Elt = N->getOperand(1).getNode();
7179  if (VT.getVectorElementType() != MVT::i64 ||
7180      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
7181    return SDValue();
7182
7183  SelectionDAG &DAG = DCI.DAG;
7184  DebugLoc dl = N->getDebugLoc();
7185  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
7186                                 VT.getVectorNumElements());
7187  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
7188  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
7189  // Make the DAGCombiner fold the bitcasts.
7190  DCI.AddToWorklist(Vec.getNode());
7191  DCI.AddToWorklist(V.getNode());
7192  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
7193                               Vec, V, N->getOperand(2));
7194  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
7195}
7196
7197/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
7198/// ISD::VECTOR_SHUFFLE.
7199static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
7200  // The LLVM shufflevector instruction does not require the shuffle mask
7201  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
7202  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
7203  // operands do not match the mask length, they are extended by concatenating
7204  // them with undef vectors.  That is probably the right thing for other
7205  // targets, but for NEON it is better to concatenate two double-register
7206  // size vector operands into a single quad-register size vector.  Do that
7207  // transformation here:
7208  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
7209  //   shuffle(concat(v1, v2), undef)
7210  SDValue Op0 = N->getOperand(0);
7211  SDValue Op1 = N->getOperand(1);
7212  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
7213      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
7214      Op0.getNumOperands() != 2 ||
7215      Op1.getNumOperands() != 2)
7216    return SDValue();
7217  SDValue Concat0Op1 = Op0.getOperand(1);
7218  SDValue Concat1Op1 = Op1.getOperand(1);
7219  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
7220      Concat1Op1.getOpcode() != ISD::UNDEF)
7221    return SDValue();
7222  // Skip the transformation if any of the types are illegal.
7223  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7224  EVT VT = N->getValueType(0);
7225  if (!TLI.isTypeLegal(VT) ||
7226      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
7227      !TLI.isTypeLegal(Concat1Op1.getValueType()))
7228    return SDValue();
7229
7230  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
7231                                  Op0.getOperand(0), Op1.getOperand(0));
7232  // Translate the shuffle mask.
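  // Elements drawn from the low half of the first operand keep their index;
  // elements drawn from the low half of the second operand now live in the
  // upper half of NewConcat, so they map to HalfElts + (MaskElt - NumElts).
  // Anything referencing the discarded undef halves becomes undef (-1).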
7233  SmallVector<int, 16> NewMask;
7234  unsigned NumElts = VT.getVectorNumElements();
7235  unsigned HalfElts = NumElts/2;
7236  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7237  for (unsigned n = 0; n < NumElts; ++n) {
7238    int MaskElt = SVN->getMaskElt(n);
7239    int NewElt = -1;
7240    if (MaskElt < (int)HalfElts)
7241      NewElt = MaskElt;
7242    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
7243      NewElt = HalfElts + MaskElt - NumElts;
7244    NewMask.push_back(NewElt);
7245  }
7246  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
7247                              DAG.getUNDEF(VT), NewMask.data());
7248}
7249
7250/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
7251/// NEON load/store intrinsics to merge base address updates.
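/// For example, a vld1 whose base address is also advanced by the size of
/// the access can use the post-incremented (writeback) addressing form,
/// e.g. "vld1.32 {d16}, [r0]!", folding away the separate ADD.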
7252static SDValue CombineBaseUpdate(SDNode *N,
7253                                 TargetLowering::DAGCombinerInfo &DCI) {
7254  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
7255    return SDValue();
7256
7257  SelectionDAG &DAG = DCI.DAG;
7258  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
7259                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
7260  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
7261  SDValue Addr = N->getOperand(AddrOpIdx);
7262
7263  // Search for a use of the address operand that is an increment.
7264  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
7265         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
7266    SDNode *User = *UI;
7267    if (User->getOpcode() != ISD::ADD ||
7268        UI.getUse().getResNo() != Addr.getResNo())
7269      continue;
7270
7271    // Check that the add is independent of the load/store.  Otherwise, folding
7272    // it would create a cycle.
7273    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
7274      continue;
7275
7276    // Find the new opcode for the updating load/store.
7277    bool isLoad = true;
7278    bool isLaneOp = false;
7279    unsigned NewOpc = 0;
7280    unsigned NumVecs = 0;
7281    if (isIntrinsic) {
7282      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
7283      switch (IntNo) {
7284      default: assert(0 && "unexpected intrinsic for Neon base update");
7285      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
7286        NumVecs = 1; break;
7287      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
7288        NumVecs = 2; break;
7289      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
7290        NumVecs = 3; break;
7291      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
7292        NumVecs = 4; break;
7293      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
7294        NumVecs = 2; isLaneOp = true; break;
7295      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
7296        NumVecs = 3; isLaneOp = true; break;
7297      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
7298        NumVecs = 4; isLaneOp = true; break;
7299      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
7300        NumVecs = 1; isLoad = false; break;
7301      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
7302        NumVecs = 2; isLoad = false; break;
7303      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
7304        NumVecs = 3; isLoad = false; break;
7305      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
7306        NumVecs = 4; isLoad = false; break;
7307      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
7308        NumVecs = 2; isLoad = false; isLaneOp = true; break;
7309      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
7310        NumVecs = 3; isLoad = false; isLaneOp = true; break;
7311      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
7312        NumVecs = 4; isLoad = false; isLaneOp = true; break;
7313      }
7314    } else {
7315      isLaneOp = true;
7316      switch (N->getOpcode()) {
7317      default: assert(0 && "unexpected opcode for Neon base update");
7318      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
7319      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
7320      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
7321      }
7322    }
7323
7324    // Find the size of memory referenced by the load/store.
7325    EVT VecTy;
7326    if (isLoad)
7327      VecTy = N->getValueType(0);
7328    else
7329      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
7330    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
7331    if (isLaneOp)
7332      NumBytes /= VecTy.getVectorNumElements();
7333
7334    // If the increment is a constant, it must match the memory ref size.
7335    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
7336    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
7337      uint64_t IncVal = CInc->getZExtValue();
7338      if (IncVal != NumBytes)
7339        continue;
7340    } else if (NumBytes >= 3 * 16) {
7341      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
7342      // separate instructions that make it harder to use a non-constant update.
7343      continue;
7344    }
7345
7346    // Create the new updating load/store node.
7347    EVT Tys[6];
7348    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
7349    unsigned n;
7350    for (n = 0; n < NumResultVecs; ++n)
7351      Tys[n] = VecTy;
7352    Tys[n++] = MVT::i32;
7353    Tys[n] = MVT::Other;
7354    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
7355    SmallVector<SDValue, 8> Ops;
7356    Ops.push_back(N->getOperand(0)); // incoming chain
7357    Ops.push_back(N->getOperand(AddrOpIdx));
7358    Ops.push_back(Inc);
7359    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
7360      Ops.push_back(N->getOperand(i));
7361    }
7362    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
7363    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
7364                                           Ops.data(), Ops.size(),
7365                                           MemInt->getMemoryVT(),
7366                                           MemInt->getMemOperand());
7367
7368    // Update the uses.
7369    std::vector<SDValue> NewResults;
7370    for (unsigned i = 0; i < NumResultVecs; ++i) {
7371      NewResults.push_back(SDValue(UpdN.getNode(), i));
7372    }
7373    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
7374    DCI.CombineTo(N, NewResults);
7375    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
7376
7377    break;
7378  }
7379  return SDValue();
7380}
7381
7382/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
7383/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
7384/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
7385/// return true.
7386static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
7387  SelectionDAG &DAG = DCI.DAG;
7388  EVT VT = N->getValueType(0);
7389  // vldN-dup instructions only support 64-bit vectors for N > 1.
7390  if (!VT.is64BitVector())
7391    return false;
7392
7393  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
7394  SDNode *VLD = N->getOperand(0).getNode();
7395  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
7396    return false;
7397  unsigned NumVecs = 0;
7398  unsigned NewOpc = 0;
7399  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
7400  if (IntNo == Intrinsic::arm_neon_vld2lane) {
7401    NumVecs = 2;
7402    NewOpc = ARMISD::VLD2DUP;
7403  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
7404    NumVecs = 3;
7405    NewOpc = ARMISD::VLD3DUP;
7406  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
7407    NumVecs = 4;
7408    NewOpc = ARMISD::VLD4DUP;
7409  } else {
7410    return false;
7411  }
7412
7413  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
7414  // numbers match the load.
7415  unsigned VLDLaneNo =
7416    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
7417  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
7418       UI != UE; ++UI) {
7419    // Ignore uses of the chain result.
7420    if (UI.getUse().getResNo() == NumVecs)
7421      continue;
7422    SDNode *User = *UI;
7423    if (User->getOpcode() != ARMISD::VDUPLANE ||
7424        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
7425      return false;
7426  }
7427
7428  // Create the vldN-dup node.
7429  EVT Tys[5];
7430  unsigned n;
7431  for (n = 0; n < NumVecs; ++n)
7432    Tys[n] = VT;
7433  Tys[n] = MVT::Other;
7434  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
7435  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
7436  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
7437  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
7438                                           Ops, 2, VLDMemInt->getMemoryVT(),
7439                                           VLDMemInt->getMemOperand());
7440
7441  // Update the uses.
7442  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
7443       UI != UE; ++UI) {
7444    unsigned ResNo = UI.getUse().getResNo();
7445    // Ignore uses of the chain result.
7446    if (ResNo == NumVecs)
7447      continue;
7448    SDNode *User = *UI;
7449    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
7450  }
7451
7452  // Now the vldN-lane intrinsic is dead except for its chain result.
7453  // Update uses of the chain.
7454  std::vector<SDValue> VLDDupResults;
7455  for (unsigned n = 0; n < NumVecs; ++n)
7456    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
7457  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
7458  DCI.CombineTo(VLD, VLDDupResults);
7459
7460  return true;
7461}
7462
7463/// PerformVDUPLANECombine - Target-specific dag combine xforms for
7464/// ARMISD::VDUPLANE.
7465static SDValue PerformVDUPLANECombine(SDNode *N,
7466                                      TargetLowering::DAGCombinerInfo &DCI) {
7467  SDValue Op = N->getOperand(0);
7468
7469  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
7470  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
7471  if (CombineVLDDUP(N, DCI))
7472    return SDValue(N, 0);
7473
7474  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
7475  // redundant.  Ignore bit_converts for now; element sizes are checked below.
7476  while (Op.getOpcode() == ISD::BITCAST)
7477    Op = Op.getOperand(0);
7478  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
7479    return SDValue();
7480
7481  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
7482  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
7483  // The canonical VMOV for a zero vector uses a 32-bit element size.
7484  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7485  unsigned EltBits;
7486  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
7487    EltSize = 8;
7488  EVT VT = N->getValueType(0);
7489  if (EltSize > VT.getVectorElementType().getSizeInBits())
7490    return SDValue();
7491
7492  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
7493}
7494
7495// isConstVecPow2 - Return true if each vector element is a power of 2, all
7496// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
7497static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
7498{
7499  integerPart cN;
7500  integerPart c0 = 0;
7501  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
7502       I != E; I++) {
7503    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
7504    if (!C)
7505      return false;
7506
7507    bool isExact;
7508    APFloat APF = C->getValueAPF();
7509    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
7510        != APFloat::opOK || !isExact)
7511      return false;
7512
7513    c0 = (I == 0) ? cN : c0;
7514    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
7515      return false;
7516  }
7517  C = c0;
7518  return true;
7519}
7520
7521/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
7522/// can replace combinations of VMUL and VCVT (floating-point to integer)
7523/// when the VMUL has a constant operand that is a power of 2.
7524///
7525/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
7526///  vmul.f32        d16, d17, d16
7527///  vcvt.s32.f32    d16, d16
7528/// becomes:
7529///  vcvt.s32.f32    d16, d16, #3
7530static SDValue PerformVCVTCombine(SDNode *N,
7531                                  TargetLowering::DAGCombinerInfo &DCI,
7532                                  const ARMSubtarget *Subtarget) {
7533  SelectionDAG &DAG = DCI.DAG;
7534  SDValue Op = N->getOperand(0);
7535
7536  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
7537      Op.getOpcode() != ISD::FMUL)
7538    return SDValue();
7539
7540  uint64_t C;
7541  SDValue N0 = Op->getOperand(0);
7542  SDValue ConstVec = Op->getOperand(1);
7543  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
7544
7545  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
7546      !isConstVecPow2(ConstVec, isSigned, C))
7547    return SDValue();
7548
7549  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
7550    Intrinsic::arm_neon_vcvtfp2fxu;
7551  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
7552                     N->getValueType(0),
7553                     DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
7554                     DAG.getConstant(Log2_64(C), MVT::i32));
7555}
7556
7557/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
7558/// can replace combinations of VCVT (integer to floating-point) and VDIV
7559/// when the VDIV has a constant operand that is a power of 2.
7560///
7561/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
7562///  vcvt.f32.s32    d16, d16
7563///  vdiv.f32        d16, d17, d16
7564/// becomes:
7565///  vcvt.f32.s32    d16, d16, #3
7566static SDValue PerformVDIVCombine(SDNode *N,
7567                                  TargetLowering::DAGCombinerInfo &DCI,
7568                                  const ARMSubtarget *Subtarget) {
7569  SelectionDAG &DAG = DCI.DAG;
7570  SDValue Op = N->getOperand(0);
7571  unsigned OpOpcode = Op.getNode()->getOpcode();
7572
7573  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
7574      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
7575    return SDValue();
7576
7577  uint64_t C;
7578  SDValue ConstVec = N->getOperand(1);
7579  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
7580
7581  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
7582      !isConstVecPow2(ConstVec, isSigned, C))
7583    return SDValue();
7584
7585  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
7586    Intrinsic::arm_neon_vcvtfxu2fp;
7587  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
7588                     Op.getValueType(),
7589                     DAG.getConstant(IntrinsicOpcode, MVT::i32),
7590                     Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
7591}
7592
/// getVShiftImm - Check if this is a valid build_vector for the immediate
7594/// operand of a vector shift operation, where all the elements of the
7595/// build_vector must have the same constant integer value.
7596static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
7597  // Ignore bit_converts.
7598  while (Op.getOpcode() == ISD::BITCAST)
7599    Op = Op.getOperand(0);
7600  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7601  APInt SplatBits, SplatUndef;
7602  unsigned SplatBitSize;
7603  bool HasAnyUndefs;
7604  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
7605                                      HasAnyUndefs, ElementBits) ||
7606      SplatBitSize > ElementBits)
7607    return false;
7608  Cnt = SplatBits.getSExtValue();
7609  return true;
7610}
7611
7612/// isVShiftLImm - Check if this is a valid build_vector for the immediate
7613/// operand of a vector shift left operation.  That value must be in the range:
7614///   0 <= Value < ElementBits for a left shift; or
7615///   0 <= Value <= ElementBits for a long left shift.
7616static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
7617  assert(VT.isVector() && "vector shift count is not a vector type");
7618  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
7619  if (! getVShiftImm(Op, ElementBits, Cnt))
7620    return false;
7621  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
7622}
7623
7624/// isVShiftRImm - Check if this is a valid build_vector for the immediate
7625/// operand of a vector shift right operation.  For a shift opcode, the value
/// is positive, but for an intrinsic the count must be negative. The
7627/// absolute value must be in the range:
7628///   1 <= |Value| <= ElementBits for a right shift; or
7629///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
7630static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
7631                         int64_t &Cnt) {
7632  assert(VT.isVector() && "vector shift count is not a vector type");
7633  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
7634  if (! getVShiftImm(Op, ElementBits, Cnt))
7635    return false;
7636  if (isIntrinsic)
7637    Cnt = -Cnt;
7638  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
7639}
7640
7641/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
7642static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
7643  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
7644  switch (IntNo) {
7645  default:
7646    // Don't do anything for most intrinsics.
7647    break;
7648
7649  // Vector shifts: check for immediate versions and lower them.
7650  // Note: This is done during DAG combining instead of DAG legalizing because
7651  // the build_vectors for 64-bit vector element shift counts are generally
7652  // not legal, and it is hard to see their values after they get legalized to
7653  // loads from a constant pool.
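  // For example, a vshifts/vshiftu intrinsic whose shift amount is the
  // splatted constant <4 x i32> <i32 3, i32 3, i32 3, i32 3> is lowered here
  // to ARMISD::VSHL with an immediate shift amount of 3.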
7654  case Intrinsic::arm_neon_vshifts:
7655  case Intrinsic::arm_neon_vshiftu:
7656  case Intrinsic::arm_neon_vshiftls:
7657  case Intrinsic::arm_neon_vshiftlu:
7658  case Intrinsic::arm_neon_vshiftn:
7659  case Intrinsic::arm_neon_vrshifts:
7660  case Intrinsic::arm_neon_vrshiftu:
7661  case Intrinsic::arm_neon_vrshiftn:
7662  case Intrinsic::arm_neon_vqshifts:
7663  case Intrinsic::arm_neon_vqshiftu:
7664  case Intrinsic::arm_neon_vqshiftsu:
7665  case Intrinsic::arm_neon_vqshiftns:
7666  case Intrinsic::arm_neon_vqshiftnu:
7667  case Intrinsic::arm_neon_vqshiftnsu:
7668  case Intrinsic::arm_neon_vqrshiftns:
7669  case Intrinsic::arm_neon_vqrshiftnu:
7670  case Intrinsic::arm_neon_vqrshiftnsu: {
7671    EVT VT = N->getOperand(1).getValueType();
7672    int64_t Cnt;
7673    unsigned VShiftOpc = 0;
7674
7675    switch (IntNo) {
7676    case Intrinsic::arm_neon_vshifts:
7677    case Intrinsic::arm_neon_vshiftu:
7678      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
7679        VShiftOpc = ARMISD::VSHL;
7680        break;
7681      }
7682      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
7683        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
7684                     ARMISD::VSHRs : ARMISD::VSHRu);
7685        break;
7686      }
7687      return SDValue();
7688
7689    case Intrinsic::arm_neon_vshiftls:
7690    case Intrinsic::arm_neon_vshiftlu:
7691      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
7692        break;
7693      llvm_unreachable("invalid shift count for vshll intrinsic");
7694
7695    case Intrinsic::arm_neon_vrshifts:
7696    case Intrinsic::arm_neon_vrshiftu:
7697      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
7698        break;
7699      return SDValue();
7700
7701    case Intrinsic::arm_neon_vqshifts:
7702    case Intrinsic::arm_neon_vqshiftu:
7703      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
7704        break;
7705      return SDValue();
7706
7707    case Intrinsic::arm_neon_vqshiftsu:
7708      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
7709        break;
7710      llvm_unreachable("invalid shift count for vqshlu intrinsic");
7711
7712    case Intrinsic::arm_neon_vshiftn:
7713    case Intrinsic::arm_neon_vrshiftn:
7714    case Intrinsic::arm_neon_vqshiftns:
7715    case Intrinsic::arm_neon_vqshiftnu:
7716    case Intrinsic::arm_neon_vqshiftnsu:
7717    case Intrinsic::arm_neon_vqrshiftns:
7718    case Intrinsic::arm_neon_vqrshiftnu:
7719    case Intrinsic::arm_neon_vqrshiftnsu:
7720      // Narrowing shifts require an immediate right shift.
7721      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
7722        break;
7723      llvm_unreachable("invalid shift count for narrowing vector shift "
7724                       "intrinsic");
7725
7726    default:
7727      llvm_unreachable("unhandled vector shift");
7728    }
7729
7730    switch (IntNo) {
7731    case Intrinsic::arm_neon_vshifts:
7732    case Intrinsic::arm_neon_vshiftu:
7733      // Opcode already set above.
7734      break;
7735    case Intrinsic::arm_neon_vshiftls:
7736    case Intrinsic::arm_neon_vshiftlu:
7737      if (Cnt == VT.getVectorElementType().getSizeInBits())
7738        VShiftOpc = ARMISD::VSHLLi;
7739      else
7740        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
7741                     ARMISD::VSHLLs : ARMISD::VSHLLu);
7742      break;
7743    case Intrinsic::arm_neon_vshiftn:
7744      VShiftOpc = ARMISD::VSHRN; break;
7745    case Intrinsic::arm_neon_vrshifts:
7746      VShiftOpc = ARMISD::VRSHRs; break;
7747    case Intrinsic::arm_neon_vrshiftu:
7748      VShiftOpc = ARMISD::VRSHRu; break;
7749    case Intrinsic::arm_neon_vrshiftn:
7750      VShiftOpc = ARMISD::VRSHRN; break;
7751    case Intrinsic::arm_neon_vqshifts:
7752      VShiftOpc = ARMISD::VQSHLs; break;
7753    case Intrinsic::arm_neon_vqshiftu:
7754      VShiftOpc = ARMISD::VQSHLu; break;
7755    case Intrinsic::arm_neon_vqshiftsu:
7756      VShiftOpc = ARMISD::VQSHLsu; break;
7757    case Intrinsic::arm_neon_vqshiftns:
7758      VShiftOpc = ARMISD::VQSHRNs; break;
7759    case Intrinsic::arm_neon_vqshiftnu:
7760      VShiftOpc = ARMISD::VQSHRNu; break;
7761    case Intrinsic::arm_neon_vqshiftnsu:
7762      VShiftOpc = ARMISD::VQSHRNsu; break;
7763    case Intrinsic::arm_neon_vqrshiftns:
7764      VShiftOpc = ARMISD::VQRSHRNs; break;
7765    case Intrinsic::arm_neon_vqrshiftnu:
7766      VShiftOpc = ARMISD::VQRSHRNu; break;
7767    case Intrinsic::arm_neon_vqrshiftnsu:
7768      VShiftOpc = ARMISD::VQRSHRNsu; break;
7769    }
7770
7771    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
7772                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
7773  }
7774
7775  case Intrinsic::arm_neon_vshiftins: {
7776    EVT VT = N->getOperand(1).getValueType();
7777    int64_t Cnt;
7778    unsigned VShiftOpc = 0;
7779
7780    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
7781      VShiftOpc = ARMISD::VSLI;
7782    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
7783      VShiftOpc = ARMISD::VSRI;
7784    else {
7785      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
7786    }
7787
7788    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
7789                       N->getOperand(1), N->getOperand(2),
7790                       DAG.getConstant(Cnt, MVT::i32));
7791  }
7792
7793  case Intrinsic::arm_neon_vqrshifts:
7794  case Intrinsic::arm_neon_vqrshiftu:
7795    // No immediate versions of these to check for.
7796    break;
7797  }
7798
7799  return SDValue();
7800}
7801
7802/// PerformShiftCombine - Checks for immediate versions of vector shifts and
7803/// lowers them.  As with the vector shift intrinsics, this is done during DAG
7804/// combining instead of DAG legalizing because the build_vectors for 64-bit
7805/// vector element shift counts are generally not legal, and it is hard to see
7806/// their values after they get legalized to loads from a constant pool.
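/// For example (illustrative): (shl <2 x i64> X, (build_vector 3, 3)) is
/// rewritten here to (ARMISD::VSHL X, 3) while the shift amount is still
/// visible as a splat build_vector, before legalization would turn it into a
/// constant-pool load.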
7807static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
7808                                   const ARMSubtarget *ST) {
7809  EVT VT = N->getValueType(0);
7810
7811  // Nothing to be done for scalar shifts.
7812  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7813  if (!VT.isVector() || !TLI.isTypeLegal(VT))
7814    return SDValue();
7815
7816  assert(ST->hasNEON() && "unexpected vector shift");
7817  int64_t Cnt;
7818
7819  switch (N->getOpcode()) {
7820  default: llvm_unreachable("unexpected shift opcode");
7821
7822  case ISD::SHL:
7823    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
7824      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
7825                         DAG.getConstant(Cnt, MVT::i32));
7826    break;
7827
7828  case ISD::SRA:
7829  case ISD::SRL:
7830    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
7831      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
7832                            ARMISD::VSHRs : ARMISD::VSHRu);
7833      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
7834                         DAG.getConstant(Cnt, MVT::i32));
7835    }
7836  }
7837  return SDValue();
7838}
7839
7840/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
7841/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
7842static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
7843                                    const ARMSubtarget *ST) {
7844  SDValue N0 = N->getOperand(0);
7845
7846  // Check for sign- and zero-extensions of vector extract operations of 8-
7847  // and 16-bit vector elements.  NEON supports these directly.  They are
7848  // handled during DAG combining because type legalization will promote them
7849  // to 32-bit types and it is messy to recognize the operations after that.
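  // For instance (illustrative), (sign_extend (extract_vector_elt <8 x i8> V,
  // C)) to i32 becomes (ARMISD::VGETLANEs V, C), which selects to a single
  // sign-extending "vmov" from the lane instead of an extract followed by a
  // separate sign extension.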
7850  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
7851    SDValue Vec = N0.getOperand(0);
7852    SDValue Lane = N0.getOperand(1);
7853    EVT VT = N->getValueType(0);
7854    EVT EltVT = N0.getValueType();
7855    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7856
7857    if (VT == MVT::i32 &&
7858        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
7859        TLI.isTypeLegal(Vec.getValueType()) &&
7860        isa<ConstantSDNode>(Lane)) {
7861
7862      unsigned Opc = 0;
7863      switch (N->getOpcode()) {
7864      default: llvm_unreachable("unexpected opcode");
7865      case ISD::SIGN_EXTEND:
7866        Opc = ARMISD::VGETLANEs;
7867        break;
7868      case ISD::ZERO_EXTEND:
7869      case ISD::ANY_EXTEND:
7870        Opc = ARMISD::VGETLANEu;
7871        break;
7872      }
7873      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
7874    }
7875  }
7876
7877  return SDValue();
7878}
7879
7880/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
7881/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
7882static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
7883                                       const ARMSubtarget *ST) {
7884  // If the target supports NEON, try to use vmax/vmin instructions for f32
7885  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
7886  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
7887  // a NaN; only do the transformation when it matches that behavior.
7888
7889  // For now only do this when using NEON for FP operations; if using VFP, it
7890  // is not obvious that the benefit outweighs the cost of switching to the
7891  // NEON pipeline.
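  // For example (illustrative): "a < b ? a : b" on f32 operands that are known
  // never to be NaN becomes ARMISD::FMIN, which selects to a single "vmin.f32".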
7892  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
7893      N->getValueType(0) != MVT::f32)
7894    return SDValue();
7895
7896  SDValue CondLHS = N->getOperand(0);
7897  SDValue CondRHS = N->getOperand(1);
7898  SDValue LHS = N->getOperand(2);
7899  SDValue RHS = N->getOperand(3);
7900  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
7901
7902  unsigned Opcode = 0;
7903  bool IsReversed;
7904  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
7905    IsReversed = false; // x CC y ? x : y
7906  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
7907    IsReversed = true;  // x CC y ? y : x
7908  } else {
7909    return SDValue();
7910  }
7911
7912  bool IsUnordered;
7913  switch (CC) {
7914  default: break;
7915  case ISD::SETOLT:
7916  case ISD::SETOLE:
7917  case ISD::SETLT:
7918  case ISD::SETLE:
7919  case ISD::SETULT:
7920  case ISD::SETULE:
7921    // If LHS is NaN, an ordered comparison will be false and the result will
7922    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
7923    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
7924    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
7925    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
7926      break;
7927    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
7928    // will return -0, so vmin can only be used for unsafe math or if one of
7929    // the operands is known to be nonzero.
7930    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
7931        !UnsafeFPMath &&
7932        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
7933      break;
7934    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
7935    break;
7936
7937  case ISD::SETOGT:
7938  case ISD::SETOGE:
7939  case ISD::SETGT:
7940  case ISD::SETGE:
7941  case ISD::SETUGT:
7942  case ISD::SETUGE:
7943    // If LHS is NaN, an ordered comparison will be false and the result will
7944    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
7945    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
7946    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
7947    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
7948      break;
7949    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
7950    // will return +0, so vmax can only be used for unsafe math or if one of
7951    // the operands is known to be nonzero.
7952    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
7953        !UnsafeFPMath &&
7954        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
7955      break;
7956    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
7957    break;
7958  }
7959
7960  if (!Opcode)
7961    return SDValue();
7962  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
7963}
7964
7965/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
7966SDValue
7967ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
7968  SDValue Cmp = N->getOperand(4);
7969  if (Cmp.getOpcode() != ARMISD::CMPZ)
7970    // Only looking at EQ and NE cases.
7971    return SDValue();
7972
7973  EVT VT = N->getValueType(0);
7974  DebugLoc dl = N->getDebugLoc();
7975  SDValue LHS = Cmp.getOperand(0);
7976  SDValue RHS = Cmp.getOperand(1);
7977  SDValue FalseVal = N->getOperand(0);
7978  SDValue TrueVal = N->getOperand(1);
7979  SDValue ARMcc = N->getOperand(2);
7980  ARMCC::CondCodes CC =
7981    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
7982
7983  // Simplify
7984  //   mov     r1, r0
7985  //   cmp     r1, x
7986  //   mov     r0, y
7987  //   moveq   r0, x
7988  // to
7989  //   cmp     r0, x
7990  //   movne   r0, y
7991  //
7992  //   mov     r1, r0
7993  //   cmp     r1, x
7994  //   mov     r0, x
7995  //   movne   r0, y
7996  // to
7997  //   cmp     r0, x
7998  //   movne   r0, y
7999  // FIXME: Turn this into a target-neutral optimization?
8000  SDValue Res;
8001  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
8002    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
8003                      N->getOperand(3), Cmp);
8004  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
8005    SDValue ARMcc;
8006    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
8007    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
8008                      N->getOperand(3), NewCmp);
8009  }
8010
8011  if (Res.getNode()) {
8012    APInt KnownZero, KnownOne;
8013    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
8014    DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
8015    // Capture demanded bits information that would otherwise be lost.
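    // For instance (illustrative): if the CMOV can only produce 0 or 1
    // (KnownZero == 0xfffffffe), an AssertZext to i1 is attached so later
    // combines still see that only the low bit may be set.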
8016    if (KnownZero == 0xfffffffe)
8017      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8018                        DAG.getValueType(MVT::i1));
8019    else if (KnownZero == 0xffffff00)
8020      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8021                        DAG.getValueType(MVT::i8));
8022    else if (KnownZero == 0xffff0000)
8023      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8024                        DAG.getValueType(MVT::i16));
8025  }
8026
8027  return Res;
8028}
8029
8030SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
8031                                             DAGCombinerInfo &DCI) const {
8032  switch (N->getOpcode()) {
8033  default: break;
8034  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
8035  case ISD::SUB:        return PerformSUBCombine(N, DCI);
8036  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
8037  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
8038  case ISD::AND:        return PerformANDCombine(N, DCI);
8039  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
8040  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
8041  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
8042  case ISD::STORE:      return PerformSTORECombine(N, DCI);
8043  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
8044  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
8045  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
8046  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
8047  case ISD::FP_TO_SINT:
8048  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
8049  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
8050  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
8051  case ISD::SHL:
8052  case ISD::SRA:
8053  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
8054  case ISD::SIGN_EXTEND:
8055  case ISD::ZERO_EXTEND:
8056  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
8057  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
8058  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
8059  case ARMISD::VLD2DUP:
8060  case ARMISD::VLD3DUP:
8061  case ARMISD::VLD4DUP:
8062    return CombineBaseUpdate(N, DCI);
8063  case ISD::INTRINSIC_VOID:
8064  case ISD::INTRINSIC_W_CHAIN:
8065    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
8066    case Intrinsic::arm_neon_vld1:
8067    case Intrinsic::arm_neon_vld2:
8068    case Intrinsic::arm_neon_vld3:
8069    case Intrinsic::arm_neon_vld4:
8070    case Intrinsic::arm_neon_vld2lane:
8071    case Intrinsic::arm_neon_vld3lane:
8072    case Intrinsic::arm_neon_vld4lane:
8073    case Intrinsic::arm_neon_vst1:
8074    case Intrinsic::arm_neon_vst2:
8075    case Intrinsic::arm_neon_vst3:
8076    case Intrinsic::arm_neon_vst4:
8077    case Intrinsic::arm_neon_vst2lane:
8078    case Intrinsic::arm_neon_vst3lane:
8079    case Intrinsic::arm_neon_vst4lane:
8080      return CombineBaseUpdate(N, DCI);
8081    default: break;
8082    }
8083    break;
8084  }
8085  return SDValue();
8086}
8087
8088bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
8089                                                          EVT VT) const {
8090  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
8091}
8092
8093bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
8094  if (!Subtarget->allowsUnalignedMem())
8095    return false;
8096
8097  switch (VT.getSimpleVT().SimpleTy) {
8098  default:
8099    return false;
8100  case MVT::i8:
8101  case MVT::i16:
8102  case MVT::i32:
8103    return true;
8104  // FIXME: VLD1 etc. with standard alignment is legal.
8105  }
8106}
8107
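/// isLegalT1AddressImmediate - Thumb-1 load / store offsets are an unsigned
/// 5-bit field scaled by the access size, so (for illustration) an i32 access
/// allows byte offsets 0, 4, 8, ..., 124 and an i8 access allows 0..31.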
8108static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
8109  if (V < 0)
8110    return false;
8111
8112  unsigned Scale = 1;
8113  switch (VT.getSimpleVT().SimpleTy) {
8114  default: return false;
8115  case MVT::i1:
8116  case MVT::i8:
8117    // Scale == 1;
8118    break;
8119  case MVT::i16:
8120    // Scale == 2;
8121    Scale = 2;
8122    break;
8123  case MVT::i32:
8124    // Scale == 4;
8125    Scale = 4;
8126    break;
8127  }
8128
8129  if ((V & (Scale - 1)) != 0)
8130    return false;
8131  V /= Scale;
8132  return V == (V & ((1LL << 5) - 1));
8133}
8134
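/// isLegalT2AddressImmediate - Thumb-2 integer accesses take either a positive
/// 12-bit offset (0..4095) or a negative 8-bit offset (-255..-1), and VFP
/// accesses take an 8-bit offset scaled by 4.  For example (illustrative),
/// "ldr r0, [r1, #4095]" is encodable in Thumb-2 but "ldr r0, [r1, #-4095]"
/// is not.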
8135static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
8136                                      const ARMSubtarget *Subtarget) {
8137  bool isNeg = false;
8138  if (V < 0) {
8139    isNeg = true;
8140    V = -V;
8141  }
8142
8143  switch (VT.getSimpleVT().SimpleTy) {
8144  default: return false;
8145  case MVT::i1:
8146  case MVT::i8:
8147  case MVT::i16:
8148  case MVT::i32:
8149    // + imm12 or - imm8
8150    if (isNeg)
8151      return V == (V & ((1LL << 8) - 1));
8152    return V == (V & ((1LL << 12) - 1));
8153  case MVT::f32:
8154  case MVT::f64:
8155    // Same as ARM mode. FIXME: NEON?
8156    if (!Subtarget->hasVFP2())
8157      return false;
8158    if ((V & 3) != 0)
8159      return false;
8160    V >>= 2;
8161    return V == (V & ((1LL << 8) - 1));
8162  }
8163}
8164
8165/// isLegalAddressImmediate - Return true if the integer value can be used
8166/// as the offset of the target addressing mode for load / store of the
8167/// given type.
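/// For example (illustrative), in ARM mode an i32 access accepts offsets in
/// [-4095, +4095], an i16 access only [-255, +255], and an f64 VFP access
/// only multiples of 4 up to +/-1020.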
8168static bool isLegalAddressImmediate(int64_t V, EVT VT,
8169                                    const ARMSubtarget *Subtarget) {
8170  if (V == 0)
8171    return true;
8172
8173  if (!VT.isSimple())
8174    return false;
8175
8176  if (Subtarget->isThumb1Only())
8177    return isLegalT1AddressImmediate(V, VT);
8178  else if (Subtarget->isThumb2())
8179    return isLegalT2AddressImmediate(V, VT, Subtarget);
8180
8181  // ARM mode.
8182  if (V < 0)
8183    V = -V;
8184  switch (VT.getSimpleVT().SimpleTy) {
8185  default: return false;
8186  case MVT::i1:
8187  case MVT::i8:
8188  case MVT::i32:
8189    // +- imm12
8190    return V == (V & ((1LL << 12) - 1));
8191  case MVT::i16:
8192    // +- imm8
8193    return V == (V & ((1LL << 8) - 1));
8194  case MVT::f32:
8195  case MVT::f64:
8196    if (!Subtarget->hasVFP2()) // FIXME: NEON?
8197      return false;
8198    if ((V & 3) != 0)
8199      return false;
8200    V >>= 2;
8201    return V == (V & ((1LL << 8) - 1));
8202  }
8203}
8204
8205bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
8206                                                      EVT VT) const {
8207  int Scale = AM.Scale;
8208  if (Scale < 0)
8209    return false;
8210
8211  switch (VT.getSimpleVT().SimpleTy) {
8212  default: return false;
8213  case MVT::i1:
8214  case MVT::i8:
8215  case MVT::i16:
8216  case MVT::i32:
8217    if (Scale == 1)
8218      return true;
8219    // r + r << imm
8220    Scale = Scale & ~1;
8221    return Scale == 2 || Scale == 4 || Scale == 8;
8222  case MVT::i64:
8223    // r + r
8224    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
8225      return true;
8226    return false;
8227  case MVT::isVoid:
8228    // Note: we allow "void" uses (basically, uses that aren't loads or
8229    // stores), because ARM allows folding a scale into many arithmetic
8230    // operations.  This should be made more precise and revisited later.
8231
8232    // Allow r << imm, but the imm has to be a multiple of two.
8233    if (Scale & 1) return false;
8234    return isPowerOf2_32(Scale);
8235  }
8236}
8237
8238/// isLegalAddressingMode - Return true if the addressing mode represented
8239/// by AM is legal for this target, for a load/store of the specified type.
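/// For example (illustrative), "r0 + (r1 << 2)" is a legal i32 address in ARM
/// and Thumb-2 modes, while "r0 + (r1 << 2) + 8" is not, since ARM has no
/// reg + scaled-reg + imm addressing form.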
8240bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
8241                                              Type *Ty) const {
8242  EVT VT = getValueType(Ty, true);
8243  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
8244    return false;
8245
8246  // Can never fold addr of global into load/store.
8247  if (AM.BaseGV)
8248    return false;
8249
8250  switch (AM.Scale) {
8251  case 0:  // no scale reg, must be "r+i" or "r", or "i".
8252    break;
8253  case 1:
8254    if (Subtarget->isThumb1Only())
8255      return false;
8256    // FALL THROUGH.
8257  default:
8258    // ARM doesn't support any R+R*scale+imm addr modes.
8259    if (AM.BaseOffs)
8260      return false;
8261
8262    if (!VT.isSimple())
8263      return false;
8264
8265    if (Subtarget->isThumb2())
8266      return isLegalT2ScaledAddressingMode(AM, VT);
8267
8268    int Scale = AM.Scale;
8269    switch (VT.getSimpleVT().SimpleTy) {
8270    default: return false;
8271    case MVT::i1:
8272    case MVT::i8:
8273    case MVT::i32:
8274      if (Scale < 0) Scale = -Scale;
8275      if (Scale == 1)
8276        return true;
8277      // r + r << imm
8278      return isPowerOf2_32(Scale & ~1);
8279    case MVT::i16:
8280    case MVT::i64:
8281      // r + r
8282      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
8283        return true;
8284      return false;
8285
8286    case MVT::isVoid:
8287      // Note: we allow "void" uses (basically, uses that aren't loads or
8288      // stores), because ARM allows folding a scale into many arithmetic
8289      // operations.  This should be made more precise and revisited later.
8290
8291      // Allow r << imm, but the imm has to be a multiple of two.
8292      if (Scale & 1) return false;
8293      return isPowerOf2_32(Scale);
8294    }
8295    break;
8296  }
8297  return true;
8298}
8299
8300/// isLegalICmpImmediate - Return true if the specified immediate is legal
8301/// icmp immediate, that is the target has icmp instructions which can compare
8302/// a register against the immediate without having to materialize the
8303/// immediate into a register.
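/// For example (illustrative), in ARM mode "cmp r0, #0xff00" is legal because
/// 0xff00 is an 8-bit value rotated by an even amount, whereas comparing
/// against 0x12345678 first requires materializing the constant in a register.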
8304bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8305  if (!Subtarget->isThumb())
8306    return ARM_AM::getSOImmVal(Imm) != -1;
8307  if (Subtarget->isThumb2())
8308    return ARM_AM::getT2SOImmVal(Imm) != -1;
8309  return Imm >= 0 && Imm <= 255;
8310}
8311
8312/// isLegalAddImmediate - Return true if the specified immediate is legal
8313/// add immediate, that is the target has add instructions which can add
8314/// a register with the immediate without having to materialize the
8315/// immediate into a register.
8316bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8317  return ARM_AM::getSOImmVal(Imm) != -1;
8318}
8319
8320static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
8321                                      bool isSEXTLoad, SDValue &Base,
8322                                      SDValue &Offset, bool &isInc,
8323                                      SelectionDAG &DAG) {
8324  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
8325    return false;
8326
8327  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
8328    // AddressingMode 3
8329    Base = Ptr->getOperand(0);
8330    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
8331      int RHSC = (int)RHS->getZExtValue();
8332      if (RHSC < 0 && RHSC > -256) {
8333        assert(Ptr->getOpcode() == ISD::ADD);
8334        isInc = false;
8335        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
8336        return true;
8337      }
8338    }
8339    isInc = (Ptr->getOpcode() == ISD::ADD);
8340    Offset = Ptr->getOperand(1);
8341    return true;
8342  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
8343    // AddressingMode 2
8344    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
8345      int RHSC = (int)RHS->getZExtValue();
8346      if (RHSC < 0 && RHSC > -0x1000) {
8347        assert(Ptr->getOpcode() == ISD::ADD);
8348        isInc = false;
8349        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
8350        Base = Ptr->getOperand(0);
8351        return true;
8352      }
8353    }
8354
8355    if (Ptr->getOpcode() == ISD::ADD) {
8356      isInc = true;
8357      ARM_AM::ShiftOpc ShOpcVal =
8358        ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
8359      if (ShOpcVal != ARM_AM::no_shift) {
8360        Base = Ptr->getOperand(1);
8361        Offset = Ptr->getOperand(0);
8362      } else {
8363        Base = Ptr->getOperand(0);
8364        Offset = Ptr->getOperand(1);
8365      }
8366      return true;
8367    }
8368
8369    isInc = (Ptr->getOpcode() == ISD::ADD);
8370    Base = Ptr->getOperand(0);
8371    Offset = Ptr->getOperand(1);
8372    return true;
8373  }
8374
8375  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
8376  return false;
8377}
8378
8379static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
8380                                     bool isSEXTLoad, SDValue &Base,
8381                                     SDValue &Offset, bool &isInc,
8382                                     SelectionDAG &DAG) {
8383  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
8384    return false;
8385
8386  Base = Ptr->getOperand(0);
8387  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
8388    int RHSC = (int)RHS->getZExtValue();
8389    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
8390      assert(Ptr->getOpcode() == ISD::ADD);
8391      isInc = false;
8392      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
8393      return true;
8394    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
8395      isInc = Ptr->getOpcode() == ISD::ADD;
8396      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
8397      return true;
8398    }
8399  }
8400
8401  return false;
8402}
8403
8404/// getPreIndexedAddressParts - Returns true if the node's address can be
8405/// legally represented as a pre-indexed load / store address, and sets the
8406/// base pointer, offset, and addressing mode by reference.
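/// For example (illustrative), a load from (add r0, #4) whose incremented
/// address is reused afterwards can become the pre-indexed
/// "ldr r1, [r0, #4]!", which updates r0 as part of the load.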
8407bool
8408ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
8409                                             SDValue &Offset,
8410                                             ISD::MemIndexedMode &AM,
8411                                             SelectionDAG &DAG) const {
8412  if (Subtarget->isThumb1Only())
8413    return false;
8414
8415  EVT VT;
8416  SDValue Ptr;
8417  bool isSEXTLoad = false;
8418  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
8419    Ptr = LD->getBasePtr();
8420    VT  = LD->getMemoryVT();
8421    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
8422  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
8423    Ptr = ST->getBasePtr();
8424    VT  = ST->getMemoryVT();
8425  } else
8426    return false;
8427
8428  bool isInc;
8429  bool isLegal = false;
8430  if (Subtarget->isThumb2())
8431    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
8432                                       Offset, isInc, DAG);
8433  else
8434    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
8435                                        Offset, isInc, DAG);
8436  if (!isLegal)
8437    return false;
8438
8439  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
8440  return true;
8441}
8442
8443/// getPostIndexedAddressParts - Returns true if this node can be combined
8444/// with a load / store to form a post-indexed load / store, and sets the
8445/// base pointer, offset, and addressing mode by reference.
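/// For example (illustrative), a load from r0 followed by "add r0, r0, #4"
/// can become the post-indexed "ldr r1, [r0], #4".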
8446bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
8447                                                   SDValue &Base,
8448                                                   SDValue &Offset,
8449                                                   ISD::MemIndexedMode &AM,
8450                                                   SelectionDAG &DAG) const {
8451  if (Subtarget->isThumb1Only())
8452    return false;
8453
8454  EVT VT;
8455  SDValue Ptr;
8456  bool isSEXTLoad = false;
8457  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
8458    VT  = LD->getMemoryVT();
8459    Ptr = LD->getBasePtr();
8460    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
8461  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
8462    VT  = ST->getMemoryVT();
8463    Ptr = ST->getBasePtr();
8464  } else
8465    return false;
8466
8467  bool isInc;
8468  bool isLegal = false;
8469  if (Subtarget->isThumb2())
8470    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
8471                                       isInc, DAG);
8472  else
8473    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
8474                                        isInc, DAG);
8475  if (!isLegal)
8476    return false;
8477
8478  if (Ptr != Base) {
8479    // Swap base ptr and offset to catch more post-index load / store when
8480    // it's legal. In Thumb2 mode, offset must be an immediate.
8481    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
8482        !Subtarget->isThumb2())
8483      std::swap(Base, Offset);
8484
8485    // Post-indexed load / store update the base pointer.
8486    if (Ptr != Base)
8487      return false;
8488  }
8489
8490  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
8491  return true;
8492}
8493
8494void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
8495                                                       const APInt &Mask,
8496                                                       APInt &KnownZero,
8497                                                       APInt &KnownOne,
8498                                                       const SelectionDAG &DAG,
8499                                                       unsigned Depth) const {
8500  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
8501  switch (Op.getOpcode()) {
8502  default: break;
8503  case ARMISD::CMOV: {
8504    // Bits are known zero/one if known on the LHS and RHS.
8505    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
8506    if (KnownZero == 0 && KnownOne == 0) return;
8507
8508    APInt KnownZeroRHS, KnownOneRHS;
8509    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
8510                          KnownZeroRHS, KnownOneRHS, Depth+1);
8511    KnownZero &= KnownZeroRHS;
8512    KnownOne  &= KnownOneRHS;
8513    return;
8514  }
8515  }
8516}
8517
8518//===----------------------------------------------------------------------===//
8519//                           ARM Inline Assembly Support
8520//===----------------------------------------------------------------------===//
8521
8522bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
8523  // We only recognize "rev", which requires ARMv6 or later.
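  // For example (illustrative), inline asm of the form
  //   asm("rev %0, %1" : "=l"(out) : "l"(in))
  // on a 32-bit integer is replaced with a call to llvm.bswap.i32, which the
  // optimizer can then treat like any other byte swap.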
8524  if (!Subtarget->hasV6Ops())
8525    return false;
8526
8527  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
8528  std::string AsmStr = IA->getAsmString();
8529  SmallVector<StringRef, 4> AsmPieces;
8530  SplitString(AsmStr, AsmPieces, ";\n");
8531
8532  switch (AsmPieces.size()) {
8533  default: return false;
8534  case 1:
8535    AsmStr = AsmPieces[0];
8536    AsmPieces.clear();
8537    SplitString(AsmStr, AsmPieces, " \t,");
8538
8539    // rev $0, $1
8540    if (AsmPieces.size() == 3 &&
8541        AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
8542        IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
8543      IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
8544      if (Ty && Ty->getBitWidth() == 32)
8545        return IntrinsicLowering::LowerToByteSwap(CI);
8546    }
8547    break;
8548  }
8549
8550  return false;
8551}
8552
8553/// getConstraintType - Given a constraint letter, return the type of
8554/// constraint it is for this target.
8555ARMTargetLowering::ConstraintType
8556ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
8557  if (Constraint.size() == 1) {
8558    switch (Constraint[0]) {
8559    default:  break;
8560    case 'l': return C_RegisterClass;
8561    case 'w': return C_RegisterClass;
8562    case 'h': return C_RegisterClass;
8563    case 'x': return C_RegisterClass;
8564    case 't': return C_RegisterClass;
8565    case 'j': return C_Other; // Constant for movw.
8566      // An address with a single base register. Due to the way we
8567      // currently handle addresses, it is the same as an 'r' memory constraint.
8568    case 'Q': return C_Memory;
8569    }
8570  } else if (Constraint.size() == 2) {
8571    switch (Constraint[0]) {
8572    default: break;
8573    // All constraints that start with 'U' are memory addresses.
8574    case 'U': return C_Memory;
8575    }
8576  }
8577  return TargetLowering::getConstraintType(Constraint);
8578}
8579
8580/// Examine constraint type and operand type and determine a weight value.
8581/// This object must already have been set up with the operand type
8582/// and the current alternative constraint selected.
8583TargetLowering::ConstraintWeight
8584ARMTargetLowering::getSingleConstraintMatchWeight(
8585    AsmOperandInfo &info, const char *constraint) const {
8586  ConstraintWeight weight = CW_Invalid;
8587  Value *CallOperandVal = info.CallOperandVal;
8588  // If we don't have a value, we can't do a match,
8589  // but allow it at the lowest weight.
8590  if (CallOperandVal == NULL)
8591    return CW_Default;
8592  Type *type = CallOperandVal->getType();
8593  // Look at the constraint type.
8594  switch (*constraint) {
8595  default:
8596    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
8597    break;
8598  case 'l':
8599    if (type->isIntegerTy()) {
8600      if (Subtarget->isThumb())
8601        weight = CW_SpecificReg;
8602      else
8603        weight = CW_Register;
8604    }
8605    break;
8606  case 'w':
8607    if (type->isFloatingPointTy())
8608      weight = CW_Register;
8609    break;
8610  }
8611  return weight;
8612}
8613
8614typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
8615RCPair
8616ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
8617                                                EVT VT) const {
8618  if (Constraint.size() == 1) {
8619    // GCC ARM Constraint Letters
8620    switch (Constraint[0]) {
8621    case 'l': // Low regs or general regs.
8622      if (Subtarget->isThumb())
8623        return RCPair(0U, ARM::tGPRRegisterClass);
8624      else
8625        return RCPair(0U, ARM::GPRRegisterClass);
8626    case 'h': // High regs or no regs.
8627      if (Subtarget->isThumb())
8628        return RCPair(0U, ARM::hGPRRegisterClass);
8629      break;
8630    case 'r':
8631      return RCPair(0U, ARM::GPRRegisterClass);
8632    case 'w':
8633      if (VT == MVT::f32)
8634        return RCPair(0U, ARM::SPRRegisterClass);
8635      if (VT.getSizeInBits() == 64)
8636        return RCPair(0U, ARM::DPRRegisterClass);
8637      if (VT.getSizeInBits() == 128)
8638        return RCPair(0U, ARM::QPRRegisterClass);
8639      break;
8640    case 'x':
8641      if (VT == MVT::f32)
8642        return RCPair(0U, ARM::SPR_8RegisterClass);
8643      if (VT.getSizeInBits() == 64)
8644        return RCPair(0U, ARM::DPR_8RegisterClass);
8645      if (VT.getSizeInBits() == 128)
8646        return RCPair(0U, ARM::QPR_8RegisterClass);
8647      break;
8648    case 't':
8649      if (VT == MVT::f32)
8650        return RCPair(0U, ARM::SPRRegisterClass);
8651      break;
8652    }
8653  }
8654  if (StringRef("{cc}").equals_lower(Constraint))
8655    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
8656
8657  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
8658}
8659
8660/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
8661/// vector.  If it is invalid, don't add anything to Ops.
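/// For example (illustrative), the constraint 'I' with the value 255 on a
/// Thumb-1 target yields a target constant, while 300 is rejected and nothing
/// is added to Ops.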
8662void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
8663                                                     std::string &Constraint,
8664                                                     std::vector<SDValue>&Ops,
8665                                                     SelectionDAG &DAG) const {
8666  SDValue Result(0, 0);
8667
8668  // Currently only support length 1 constraints.
8669  if (Constraint.length() != 1) return;
8670
8671  char ConstraintLetter = Constraint[0];
8672  switch (ConstraintLetter) {
8673  default: break;
8674  case 'j':
8675  case 'I': case 'J': case 'K': case 'L':
8676  case 'M': case 'N': case 'O':
8677    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
8678    if (!C)
8679      return;
8680
8681    int64_t CVal64 = C->getSExtValue();
8682    int CVal = (int) CVal64;
8683    // None of these constraints allow values larger than 32 bits.  Check
8684    // that the value fits in an int.
8685    if (CVal != CVal64)
8686      return;
8687
8688    switch (ConstraintLetter) {
8689      case 'j':
8690        // Constant suitable for movw, must be between 0 and
8691        // 65535.
8692        if (Subtarget->hasV6T2Ops())
8693          if (CVal >= 0 && CVal <= 65535)
8694            break;
8695        return;
8696      case 'I':
8697        if (Subtarget->isThumb1Only()) {
8698          // This must be a constant between 0 and 255, for ADD
8699          // immediates.
8700          if (CVal >= 0 && CVal <= 255)
8701            break;
8702        } else if (Subtarget->isThumb2()) {
8703          // A constant that can be used as an immediate value in a
8704          // data-processing instruction.
8705          if (ARM_AM::getT2SOImmVal(CVal) != -1)
8706            break;
8707        } else {
8708          // A constant that can be used as an immediate value in a
8709          // data-processing instruction.
8710          if (ARM_AM::getSOImmVal(CVal) != -1)
8711            break;
8712        }
8713        return;
8714
8715      case 'J':
8716        if (Subtarget->isThumb()) {  // FIXME thumb2
8717          // This must be a constant between -255 and -1, for negated ADD
8718          // immediates. This can be used in GCC with an "n" modifier that
8719          // prints the negated value, for use with SUB instructions. It is
8720          // not useful otherwise but is implemented for compatibility.
8721          if (CVal >= -255 && CVal <= -1)
8722            break;
8723        } else {
8724          // This must be a constant between -4095 and 4095. It is not clear
8725          // what this constraint is intended for. Implemented for
8726          // compatibility with GCC.
8727          if (CVal >= -4095 && CVal <= 4095)
8728            break;
8729        }
8730        return;
8731
8732      case 'K':
8733        if (Subtarget->isThumb1Only()) {
8734          // A 32-bit value where only one byte has a nonzero value. Exclude
8735          // zero to match GCC. This constraint is used by GCC internally for
8736          // constants that can be loaded with a move/shift combination.
8737          // It is not useful otherwise but is implemented for compatibility.
8738          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
8739            break;
8740        } else if (Subtarget->isThumb2()) {
8741          // A constant whose bitwise inverse can be used as an immediate
8742          // value in a data-processing instruction. This can be used in GCC
8743          // with a "B" modifier that prints the inverted value, for use with
8744          // BIC and MVN instructions. It is not useful otherwise but is
8745          // implemented for compatibility.
8746          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
8747            break;
8748        } else {
8749          // A constant whose bitwise inverse can be used as an immediate
8750          // value in a data-processing instruction. This can be used in GCC
8751          // with a "B" modifier that prints the inverted value, for use with
8752          // BIC and MVN instructions. It is not useful otherwise but is
8753          // implemented for compatibility.
8754          if (ARM_AM::getSOImmVal(~CVal) != -1)
8755            break;
8756        }
8757        return;
8758
8759      case 'L':
8760        if (Subtarget->isThumb1Only()) {
8761          // This must be a constant between -7 and 7,
8762          // for 3-operand ADD/SUB immediate instructions.
8763          if (CVal >= -7 && CVal < 7)
8764            break;
8765        } else if (Subtarget->isThumb2()) {
8766          // A constant whose negation can be used as an immediate value in a
8767          // data-processing instruction. This can be used in GCC with an "n"
8768          // modifier that prints the negated value, for use with SUB
8769          // instructions. It is not useful otherwise but is implemented for
8770          // compatibility.
8771          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
8772            break;
8773        } else {
8774          // A constant whose negation can be used as an immediate value in a
8775          // data-processing instruction. This can be used in GCC with an "n"
8776          // modifier that prints the negated value, for use with SUB
8777          // instructions. It is not useful otherwise but is implemented for
8778          // compatibility.
8779          if (ARM_AM::getSOImmVal(-CVal) != -1)
8780            break;
8781        }
8782        return;
8783
8784      case 'M':
8785        if (Subtarget->isThumb()) { // FIXME thumb2
8786          // This must be a multiple of 4 between 0 and 1020, for
8787          // ADD sp + immediate.
8788          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
8789            break;
8790        } else {
8791          // A power of two or a constant between 0 and 32.  This is used in
8792          // GCC for the shift amount on shifted register operands, but it is
8793          // useful in general for any shift amounts.
8794          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
8795            break;
8796        }
8797        return;
8798
8799      case 'N':
8800        if (Subtarget->isThumb()) {  // FIXME thumb2
8801          // This must be a constant between 0 and 31, for shift amounts.
8802          if (CVal >= 0 && CVal <= 31)
8803            break;
8804        }
8805        return;
8806
8807      case 'O':
8808        if (Subtarget->isThumb()) {  // FIXME thumb2
8809          // This must be a multiple of 4 between -508 and 508, for
8810          // ADD/SUB sp = sp + immediate.
8811          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
8812            break;
8813        }
8814        return;
8815    }
8816    Result = DAG.getTargetConstant(CVal, Op.getValueType());
8817    break;
8818  }
8819
8820  if (Result.getNode()) {
8821    Ops.push_back(Result);
8822    return;
8823  }
8824  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8825}
8826
8827bool
8828ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
8829  // The ARM target isn't yet aware of offsets.
8830  return false;
8831}
8832
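/// isBitFieldInvertedMask - Return true if the mask is the complement of a
/// single contiguous run of set bits.  For example (illustrative), 0xf000000f
/// (the inverse of the bit field 0x0ffffff0) qualifies, while 0xff00ff00 does
/// not.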
8833bool ARM::isBitFieldInvertedMask(unsigned v) {
8834  if (v == 0xffffffff)
8835    return false;
8836  // There can be 1's on either or both "outsides"; all the "inside"
8837  // bits must be 0's.
8838  unsigned int lsb = 0, msb = 31;
8839  while (v & (1 << msb)) --msb;
8840  while (v & (1 << lsb)) ++lsb;
8841  for (unsigned int i = lsb; i <= msb; ++i) {
8842    if (v & (1 << i))
8843      return false;
8844  }
8845  return true;
8846}
8847
8848/// isFPImmLegal - Returns true if the target can instruction select the
8849/// specified FP immediate natively. If false, the legalizer will
8850/// materialize the FP immediate as a load from a constant pool.
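/// For example (illustrative), with VFP3 the constants 1.0 and 0.5 can be
/// materialized directly with an immediate "vmov", while 0.1 cannot be encoded
/// and is loaded from a constant pool instead.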
8851bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
8852  if (!Subtarget->hasVFP3())
8853    return false;
8854  if (VT == MVT::f32)
8855    return ARM_AM::getFP32Imm(Imm) != -1;
8856  if (VT == MVT::f64)
8857    return ARM_AM::getFP64Imm(Imm) != -1;
8858  return false;
8859}
8860
8861/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
8862/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
8863/// specified in the intrinsic calls.
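/// For example (illustrative), a vld3 of <4 x i32> vectors loads 48 bytes, so
/// memVT is conservatively set to v6i64 and the alignment is taken from the
/// intrinsic's trailing alignment argument.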
8864bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8865                                           const CallInst &I,
8866                                           unsigned Intrinsic) const {
8867  switch (Intrinsic) {
8868  case Intrinsic::arm_neon_vld1:
8869  case Intrinsic::arm_neon_vld2:
8870  case Intrinsic::arm_neon_vld3:
8871  case Intrinsic::arm_neon_vld4:
8872  case Intrinsic::arm_neon_vld2lane:
8873  case Intrinsic::arm_neon_vld3lane:
8874  case Intrinsic::arm_neon_vld4lane: {
8875    Info.opc = ISD::INTRINSIC_W_CHAIN;
8876    // Conservatively set memVT to the entire set of vectors loaded.
8877    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
8878    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
8879    Info.ptrVal = I.getArgOperand(0);
8880    Info.offset = 0;
8881    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
8882    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
8883    Info.vol = false; // volatile loads with NEON intrinsics not supported
8884    Info.readMem = true;
8885    Info.writeMem = false;
8886    return true;
8887  }
8888  case Intrinsic::arm_neon_vst1:
8889  case Intrinsic::arm_neon_vst2:
8890  case Intrinsic::arm_neon_vst3:
8891  case Intrinsic::arm_neon_vst4:
8892  case Intrinsic::arm_neon_vst2lane:
8893  case Intrinsic::arm_neon_vst3lane:
8894  case Intrinsic::arm_neon_vst4lane: {
8895    Info.opc = ISD::INTRINSIC_VOID;
8896    // Conservatively set memVT to the entire set of vectors stored.
8897    unsigned NumElts = 0;
8898    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
8899      Type *ArgTy = I.getArgOperand(ArgI)->getType();
8900      if (!ArgTy->isVectorTy())
8901        break;
8902      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
8903    }
8904    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
8905    Info.ptrVal = I.getArgOperand(0);
8906    Info.offset = 0;
8907    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
8908    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
8909    Info.vol = false; // volatile stores with NEON intrinsics not supported
8910    Info.readMem = false;
8911    Info.writeMem = true;
8912    return true;
8913  }
8914  case Intrinsic::arm_strexd: {
8915    Info.opc = ISD::INTRINSIC_W_CHAIN;
8916    Info.memVT = MVT::i64;
8917    Info.ptrVal = I.getArgOperand(2);
8918    Info.offset = 0;
8919    Info.align = 8;
8920    Info.vol = true;
8921    Info.readMem = false;
8922    Info.writeMem = true;
8923    return true;
8924  }
8925  case Intrinsic::arm_ldrexd: {
8926    Info.opc = ISD::INTRINSIC_W_CHAIN;
8927    Info.memVT = MVT::i64;
8928    Info.ptrVal = I.getArgOperand(0);
8929    Info.offset = 0;
8930    Info.align = 8;
8931    Info.vol = true;
8932    Info.readMem = true;
8933    Info.writeMem = false;
8934    return true;
8935  }
8936  default:
8937    break;
8938  }
8939
8940  return false;
8941}
8942