ARMISelLowering.cpp revision bb1078ea1370fd0cc32f52b1b53f0b245ded42e7
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

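// CCState subclass that additionally records whether the arguments being
// analyzed belong to a call site or to a function prologue (ParmContext).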
namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
        : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
static const uint16_t GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

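// Set up the common operation actions for a NEON vector type: promote loads,
// stores and bitwise operations to the given promoted types, custom-lower
// lane insert/extract, shuffles, shifts and (for i32 elements) int<->FP
// conversions, and expand everything NEON has no native support for
// (selects, sign extension in register, division and remainder).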
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT,            VT, Expand);
  setOperationAction(ISD::SELECT_CC,         VT, Expand);
  setOperationAction(ISD::VSELECT,           VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR,  VT, Promote);
    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

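// Register a 64-bit (D register) vector type with the NEON register class and
// configure its operation actions.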
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

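// Register a 128-bit (Q register) vector type with the NEON register class
// and configure its operation actions.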
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::QPRRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

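// Select the object-file lowering: Mach-O for Darwin targets, ELF otherwise.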
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);

    // Floating-point to integer conversions.
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, &ARM::DPRRegClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

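  // Start from a conservative default for vector memory access: expand every
  // truncating store and every extending load between vector types. The
  // extending loads that NEON can handle are marked Legal again below when
  // the subtarget has NEON.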
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP supports any arithmetic operations on it.
    // The same applies to v4f32, except that vadd, vsub and vmul are natively
    // supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create a unit test; in other words, find a case in which
    // "copysign" appears in a DAG with vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have a single-instruction SINT_TO_FP or UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have
    // an FP_TO_[SU]INT instruction with a narrower destination than the
    // source.
537b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
538b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
539b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
540b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    }
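    // For illustration: with these actions a pattern such as
    //   %v = load <4 x i8>* %p
    //   %w = zext <4 x i8> %v to <4 x i16>
    // is kept as a single extending-load node that the ARM patterns can match
    // (e.g. as a narrow vld1 followed by a widening VMOVL) rather than being
    // broken up by generic legalization.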
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);


  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }
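  // The indexed modes cover pre- and post-indexed addressing, i.e. forms like
  // "ldr r0, [r1, #4]!" (pre-indexed with writeback) and "ldr r0, [r1], #4"
  // (post-indexed), which is why every mode in the loop above can be marked
  // Legal for the scalar integer types.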

  // i64 operation support.
  setOperationAction(ISD::MUL,     MVT::i64, Expand);
  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);
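  // The Custom entries for 64-bit SRL/SRA exist so that, roughly, a shift
  // right by exactly one can be selected as a flag-setting 32-bit shift of the
  // high half followed by an RRX of the low half (see ARMISD::SRL_FLAG,
  // ARMISD::SRA_FLAG and ARMISD::RRX); other shift amounts fall back to the
  // usual two-register shift-parts lowering.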

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC,    MVT::i32, Custom);
    setOperationAction(ISD::ADDE,    MVT::i32, Custom);
    setOperationAction(ISD::SUBC,    MVT::i32, Custom);
    setOperationAction(ISD::SUBE,    MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
  }
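  // Once expanded, 32-bit division becomes a runtime call; on AAPCS targets
  // these typically resolve to the EABI helpers (__aeabi_idiv / __aeabi_uidiv),
  // and elsewhere to the libgcc-style __divsi3 / __udivsi3.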
  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);

  if (!Subtarget->isTargetDarwin()) {
    // Non-Darwin platforms may return values in these registers via the
    // personality function.
    setOperationAction(ISD::EHSELECTION,      MVT::i32,   Expand);
    setOperationAction(ISD::EXCEPTIONADDR,    MVT::i32,   Expand);
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  // FIXME: This should be checking for v6k, not just v6.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    setInsertFencesForAtomic(true);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    // Since the libcalls include locking, fold in the fences.
    setShouldFoldAtomicFences(true);
  }
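  // In the Expand case the atomic operations end up as __sync_* library calls
  // (e.g. __sync_fetch_and_add_4), which is why any fences around them can
  // simply be folded into the calls above.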

  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR
    // iff the target supports VFP2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
  maxStoresPerMemset = 16;
  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  // Prefer likely predicted branches to selects on out-of-order cores.
  predictableSelectIsExpensive = Subtarget->isLikeA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as the representative register class for all floating-point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMN:           return "ARMISD::CMN";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";

  case ARMISD::CMOV:          return "ARMISD::CMOV";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::ADDC:          return "ARMISD::ADDC";
  case ARMISD::ADDE:          return "ARMISD::ADDE";
  case ARMISD::SUBC:          return "ARMISD::SUBC";
  case ARMISD::SUBE:          return "ARMISD::SUBE";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
  case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
  case ARMISD::VTRN:          return "ARMISD::VTRN";
  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
  case ARMISD::UMLAL:         return "ARMISD::UMLAL";
  case ARMISD::SMLAL:         return "ARMISD::SMLAL";
  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX:          return "ARMISD::FMAX";
  case ARMISD::FMIN:          return "ARMISD::FMIN";
  case ARMISD::BFI:           return "ARMISD::BFI";
  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
  case ARMISD::VBSL:          return "ARMISD::VBSL";
  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
  }
}

EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return getPointerTy();
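  // For vectors the result keeps the element count and takes integer elements
  // of the same width as the operands, e.g. a setcc on v4f32 yields v4i32;
  // this matches the all-ones / all-zeros lane masks produced by the NEON
  // compare instructions.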
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
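  // Several FP conditions have no single ARM condition code and come back as a
  // pair: e.g. SETONE ("ordered and not equal") maps to MI plus GT, and SETUEQ
  // to EQ plus VS. Roughly, when CondCode2 is left as AL only one predicated
  // operation is needed; otherwise the lowering emits a second one using
  // CondCode2.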
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fallthrough
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
  }
}
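
// Illustrative note (editorial sketch): for CallingConv::C the assignment
// function is picked from the subtarget.  A hard-float AAPCS target with
// VFP2 (e.g. an "armv7-...-gnueabihf"-style triple, used here only as an
// example) resolves a non-variadic call to CC_ARM_AAPCS_VFP, a soft-float
// AAPCS target uses CC_ARM_AAPCS, and a pre-AAPCS target uses CC_ARM_APCS.
// Variadic calls never take the VFP variant: variadic arguments always go
// in core registers or on the stack.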

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
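
// Illustrative note (editorial sketch): under the soft-float AAPCS/APCS
// return convention an f64 result comes back in the r0/r1 pair, so the code
// above issues two i32 CopyFromReg nodes and glues the halves back together
// with ARMISD::VMOVDRR; a v2f64 result repeats this for both elements and
// rebuilds the vector with INSERT_VECTOR_ELT.  Under the hard-float (VFP)
// convention the value is already an f64 in d0 and takes the plain
// CopyFromReg path.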

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}
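
// Illustrative note (editorial sketch): the store address above is simply
// SP plus the location's stack offset, so an argument assigned to outgoing
// offset 8 ends up as roughly "str rN, [sp, #8]" once callseq_start has
// reserved the outgoing-argument area; the actual addressing mode is chosen
// later during instruction selection.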

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}
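
// Illustrative note (editorial sketch): this implements the soft-float rule
// that an f64 argument travels as a pair of i32 values.  ARMISD::VMOVRRD
// (a "vmov rLo, rHi, dN" at the machine level) splits the double; the low
// word always lands in the register named by VA (e.g. r0 or r2), while the
// high word goes either to the next register or, when the pair straddles
// the register/stack boundary, to the first outgoing stack slot.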

/// LowerCall - Lowers a call into a callseq_start <- ARMISD::CALL <-
/// callseq_end chain.  Also adds input and output parameter nodes.
SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  DebugLoc &dl                          = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool doesNotRet                       = CLI.DoesNotReturn;
  bool isVarArg                         = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Disable tail calls if they're not supported.
  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                                                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      unsigned offset = 0;

      // True if this byval aggregate will be split between registers
      // and memory.
      if (CCInfo.isFirstByValRegValid()) {
        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        unsigned int i, j;
        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }
        offset = ARM::R4 - CCInfo.getFirstByValReg();
        CCInfo.clearFirstByValReg();
      }

      if (Flags.getByValSize() - 4*offset > 0) {
        unsigned LocMemOffset = VA.getLocMemOffset();
        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
        SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                  StkPtrOff);
        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                           MVT::i32);
        SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);

        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                          Ops, array_lengthof(Ops)));
      }
    } else if (!IsSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);

      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                      ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      // On ELF targets for PIC code, direct calls should go through the PLT
      unsigned OpFlags = 0;
      if (Subtarget->isTargetELF() &&
                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                      ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      unsigned OpFlags = 0;
      // On ELF targets for PIC code, direct calls should go through the PLT
      if (Subtarget->isTargetELF() &&
                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
    }
  }
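
  // Illustrative note (editorial sketch): at this point Callee is either a
  // TargetGlobalAddress/TargetExternalSymbol (the ordinary direct-call case),
  // a load from a constant-pool slot that holds the callee's address (long
  // calls, and the Thumb1 interworking path, which additionally applies the
  // PIC label via ARMISD::PIC_ADD), or left untouched because the call was
  // already indirect through a register.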

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else if (doesNotRet && isDirect && !isARMFunc &&
             Subtarget->hasRAS() && !Subtarget->isThumb1Only())
      // "mov lr, pc; b _foo" to avoid confusing the RSP
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    if (!isDirect && !Subtarget->hasV5TOps()) {
      CallOpc = ARMISD::CALL_NOLINK;
    } else if (doesNotRet && isDirect && Subtarget->hasRAS())
      // "mov lr, pc; b _foo" to avoid confusing the RSP
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
  }
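
  // Illustrative note (editorial sketch): the opcode chosen above maps
  // roughly to "bl"/"blx" (CALL/tCALL), a predicable "bl" for calls to local
  // ARM functions (CALL_PRED), or a "mov lr, pc; b<x> ..." style sequence
  // (CALL_NOLINK) for pre-v5 indirect/interworking calls and for direct
  // calls to noreturn functions, where a plain branch avoids polluting the
  // return-address (stack) predictor.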

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack.  Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
  assert((State->getCallOrPrologue() == Prologue ||
          State->getCallOrPrologue() == Call) &&
         "unhandled ParmContext");
  if ((!State->isFirstByValRegValid()) &&
      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
    State->setFirstByValReg(reg);
    // At a call site, a byval parameter that is split between
    // registers and memory needs its size truncated here.  In a
    // function prologue, such byval parameters are reassembled in
    // memory, and are not truncated.
    if (State->getCallOrPrologue() == Call) {
      unsigned excess = 4 * (ARM::R4 - reg);
      assert(size >= excess && "expected larger existing stack allocation");
      size -= excess;
    }
  }
  // Confiscate any remaining parameter registers to preclude their
  // assignment to subsequent parameters.
  while (State->AllocateReg(GPRArgRegs, 4))
    ;
}
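
// Illustrative note (editorial sketch): with the AAPCS GPR order r0-r3, a
// byval argument whose first free register is r2 has r2 recorded as its
// first byval register; at a call site the byval's remaining stack size is
// then reduced by the 8 bytes that r2 and r3 will carry, and the call
// lowering above loads those 8 bytes into r2/r3.  The trailing AllocateReg
// loop burns whatever registers remain so that every later parameter is
// forced onto the stack.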
1682521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath
16836d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard/// MatchingStackOffset - Return true if the given stack call argument is
1684521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath/// already available in the same position (relatively) of the caller's
1685521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath/// incoming argument stack.
1686521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrathstatic
1687521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrathbool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1688521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1689521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                         const TargetInstrInfo *TII) {
1690521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
16916d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  int FI = INT_MAX;
16926d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  if (Arg.getOpcode() == ISD::CopyFromReg) {
16936d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1694521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    if (!TargetRegisterInfo::isVirtualRegister(VR))
1695521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      return false;
1696521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    MachineInstr *Def = MRI->getVRegDef(VR);
16976d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    if (!Def)
1698521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      return false;
1699521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    if (!Flags.isByVal()) {
1700521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      if (!TII->isLoadFromStackSlot(Def, FI))
1701521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath        return false;
1702521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    } else {
1703a2b964c7dbbf54162b2c50931c172568fb4cfa70Mark Wielaard      return false;
17046d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    }
17056d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
17066d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    if (Flags.isByVal())
1707521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      // ByVal argument is passed in as a pointer but it's now being
1708521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      // dereferenced. e.g.
1709521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      // define void @foo(%struct.X* %A) {
1710521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      //   tail call void @bar(%struct.X* byval %A)
1711521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      // }
1712521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      return false;
1713521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    SDValue Ptr = Ld->getBasePtr();
1714521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1715521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    if (!FINode)
1716a2b964c7dbbf54162b2c50931c172568fb4cfa70Mark Wielaard      return false;
1717a2b964c7dbbf54162b2c50931c172568fb4cfa70Mark Wielaard    FI = FINode->getIndex();
1718521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath  } else
1719521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    return false;
1720521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath
1721521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath  assert(FI != INT_MAX);
1722521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath  if (!MFI->isFixedObjectIndex(FI))
1723521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    return false;
1724521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1725521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath}
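// For illustration (hypothetical IR), one case this check is meant to accept:
// the caller forwards its own fifth argument unchanged, so the outgoing value
// is a load from a fixed frame index whose offset and size match the slot the
// callee expects.
//   declare i32 @callee(i32, i32, i32, i32, i32)
//   define i32 @caller(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
//     %r = tail call i32 @callee(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
//     ret i32 %r
//   }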
1726521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath
1727521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath/// IsEligibleForTailCallOptimization - Check whether the call is eligible
1728521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath/// for tail call optimization. Targets which want to do tail call
1729521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath/// optimization should implement this function.
17306d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaardbool
1731521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrathARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
17326d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                                     CallingConv::ID CalleeCC,
17336d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                                     bool isVarArg,
17346d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                                     bool isCalleeStructRet,
17356d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                                     bool isCallerStructRet,
1736521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1737521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                                    const SmallVectorImpl<SDValue> &OutVals,
1738521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                                    const SmallVectorImpl<ISD::InputArg> &Ins,
1739521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                                                     SelectionDAG& DAG) const {
1740521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath  const Function *CallerF = DAG.getMachineFunction().getFunction();
1741521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath  CallingConv::ID CallerCC = CallerF->getCallingConv();
1742b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  bool CCMatch = CallerCC == CalleeCC;
17431662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17441662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // Look for obvious safe cases to perform tail call optimization that do not
17451662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // require ABI changes. This is what gcc calls sibcall.
17461662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17471662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // Do not sibcall optimize vararg calls unless the call site is not passing
17481662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // any arguments.
17491662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (isVarArg && !Outs.empty())
17501662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    return false;
17511662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17521662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // Also avoid sibcall optimization if either caller or callee uses struct
17531662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // return semantics.
17541662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (isCalleeStructRet || isCallerStructRet)
17551662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    return false;
17561662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17571662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
17581662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
17591662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
17601662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // support in the assembler and linker to be used. This would need to be
17611662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // fixed to fully support tail calls in Thumb1.
17621662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  //
17631662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
17641662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // LR.  This means if we need to reload LR, it takes an extra instruction,
17651662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // which outweighs the value of the tail call; but here we don't know yet
17661662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // whether LR is going to be used.  Probably the right approach is to
17671662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // generate the tail call here and turn it back into CALL/RET in
17681662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // emitEpilogue if LR is used.
17691662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17701662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
17711662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // but we need to make sure there are enough registers; the only valid
17721662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // registers are the 4 used for parameters.  We don't currently handle this
17731662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // case.
17741662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (Subtarget->isThumb1Only())
17751662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    return false;
17761662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17771662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // If the calling conventions do not match, then we'd better make sure the
17781662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // results are returned in the same way as what the caller expects.
17796d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  if (!CCMatch) {
17806d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    SmallVector<CCValAssign, 16> RVLocs1;
17811662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
17821662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
17831662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
17841662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17851662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    SmallVector<CCValAssign, 16> RVLocs2;
17866d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
17876d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
17881662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
17891662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
17901662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    if (RVLocs1.size() != RVLocs2.size())
17911662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      return false;
17921662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
17931662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
17946d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard        return false;
17956d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
17961662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        return false;
17971662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      if (RVLocs1[i].isRegLoc()) {
17981662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
17991662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard          return false;
18001662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      } else {
18011662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
18021662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard          return false;
18031662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      }
180463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    }
18051662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  }
18061662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
18071662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // If the caller's vararg or byval argument has been split between registers
18081662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // and stack, do not perform a tail call, since part of the argument is in the caller's
18091662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // local frame.
18101662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
18111662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                                      getInfo<ARMFunctionInfo>();
18121662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (AFI_Caller->getVarArgsRegSaveSize())
18131662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    return false;
18141662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
18151662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // If the callee takes no arguments then go on to check the results of the
18161662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  // call.
18171662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (!Outs.empty()) {
18181662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    // Check if stack adjustment is needed. For now, do not do this if any
18191662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    // argument is passed on the stack.
18201662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    SmallVector<CCValAssign, 16> ArgLocs;
18211662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
18221662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
18231662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    CCInfo.AnalyzeCallOperands(Outs,
18241662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                               CCAssignFnForNode(CalleeCC, false, isVarArg));
18251662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    if (CCInfo.getNextStackOffset()) {
182663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      MachineFunction &MF = DAG.getMachineFunction();
182763868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
18286d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard      // Check if the arguments are already laid out in the right way as
18296d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard      // the caller's fixed stack objects.
18306d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard      MachineFrameInfo *MFI = MF.getFrameInfo();
183163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      const MachineRegisterInfo *MRI = &MF.getRegInfo();
18321662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
183363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
18341662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard           i != e;
18351662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard           ++i, ++realArgIdx) {
183663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        CCValAssign &VA = ArgLocs[i];
18371662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        EVT RegVT = VA.getLocVT();
18381662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        SDValue Arg = OutVals[realArgIdx];
183963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
184063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        if (VA.getLocInfo() == CCValAssign::Indirect)
18411662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard          return false;
184263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        if (VA.needsCustom()) {
184363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard          // f64 and vector types are split into multiple registers or
184463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard          // register/stack-slot combinations.  The types will not match
184563868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard          // the registers; give up on memory f64 refs until we figure
184663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard          // out what to do about this.
18471662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard          if (!VA.isRegLoc())
184863868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard            return false;
184963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard          if (!ArgLocs[++i].isRegLoc())
185063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard            return false;
185163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard          if (RegVT == MVT::v2f64) {
185202a958bc2662c1c9c2d6b663742b9c8e720e25b2Mark Wielaard            if (!ArgLocs[++i].isRegLoc())
185302a958bc2662c1c9c2d6b663742b9c8e720e25b2Mark Wielaard              return false;
18546d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard            if (!ArgLocs[++i].isRegLoc())
18556d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard              return false;
18566d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard          }
18576d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard        } else if (!VA.isRegLoc()) {
18581662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
185963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                                   MFI, MRI, TII))
186063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard            return false;
186163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        }
186263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      }
186363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    }
186463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  }
186563868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
186663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  return true;
186763868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard}
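// For illustration (hypothetical IR): the first call below is a candidate for
// sibcall optimization; the second is rejected above because both caller and
// callee use struct-return semantics.
//   %struct.S = type { i32 }
//   declare i32 @g(i32)
//   declare void @k(%struct.S* sret, i32)
//   define i32 @f(i32 %x) {
//     %r = tail call i32 @g(i32 %x)
//     ret i32 %r
//   }
//   define void @h(%struct.S* sret %out, i32 %x) {
//     tail call void @k(%struct.S* sret %out, i32 %x)
//     ret void
//   }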
186863868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
186963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark WielaardSDValue
187063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark WielaardARMTargetLowering::LowerReturn(SDValue Chain,
187163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                               CallingConv::ID CallConv, bool isVarArg,
187263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                               const SmallVectorImpl<ISD::OutputArg> &Outs,
187363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                               const SmallVectorImpl<SDValue> &OutVals,
187463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                               DebugLoc dl, SelectionDAG &DAG) const {
187563868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
187663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  // CCValAssign - represent the assignment of the return value to a location.
187763868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  SmallVector<CCValAssign, 16> RVLocs;
187863868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
187963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  // CCState - Info about the registers and stack slots.
188063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
188163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
188263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
188363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  // Analyze outgoing return values.
188463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
188563868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                                               isVarArg));
188663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
188763868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  // If this is the first return lowered for this function, add
188863868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  // the regs to the liveout set for the function.
188963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
189063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    for (unsigned i = 0; i != RVLocs.size(); ++i)
189163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      if (RVLocs[i].isRegLoc())
189263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
189363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  }
189463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
189563868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  SDValue Flag;
189663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
189763868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard  // Copy the result values into the output registers.
18981662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  for (unsigned i = 0, realRVLocIdx = 0;
189963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard       i != RVLocs.size();
19001662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard       ++i, ++realRVLocIdx) {
190163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    CCValAssign &VA = RVLocs[i];
190263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    assert(VA.isRegLoc() && "Can only return in registers!");
19031662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
190463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    SDValue Arg = OutVals[realRVLocIdx];
190563868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
190663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    switch (VA.getLocInfo()) {
190763868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    default: llvm_unreachable("Unknown loc info!");
190863868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    case CCValAssign::Full: break;
190963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    case CCValAssign::BCvt:
191063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
191163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      break;
191263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    }
191363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
19141662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    if (VA.needsCustom()) {
191563868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      if (VA.getLocVT() == MVT::v2f64) {
191663868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        // Extract the first half and return it in two registers.
191763868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
191863868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                                   DAG.getConstant(0, MVT::i32));
191963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
192063868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard                                       DAG.getVTList(MVT::i32, MVT::i32), Half);
192163868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard
192263868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
192363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        Flag = Chain.getValue(1);
192463868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard        VA = RVLocs[++i]; // skip ahead to next loc
19251662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
19261662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                                 HalfGPRs.getValue(1), Flag);
19271662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        Flag = Chain.getValue(1);
19281662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard        VA = RVLocs[++i]; // skip ahead to next loc
19296d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard
19306d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard        // Extract the 2nd half and fall through to handle it as an f64 value.
19316d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
19321662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                          DAG.getConstant(1, MVT::i32));
19331662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      }
19341662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
19351662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      // available.
19361662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
19371662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
19381662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
193963868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard      Flag = Chain.getValue(1);
19401662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      VA = RVLocs[++i]; // skip ahead to next loc
19411662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
19421662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard                               Flag);
19431662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    } else
19441662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
19451662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
19461662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    // Guarantee that all emitted copies are glued together, so nothing can be
19471662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    // scheduled in between them and clobber the return registers.
19481662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    Flag = Chain.getValue(1);
19491662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  }
19501662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
19511662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  SDValue result;
19521662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (Flag.getNode())
195363868c2afb1123bf8ac2f99048e6f3f70dcf4c0eMark Wielaard    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
19541662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  else // Return Void
19551662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
19561662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
19571662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  return result;
19581662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard}
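// For illustration: under the default AAPCS (soft-float) return convention a
// 'ret double' takes the needsCustom() path above and produces, roughly,
//   t = ARMISD::VMOVRRD f64 %val                 ; split into two i32 halves
//   CopyToReg R0 <- t(0), CopyToReg R1 <- t(1)   ; glued copies
//   ARMISD::RET_FLAG Chain, Glue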
19591662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
19601662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaardbool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19611662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (N->getNumValues() != 1)
19621662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    return false;
19631662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (!N->hasNUsesOfValue(1, 0))
19641662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    return false;
19651662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard
19661662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  SDValue TCChain = Chain;
19671662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  SDNode *Copy = *N->use_begin();
19681662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard  if (Copy->getOpcode() == ISD::CopyToReg) {
19691662bc3880ae5acae6aa2a3013d193223c36f189Mark Wielaard    // If the copy has a glue operand, we conservatively assume it isn't safe to
1970b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    // perform a tail call.
1971b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
1972b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      return false;
1973b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    TCChain = Copy->getOperand(0);
1974b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
19753cbdd387c752999255aea91600b5cfdefbeac7d0Ulrich Drepper    SDNode *VMov = Copy;
1976b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    // f64 returned in a pair of GPRs.
1977b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    SmallPtrSet<SDNode*, 2> Copies;
19786d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
1979b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper         UI != UE; ++UI) {
1980b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      if (UI->getOpcode() != ISD::CopyToReg)
1981b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper        return false;
1982b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      Copies.insert(*UI);
1983b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    }
1984b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    if (Copies.size() > 2)
1985b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      return false;
1986b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
1987b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
1988b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper         UI != UE; ++UI) {
1989b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      SDValue UseChain = UI->getOperand(0);
1990b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      if (Copies.count(UseChain.getNode()))
1991b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper        // Second CopyToReg
1992b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper        Copy = *UI;
1993b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      else
19946d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard        // First CopyToReg
1995b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper        TCChain = UseChain;
1996b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    }
1997f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  } else if (Copy->getOpcode() == ISD::BITCAST) {
1998b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    // f32 returned in a single GPR.
1999f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    if (!Copy->hasOneUse())
2000f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath      return false;
2001f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    Copy = *Copy->use_begin();
2002f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2003f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath      return false;
2004f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    Chain = Copy->getOperand(0);
2005f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  } else {
2006f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    return false;
2007f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  }
2008f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath
2009f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  bool HasRet = false;
2010f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2011f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath       UI != UE; ++UI) {
2012f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    if (UI->getOpcode() != ARMISD::RET_FLAG)
2013f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath      return false;
2014f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    HasRet = true;
2015f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  }
2016f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath
2017b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (!HasRet)
2018f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath    return false;
2019f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath
2020f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  Chain = TCChain;
2021f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath  return true;
2022f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath}
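// For illustration: the walk above succeeds when N's single result feeds the
// return and nothing else, e.g. a lone CopyToReg R0 <- N whose only user is
// ARMISD::RET_FLAG; Chain is then rewritten to the chain entering that copy.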
2023f151b7d4b103b6d07ac510833cb2550de064621cRoland McGrath
2024b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepperbool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2025b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
2026b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    return false;
2027b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2028b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (!CI->isTailCall())
2029b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    return false;
2030b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2031b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  return !Subtarget->isThumb1Only();
2032b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper}
2033b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2034b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2035b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2036b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper// one of the above mentioned nodes. It has to be wrapped because otherwise
2037b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper// one of the above-mentioned nodes. It has to be wrapped because otherwise
2038b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper// be used to form an addressing mode. These wrapped nodes will be selected
20390b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath// into MOVi.
2040b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepperstatic SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2041b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  EVT PtrVT = Op.getValueType();
2042b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  // FIXME there is no actual debug info here
2043b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  DebugLoc dl = Op.getDebugLoc();
2044b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2045b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  SDValue Res;
2046b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (CP->isMachineConstantPoolEntry())
2047b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2048b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                                    CP->getAlignment());
2049b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  else
2050b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2051b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                                    CP->getAlignment());
2052b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2053b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper}
2054b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2055b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepperunsigned ARMTargetLowering::getJumpTableEncoding() const {
2056b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  return MachineJumpTableInfo::EK_Inline;
2057b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper}
2058b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2059b08d5a8fb42f4586d756068065186b5af7e48daUlrich DrepperSDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2060b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                                             SelectionDAG &DAG) const {
2061b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  MachineFunction &MF = DAG.getMachineFunction();
2062b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2063b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  unsigned ARMPCLabelIndex = 0;
2064b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  DebugLoc DL = Op.getDebugLoc();
2065b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  EVT PtrVT = getPointerTy();
2066b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
20676d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
20686d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  SDValue CPAddr;
2069b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (RelocM == Reloc::Static) {
2070b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2071b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  } else {
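    // The 4/8 bias matches what reading PC yields when the ARMISD::PIC_ADD is
    // executed: the address of the current instruction plus 8 in ARM state and
    // plus 4 in Thumb state, so the constant-pool entry is pre-adjusted by
    // that amount.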
2072b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2073b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    ARMPCLabelIndex = AFI->createPICLabelUId();
2074b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    ARMConstantPoolValue *CPV =
2075b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2076b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                                      ARMCP::CPBlockAddress, PCAdj);
20776d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2078b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  }
2079b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2080b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
2081b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                               MachinePointerInfo::getConstantPool(),
2082b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                               false, false, false, 0);
2083b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (RelocM == Reloc::Static)
2084b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    return Result;
2085b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2086b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2087b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper}
2088b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2089b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2090b08d5a8fb42f4586d756068065186b5af7e48daUlrich DrepperSDValue
20916d93c8c46d9b2b381c889e5f176451996845b055Mark WielaardARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2092b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                                                 SelectionDAG &DAG) const {
2093b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  DebugLoc dl = GA->getDebugLoc();
20940b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  EVT PtrVT = getPointerTy();
20950b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
20960b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  MachineFunction &MF = DAG.getMachineFunction();
20970b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
20980b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
20990b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  ARMConstantPoolValue *CPV =
21000b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
21010b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
21020b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
21030b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
21040b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
21050b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                         MachinePointerInfo::getConstantPool(),
21060b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                         false, false, false, 0);
21070b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  SDValue Chain = Argument.getValue(1);
21080b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath
21090b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
21100b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
21110b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath
21120b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  // call __tls_get_addr.
21130b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  ArgListTy Args;
21140b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  ArgListEntry Entry;
21150b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  Entry.Node = Argument;
21166d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
21170b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  Args.push_back(Entry);
21180b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  // FIXME: is there useful debug info available here?
21190b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  TargetLowering::CallLoweringInfo CLI(Chain,
21200b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                (Type *) Type::getInt32Ty(*DAG.getContext()),
21210b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                false, false, false, false,
21220b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                0, CallingConv::C, /*isTailCall=*/false,
21230b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
21240b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
21250b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
21260b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  return CallResult.first;
21270b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath}
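// For illustration (hypothetical source): for
//   __thread int tls_var;
//   int get(void) { return tls_var; }
// the lowering above emits a TLSGD constant-pool entry for tls_var, applies
// the PIC_ADD fixup to form the GOT-relative argument, and calls
// __tls_get_addr; the call's result (in R0) is the address of tls_var for
// the current thread.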
21280b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath
21290b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath// Lower ISD::GlobalTLSAddress using the "initial exec" or
21300b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath// "local exec" model.
21310b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrathSDValue
21320b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrathARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
21330b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                                        SelectionDAG &DAG,
21340b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath                                        TLSModel::Model model) const {
21350b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  const GlobalValue *GV = GA->getGlobal();
21366d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard  DebugLoc dl = GA->getDebugLoc();
21370b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  SDValue Offset;
21380b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  SDValue Chain = DAG.getEntryNode();
21390b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  EVT PtrVT = getPointerTy();
21400b9d1fb534604a9ba19999cd8ce8e7efce28da24Roland McGrath  // Get the Thread Pointer
2141b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2142b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2143b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (model == TLSModel::InitialExec) {
2144b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    MachineFunction &MF = DAG.getMachineFunction();
2145b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2146b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2147b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    // Initial exec model.
2148b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2149521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    ARMConstantPoolValue *CPV =
2150521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2151521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2152521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath                                      true);
2153521c47d2a216fc64098c024fc5ed53532b485f32Roland McGrath    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2154b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2155a58951b183fb3e051870b19c95d8082a3efa3ddbMark Wielaard    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2156a58951b183fb3e051870b19c95d8082a3efa3ddbMark Wielaard                         MachinePointerInfo::getConstantPool(),
2157a58951b183fb3e051870b19c95d8082a3efa3ddbMark Wielaard                         false, false, false, 0);
2158b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    Chain = Offset.getValue(1);
2159b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2160b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2161b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2162b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2163b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2164b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                         MachinePointerInfo::getConstantPool(),
2165b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                         false, false, false, 0);
2166b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  } else {
2167b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    // local exec model
2168b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    assert(model == TLSModel::LocalExec);
2169b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    ARMConstantPoolValue *CPV =
2170b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2171b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
21726d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
21736d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2174b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                         MachinePointerInfo::getConstantPool(),
2175b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                         false, false, false, 0);
2176b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  }
2177b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2178b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  // The address of the thread local variable is the add of the thread
2179b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  // pointer with the offset of the variable.
2180b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2181b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper}
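// For illustration: both paths above compute  address = thread pointer +
// offset.  Initial-exec finds the offset indirectly, via a GOTTPOFF
// constant-pool entry, a PIC_ADD and an extra load from the GOT, while
// local-exec folds the TPOFF offset straight into the constant pool and
// needs only the single load.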
2182b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2183b08d5a8fb42f4586d756068065186b5af7e48daUlrich DrepperSDValue
2184b08d5a8fb42f4586d756068065186b5af7e48daUlrich DrepperARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2185b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  // TODO: implement the "local dynamic" model
2186b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  assert(Subtarget->isTargetELF() &&
2187b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper         "TLS not implemented for non-ELF targets");
2188b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
21896d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard
2190b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2191b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2192b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  switch (model) {
2193b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    case TLSModel::GeneralDynamic:
21948c4aa0ef998191ed828a37190dc179b91649938aMax Filippov    case TLSModel::LocalDynamic:
2195b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      return LowerToTLSGeneralDynamicModel(GA, DAG);
2196b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    case TLSModel::InitialExec:
2197b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    case TLSModel::LocalExec:
2198b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper      return LowerToTLSExecModels(GA, DAG, model);
2199b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  }
2200b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  llvm_unreachable("bogus TLS model");
2201b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper}
2202b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2203b08d5a8fb42f4586d756068065186b5af7e48daUlrich DrepperSDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
2204b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                                                 SelectionDAG &DAG) const {
2205b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  EVT PtrVT = getPointerTy();
2206b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  DebugLoc dl = Op.getDebugLoc();
2207b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2208b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2209b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (RelocM == Reloc::PIC_) {
22106d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
22111ccdfb683ad6c7e59793136c3a657ddf131cafd1Mark Wielaard    ARMConstantPoolValue *CPV =
22126d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard      ARMConstantPoolConstant::Create(GV,
22136d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
22146d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
22156d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
22166d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
22176d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                 CPAddr,
22186d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                 MachinePointerInfo::getConstantPool(),
22196d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard                                 false, false, false, 0);
22206d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    SDValue Chain = Result.getValue(1);
22216d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
22226d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
22236d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard    if (!UseGOTOFF)
22246d93c8c46d9b2b381c889e5f176451996845b055Mark Wielaard      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
2225b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                           MachinePointerInfo::getGOT(),
2226b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                           false, false, false, 0);
2227b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    return Result;
22288c4aa0ef998191ed828a37190dc179b91649938aMax Filippov  }
2229b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper
2230b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  // If we have T2 ops, we can materialize the address directly via movt/movw
2231b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  // pair. This is always cheaper.
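  // For illustration, the result is an immediate pair rather than a
  // constant-pool load, e.g. for a symbol 'sym':
  //   movw r0, :lower16:sym
  //   movt r0, :upper16:sym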
2232b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  if (Subtarget->useMovt()) {
2233b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    ++NumMovwMovt;
2234b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    // FIXME: Once remat is capable of dealing with instructions with register
2235b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    // operands, expand this into two nodes.
2236b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
2237b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
2238b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper  } else {
2239b08d5a8fb42f4586d756068065186b5af7e48daUlrich Drepper    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                       MachinePointerInfo::getConstantPool(),
                       false, false, false, 0);
  }
}

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // FIXME: Enable this for static codegen when tool issues are fixed.  Also
  // update ARMFastISel::ARMMaterializeGV.
  if (Subtarget->useMovt() && RelocM != Reloc::Static) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    if (RelocM == Reloc::Static)
      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));

    unsigned Wrapper = (RelocM == Reloc::PIC_)
      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                           MachinePointerInfo::getGOT(),
                           false, false, false, 0);
    return Result;
  }

  unsigned ARMPCLabelIndex = 0;
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  } else {
    ARMPCLabelIndex = AFI->createPICLabelUId();
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
                                      PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
                         false, false, false, 0);

  return Result;
}

SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV =
    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
                                  ARMPCLabelIndex, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  SDValue Val = DAG.getConstant(0, MVT::i32);
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1), Val);
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}

SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                          const ARMSubtarget *Subtarget) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::arm_thread_pointer: {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
    SDValue CPAddr;
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
                                      ARMCP::CPLSDA, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                  MachinePointerInfo::getConstantPool(),
                  false, false, false, 0);

    if (RelocM == Reloc::PIC_) {
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    }
    return Result;
  }
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
      ? ARMISD::VMULLs : ARMISD::VMULLu;
    return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  }
}

static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *Subtarget) {
  DebugLoc dl = Op.getDebugLoc();
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, MVT::i32));
  }

  SDValue Op5 = Op.getOperand(5);
  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);

  ARM_MB::MemBOpt DMBOpt;
  if (isDeviceBarrier)
    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
  else
    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(DMBOpt, MVT::i32));
}


static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const ARMSubtarget *Subtarget) {
  // FIXME: handle "fence singlethread" more efficiently.
  DebugLoc dl = Op.getDebugLoc();
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, MVT::i32));
  }

  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(ARM_MB::ISH, MVT::i32));
}

static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                             const ARMSubtarget *Subtarget) {
  // ARM pre v5TE and Thumb1 do not have preload instructions.
  if (!(Subtarget->isThumb2() ||
        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
    // Just preserve the chain.
    return Op.getOperand(0);

  DebugLoc dl = Op.getDebugLoc();
  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
  if (!isRead &&
      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
    // ARMv7 with MP extension has PLDW.
    return Op.getOperand(0);

  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (Subtarget->isThumb()) {
    // Invert the bits.
    isRead = ~isRead & 1;
    isData = ~isData & 1;
  }

  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
                     DAG.getConstant(isData, MVT::i32));
}

static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  DebugLoc dl = Op.getDebugLoc();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                      MachinePointerInfo(SV), false, false, 0);
}

SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                        SDValue &Root, SelectionDAG &DAG,
                                        DebugLoc dl) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const TargetRegisterClass *RC;
  if (AFI->isThumb1OnlyFunction())
    RC = &ARM::tGPRRegClass;
  else
    RC = &ARM::GPRRegClass;

  // Transform the arguments stored in physical registers into virtual ones.
  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);

  SDValue ArgValue2;
  if (NextVA.isMemLoc()) {
    MachineFrameInfo *MFI = MF.getFrameInfo();
    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);

    // Create load node to retrieve arguments from the stack.
    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
                            MachinePointerInfo::getFixedStack(FI),
                            false, false, false, 0);
  } else {
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
  }

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}

void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
                                  unsigned &VARegSize, unsigned &VARegSaveSize)
  const {
  unsigned NumGPRs;
  if (CCInfo.isFirstByValRegValid())
    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
  else {
    unsigned int firstUnalloced;
    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
                                                sizeof(GPRArgRegs) /
                                                sizeof(GPRArgRegs[0]));
    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
  }

  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
  VARegSize = NumGPRs * 4;
  VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
}
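// Illustrative example for computeRegArea above: if the first unallocated
// GPR is r2, then NumGPRs = 2 and VARegSize = 8 bytes; with an 8-byte stack
// alignment, VARegSaveSize also rounds up to 8.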

// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval).  Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                                        DebugLoc dl, SDValue &Chain,
                                        const Value *OrigArg,
                                        unsigned OffsetFromOrigArg,
                                        unsigned ArgOffset) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned firstRegToSaveIndex;
  if (CCInfo.isFirstByValRegValid())
    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
  else {
    firstRegToSaveIndex = CCInfo.getFirstUnallocated
      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
  }

  unsigned VARegSize, VARegSaveSize;
  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
  if (VARegSaveSize) {
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    AFI->setVarArgsRegSaveSize(VARegSaveSize);
    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
                                                     ArgOffset + VARegSaveSize
                                                     - VARegSize,
                                                     false));
    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                    getPointerTy());

    SmallVector<SDValue, 4> MemOps;
    for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
      const TargetRegisterClass *RC;
      if (AFI->isThumb1OnlyFunction())
        RC = &ARM::tGPRRegClass;
      else
        RC = &ARM::GPRRegClass;

      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
      SDValue Store =
        DAG.getStore(Val.getValue(1), dl, Val, FIN,
                     MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
                     false, false, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                        DAG.getConstant(4, getPointerTy()));
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  } else
    // This will point to the next argument passed via stack.
    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
}

SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
  CCInfo.AnalyzeFormalArguments(Ins,
                                CCAssignFnForNode(CallConv, /* Return*/ false,
                                                  isVarArg));

  SmallVector<SDValue, 16> ArgValues;
  int lastInsIndex = -1;
  SDValue ArgValue;
  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();

      if (VA.needsCustom()) {
        // f64 and vector types are split up into multiple registers or
        // combinations of registers and stack slots.
        if (VA.getLocVT() == MVT::v2f64) {
          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
                                                   Chain, DAG, dl);
          VA = ArgLocs[++i]; // skip ahead to next loc
          SDValue ArgValue2;
          if (VA.isMemLoc()) {
            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                    MachinePointerInfo::getFixedStack(FI),
                                    false, false, false, 0);
          } else {
            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
                                             Chain, DAG, dl);
          }
          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
        } else
          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

      } else {
        const TargetRegisterClass *RC;

        if (RegVT == MVT::f32)
          RC = &ARM::SPRRegClass;
        else if (RegVT == MVT::f64)
          RC = &ARM::DPRRegClass;
        else if (RegVT == MVT::v2f64)
          RC = &ARM::QPRRegClass;
        else if (RegVT == MVT::i32)
          RC = AFI->isThumb1OnlyFunction() ?
            (const TargetRegisterClass*)&ARM::tGPRRegClass :
            (const TargetRegisterClass*)&ARM::GPRRegClass;
        else
          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

        // Transform the arguments in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
      }

      // If this is an 8 or 16-bit value, it is really passed promoted
      // to 32 bits.  Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);

    } else { // VA.isRegLoc()

      // sanity check
      assert(VA.isMemLoc());
      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");

      int index = ArgLocs[i].getValNo();

      // Some Ins[] entries become multiple ArgLoc[] entries.
      // Process them only once.
      if (index != lastInsIndex)
        {
          ISD::ArgFlagsTy Flags = Ins[index].Flags;
          // FIXME: For now, all byval parameter objects are marked mutable.
          // This can be changed with more analysis.
          // In case of tail call optimization mark all arguments mutable.
          // Since they could be overwritten by lowering of arguments in case of
          // a tail call.
          if (Flags.isByVal()) {
            unsigned VARegSize, VARegSaveSize;
            computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
            VarArgStyleRegisters(CCInfo, DAG,
                                 dl, Chain, CurOrigArg, Ins[VA.getValNo()].PartOffset, 0);
            unsigned Bytes = Flags.getByValSize() - VARegSize;
            if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
            int FI = MFI->CreateFixedObject(Bytes,
                                            VA.getLocMemOffset(), false);
            InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
          } else {
            int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                            VA.getLocMemOffset(), true);

            // Create load nodes to retrieve arguments from the stack.
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, false, 0));
          }
          lastInsIndex = index;
        }
    }
  }

  // varargs
  if (isVarArg)
    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
                         CCInfo.getNextStackOffset());

  return Chain;
}

/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
          return CFP->getValueAPF().isPosZero();
    }
  }
  return false;
}

/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &ARMcc, SelectionDAG &DAG,
                             DebugLoc dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
    if (!isLegalICmpImmediate(C)) {
      // Constant does not fit, try adjusting it by one?
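      // For instance (assuming the adjusted constant is encodable): "x < C"
      // can be rewritten as "x <= C-1", and "x <= C" as "x < C+1", without
      // changing the comparison's result.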
      switch (CC) {
      default: break;
      case ISD::SETLT:
      case ISD::SETGE:
        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if (C != 0 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      }
    }
  }

  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
  ARMISD::NodeType CompareType;
  switch (CondCode) {
  default:
    CompareType = ARMISD::CMP;
    break;
  case ARMCC::EQ:
  case ARMCC::NE:
    // Uses only Z Flag
    CompareType = ARMISD::CMPZ;
    break;
  }
  ARMcc = DAG.getConstant(CondCode, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}

/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                             DebugLoc dl) const {
  SDValue Cmp;
  if (!isFloatingPointZero(RHS))
    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
  else
    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}

/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
  unsigned Opc = Cmp.getOpcode();
  DebugLoc DL = Cmp.getDebugLoc();
  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
  Cmp = Cmp.getOperand(0);
  Opc = Cmp.getOpcode();
  if (Opc == ARMISD::CMPFP)
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
  else {
    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
  }
  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}

SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  // Convert:
  //
  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *CMOVFalse =
      dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (CMOVTrue && CMOVFalse) {
      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();

      SDValue True;
      SDValue False;
      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
        True = SelectFalse;
        False = SelectTrue;
      }

      if (True.getNode() && False.getNode()) {
        EVT VT = Op.getValueType();
        SDValue ARMcc = Cond.getOperand(2);
        SDValue CCR = Cond.getOperand(3);
        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
        assert(True.getValueType() == VT);
        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
      }
    }
  }

  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
  // undefined bits before doing a full-word comparison with zero.
  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
                     DAG.getConstant(1, Cond.getValueType()));

  return DAG.getSelectCC(dl, Cond,
                         DAG.getConstant(0, Cond.getValueType()),
                         SelectTrue, SelectFalse, ISD::SETNE);
}

SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
                               ARMcc, CCR, Cmp);
  if (CondCode2 != ARMCC::AL) {
    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
    // FIXME: Needs another CMP because flag can have but one use.
    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
                         Result, TrueVal, ARMcc2, CCR, Cmp2);
  }
  return Result;
}

/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
                           const ARMSubtarget *Subtarget) {
  SDNode *N = Op.getNode();
  if (!N->hasOneUse())
    // Otherwise it requires moving the value from fp to integer registers.
    return false;
  if (!N->getNumValues())
    return false;
  EVT VT = Op.getValueType();
  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
    // vmrs are very slow, e.g. cortex-a8.
    return false;

  if (isFloatingPointZero(Op)) {
    SeenZero = true;
    return true;
  }
  return ISD::isNormalLoad(N);
}

static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
  if (isFloatingPointZero(Op))
    return DAG.getConstant(0, MVT::i32);

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
                       Ld->isVolatile(), Ld->isNonTemporal(),
                       Ld->isInvariant(), Ld->getAlignment());

  llvm_unreachable("Unknown VFP cmp argument!");
}

static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                           SDValue &RetVal1, SDValue &RetVal2) {
  if (isFloatingPointZero(Op)) {
    RetVal1 = DAG.getConstant(0, MVT::i32);
    RetVal2 = DAG.getConstant(0, MVT::i32);
    return;
  }

  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
    SDValue Ptr = Ld->getBasePtr();
    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), Ptr,
                          Ld->getPointerInfo(),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          Ld->isInvariant(), Ld->getAlignment());

    EVT PtrType = Ptr.getValueType();
    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                          Ld->getChain(), NewPtr,
                          Ld->getPointerInfo().getWithOffset(4),
                          Ld->isVolatile(), Ld->isNonTemporal(),
                          Ld->isInvariant(), NewAlign);
    return;
  }

  llvm_unreachable("Unknown VFP cmp argument!");
}

/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  bool LHSSeenZero = false;
  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
  bool RHSSeenZero = false;
  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
    // to an integer comparison.
    if (CC == ISD::SETOEQ)
      CC = ISD::SETEQ;
    else if (CC == ISD::SETUNE)
      CC = ISD::SETNE;

    SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
    SDValue ARMcc;
    if (LHS.getValueType() == MVT::f32) {
      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(LHS, DAG), Mask);
      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(RHS, DAG), Mask);
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                         Chain, Dest, ARMcc, CCR, Cmp);
    }

    SDValue LHS1, LHS2;
    SDValue RHS1, RHS2;
    expandf64Toi32(LHS, DAG, LHS1, LHS2);
    expandf64Toi32(RHS, DAG, RHS1, RHS2);
    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
    ARMcc = DAG.getConstant(CondCode, MVT::i32);
    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
  }

  return SDValue();
}

SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  if (getTargetMachine().Options.UnsafeFPMath &&
      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
       CC == ISD::SETNE || CC == ISD::SETUNE)) {
    SDValue Result = OptimizeVFPBrcond(Op, DAG);
    if (Result.getNode())
      return Result;
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  if (CondCode2 != ARMCC::AL) {
    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  }
  return Res;
}

SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Table = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  EVT PTy = getPointerTy();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
  if (Subtarget->isThumb2()) {
    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later.
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI, UId);
  }
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
    Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(),
                       false, false, false, 0);
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  } else {
    Addr = DAG.getLoad(PTy, dl, Chain, Addr,
                       MachinePointerInfo::getJumpTable(),
                       false, false, false, 0);
    Chain = Addr.getValue(1);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
  }
}

static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  if (Op.getValueType().getVectorElementType() == MVT::i32) {
    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
      return Op;
    return DAG.UnrollVectorOp(Op.getNode());
  }

  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
         "Invalid type for custom lowering!");
  if (VT != MVT::v4i16)
    return DAG.UnrollVectorOp(Op.getNode());

  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}

static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Invalid opcode!");
  case ISD::FP_TO_SINT:
    Opc = ARMISD::FTOSI;
    break;
  case ISD::FP_TO_UINT:
    Opc = ARMISD::FTOUI;
    break;
  }
  Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}

static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
    if (VT.getVectorElementType() == MVT::f32)
      return Op;
    return DAG.UnrollVectorOp(Op.getNode());
  }

  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
         "Invalid type for custom lowering!");
  if (VT != MVT::v4f32)
    return DAG.UnrollVectorOp(Op.getNode());

  unsigned CastOpc;
  unsigned Opc;
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Invalid opcode!");
  case ISD::SINT_TO_FP:
    CastOpc = ISD::SIGN_EXTEND;
    Opc = ISD::SINT_TO_FP;
    break;
  case ISD::UINT_TO_FP:
    CastOpc = ISD::ZERO_EXTEND;
    Opc = ISD::UINT_TO_FP;
    break;
  }

  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}

static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  DebugLoc dl = Op.getDebugLoc();
  unsigned Opc;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Invalid opcode!");
  case ISD::SINT_TO_FP:
    Opc = ARMISD::SITOF;
    break;
  case ISD::UINT_TO_FP:
    Opc = ARMISD::UITOF;
    break;
  }

  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
  return DAG.getNode(Opc, dl, VT, Op);
}

SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  // Implement fcopysign with a fabs and a conditional fneg.
  SDValue Tmp0 = Op.getOperand(0);
  SDValue Tmp1 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT SrcVT = Tmp1.getValueType();
  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
    Tmp0.getOpcode() == ARMISD::VMOVDRR;
  bool UseNEON = !InGPR && Subtarget->hasNEON();

  if (UseNEON) {
    // Use VBSL to copy the sign bit.
    unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
                               DAG.getTargetConstant(EncodedVal, MVT::i32));
    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
    if (VT == MVT::f64)
      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
                         DAG.getConstant(32, MVT::i32));
    else /*if (VT == MVT::f32)*/
      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
    if (SrcVT == MVT::f32) {
      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
      if (VT == MVT::f64)
        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
                           DAG.getConstant(32, MVT::i32));
    } else if (VT == MVT::f32)
      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
                         DAG.getConstant(32, MVT::i32));
    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);

    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
                                            MVT::i32);
    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));

    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
    if (VT == MVT::f32) {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                        DAG.getConstant(0, MVT::i32));
    } else {
      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
    }

    return Res;
  }

  // Bitcast operand 1 to i32.
  if (SrcVT == MVT::f64)
    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                       &Tmp1, 1).getValue(1);
  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);

  // Or in the signbit with integer operations.
  SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
  SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
  if (VT == MVT::f32) {
    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
  }

  // f64: Or the high part with signbit and then combine two parts.
  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                     &Tmp0, 1);
  SDValue Lo = Tmp0.getValue(0);
  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}

SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}

SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
    ? ARM::R7 : ARM::R11;
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo(),
                            false, false, false, 0);
  return FrameAddr;
}

/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  DebugLoc dl = N->getDebugLoc();
  SDValue Op = N->getOperand(0);

  // This function is only supposed to be called for i64 types, either as the
  // source or destination of the bit convert.
  EVT SrcVT = Op.getValueType();
  EVT DstVT = N->getValueType(0);
  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
         "ExpandBITCAST called for non-i64 type");

  // Turn i64->f64 into VMOVDRR.
  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(0, MVT::i32));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(1, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, DstVT,
                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
  }

  // Turn f64->i64 into VMOVRRD.
  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
    // Merge the pieces into a single i64 value.
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
  }

  return SDValue();
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
/// Zero vectors are used to represent vector negation and in those cases
/// will be implemented with the NEON VNEG instruction.  However, VNEG does
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed.  Regardless, use a canonical VMOV to create the
/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
  // The canonical modified immediate encoding of a zero vector is....0!
  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
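// For example, a v4i32 zero is built as a single VMOVIMM with encoded value 0
// (roughly "vmov.i32 qN, #0") and then bitcast to the requested vector type.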

/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt  = Op.getOperand(2);
  SDValue ARMcc;
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}
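// Rough sketch of the lowering above for a 64-bit right shift by n (n < 32):
//   Lo = (lo >> n) | (hi << (32 - n));  Hi = hi >> n
// For n >= 32 the CMOV instead selects hi >> (n - 32) for the low word.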

/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt  = Op.getOperand(2);
  SDValue ARMcc;

  assert(Op.getOpcode() == ISD::SHL_PARTS);
  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  // The rounding mode is in bits 23:22 of the FPSCR.
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
  // so that the shift + and get folded into a bitfield extract.
  DebugLoc dl = Op.getDebugLoc();
  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
                              DAG.getConstant(Intrinsic::arm_get_fpscr,
                                              MVT::i32));
  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
                                  DAG.getConstant(1U << 22, MVT::i32));
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
                              DAG.getConstant(22, MVT::i32));
  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
                     DAG.getConstant(3, MVT::i32));
}
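// Worked example: if the FPSCR rounding-mode bits are 0 (round to nearest),
// ((FPSCR + (1 << 22)) >> 22) & 3 evaluates to 1, matching the 0->1, 1->2,
// 2->3, 3->0 mapping described above.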
3506
3507static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
3508                         const ARMSubtarget *ST) {
3509  EVT VT = N->getValueType(0);
3510  DebugLoc dl = N->getDebugLoc();
3511
3512  if (!ST->hasV6T2Ops())
3513    return SDValue();
3514
3515  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
3516  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
3517}
3518
3519static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
3520                          const ARMSubtarget *ST) {
3521  EVT VT = N->getValueType(0);
3522  DebugLoc dl = N->getDebugLoc();
3523
3524  if (!VT.isVector())
3525    return SDValue();
3526
3527  // Lower vector shifts on NEON to use VSHL.
3528  assert(ST->hasNEON() && "unexpected vector shift");
3529
3530  // Left shifts translate directly to the vshiftu intrinsic.
3531  if (N->getOpcode() == ISD::SHL)
3532    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3533                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
3534                       N->getOperand(0), N->getOperand(1));
3535
3536  assert((N->getOpcode() == ISD::SRA ||
3537          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
3538
3539  // NEON uses the same intrinsics for both left and right shifts.  For
3540  // right shifts, the shift amounts are negative, so negate the vector of
3541  // shift amounts.
3542  EVT ShiftVT = N->getOperand(1).getValueType();
3543  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
3544                                     getZeroVector(ShiftVT, DAG, dl),
3545                                     N->getOperand(1));
3546  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
3547                             Intrinsic::arm_neon_vshifts :
3548                             Intrinsic::arm_neon_vshiftu);
3549  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3550                     DAG.getConstant(vshiftInt, MVT::i32),
3551                     N->getOperand(0), NegatedCount);
3552}
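
// Per-lane model of the trick above (illustrative only, for in-range shift
// counts): NEON's VSHL interprets a negative shift count as a right shift, so
// an SRA/SRL by N is emitted as a vshifts/vshiftu by -N.
static inline int modelLaneVSHL(int Val, int Count, bool Signed) {
  if (Count >= 0)
    return Val << Count;
  return Signed ? (Val >> -Count) : (int)((unsigned)Val >> -Count);
}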
3553
3554static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
3555                                const ARMSubtarget *ST) {
3556  EVT VT = N->getValueType(0);
3557  DebugLoc dl = N->getDebugLoc();
3558
3559  // We can get here for a node like i32 = ISD::SHL i32, i64
3560  if (VT != MVT::i64)
3561    return SDValue();
3562
3563  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
3564         "Unknown shift to lower!");
3565
3566  // We only lower SRA, SRL of 1 here, all others use generic lowering.
3567  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
3568      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
3569    return SDValue();
3570
3571  // If we are in thumb mode, we don't have RRX.
3572  if (ST->isThumb1Only()) return SDValue();
3573
3574  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
3575  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
3576                           DAG.getConstant(0, MVT::i32));
3577  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
3578                           DAG.getConstant(1, MVT::i32));
3579
3580  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
3581  // captures the result into a carry flag.
3582  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
3583  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
3584
3585  // The low part is an ARMISD::RRX operand, which shifts the carry in.
3586  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
3587
3588  // Merge the pieces into a single i64 value.
3589  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
3590}
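
// Scalar model of the RRX expansion above for a logical shift right by one
// (illustrative only; the arithmetic case differs only in how the high word
// is shifted): the bit dropped from the high word becomes the carry, and RRX
// rotates that carry into bit 31 of the low word.
static inline uint64_t modelLSR64By1(uint64_t V) {
  unsigned Lo = (unsigned)V, Hi = (unsigned)(V >> 32);
  unsigned Carry = Hi & 1;            // SRL_FLAG: carry out of the high word
  Hi >>= 1;
  Lo = (Lo >> 1) | (Carry << 31);     // RRX: carry shifted in at the top
  return ((uint64_t)Hi << 32) | Lo;
}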
3591
3592static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
3593  SDValue TmpOp0, TmpOp1;
3594  bool Invert = false;
3595  bool Swap = false;
3596  unsigned Opc = 0;
3597
3598  SDValue Op0 = Op.getOperand(0);
3599  SDValue Op1 = Op.getOperand(1);
3600  SDValue CC = Op.getOperand(2);
3601  EVT VT = Op.getValueType();
3602  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3603  DebugLoc dl = Op.getDebugLoc();
3604
3605  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
3606    switch (SetCCOpcode) {
3607    default: llvm_unreachable("Illegal FP comparison");
3608    case ISD::SETUNE:
3609    case ISD::SETNE:  Invert = true; // Fallthrough
3610    case ISD::SETOEQ:
3611    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3612    case ISD::SETOLT:
3613    case ISD::SETLT: Swap = true; // Fallthrough
3614    case ISD::SETOGT:
3615    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3616    case ISD::SETOLE:
3617    case ISD::SETLE:  Swap = true; // Fallthrough
3618    case ISD::SETOGE:
3619    case ISD::SETGE: Opc = ARMISD::VCGE; break;
3620    case ISD::SETUGE: Swap = true; // Fallthrough
3621    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
3622    case ISD::SETUGT: Swap = true; // Fallthrough
3623    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
3624    case ISD::SETUEQ: Invert = true; // Fallthrough
3625    case ISD::SETONE:
3626      // Expand this to (OLT | OGT).
3627      TmpOp0 = Op0;
3628      TmpOp1 = Op1;
3629      Opc = ISD::OR;
3630      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3631      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
3632      break;
3633    case ISD::SETUO: Invert = true; // Fallthrough
3634    case ISD::SETO:
3635      // Expand this to (OLT | OGE).
3636      TmpOp0 = Op0;
3637      TmpOp1 = Op1;
3638      Opc = ISD::OR;
3639      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
3640      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
3641      break;
3642    }
3643  } else {
3644    // Integer comparisons.
3645    switch (SetCCOpcode) {
3646    default: llvm_unreachable("Illegal integer comparison");
3647    case ISD::SETNE:  Invert = true;
3648    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
3649    case ISD::SETLT:  Swap = true;
3650    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
3651    case ISD::SETLE:  Swap = true;
3652    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
3653    case ISD::SETULT: Swap = true;
3654    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
3655    case ISD::SETULE: Swap = true;
3656    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
3657    }
3658
3659    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
3660    if (Opc == ARMISD::VCEQ) {
3661
3662      SDValue AndOp;
3663      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3664        AndOp = Op0;
3665      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
3666        AndOp = Op1;
3667
3668      // Ignore bitconvert.
3669      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
3670        AndOp = AndOp.getOperand(0);
3671
3672      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
3673        Opc = ARMISD::VTST;
3674        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
3675        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
3676        Invert = !Invert;
3677      }
3678    }
3679  }
3680
3681  if (Swap)
3682    std::swap(Op0, Op1);
3683
3684  // If one of the operands is a constant vector zero, attempt to fold the
3685  // comparison to a specialized compare-against-zero form.
3686  SDValue SingleOp;
3687  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3688    SingleOp = Op0;
3689  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
3690    if (Opc == ARMISD::VCGE)
3691      Opc = ARMISD::VCLEZ;
3692    else if (Opc == ARMISD::VCGT)
3693      Opc = ARMISD::VCLTZ;
3694    SingleOp = Op1;
3695  }
3696
3697  SDValue Result;
3698  if (SingleOp.getNode()) {
3699    switch (Opc) {
3700    case ARMISD::VCEQ:
3701      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
3702    case ARMISD::VCGE:
3703      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
3704    case ARMISD::VCLEZ:
3705      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
3706    case ARMISD::VCGT:
3707      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
3708    case ARMISD::VCLTZ:
3709      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
3710    default:
3711      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3712    }
3713  } else {
3714    Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3715  }
3716
3717  if (Invert)
3718    Result = DAG.getNOT(dl, Result, VT);
3719
3720  return Result;
3721}
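
// Scalar model of the ISD::SETONE expansion above (illustrative only):
// "ordered and not equal" holds exactly when (a > b) || (b > a), since both
// compares are false if either operand is a NaN and also false when the
// operands are equal.
static inline bool modelSetONE(float A, float B) {
  return (A > B) || (B > A);
}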
3722
3723/// isNEONModifiedImm - Check if the specified splat value corresponds to a
3724/// valid vector constant for a NEON instruction with a "modified immediate"
3725/// operand (e.g., VMOV).  If so, return the encoded value.
3726static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3727                                 unsigned SplatBitSize, SelectionDAG &DAG,
3728                                 EVT &VT, bool is128Bits, NEONModImmType type) {
3729  unsigned OpCmode, Imm;
3730
3731  // SplatBitSize is set to the smallest size that splats the vector, so a
3732  // zero vector will always have SplatBitSize == 8.  However, NEON modified
3733  // immediate instructions other than VMOV do not support the 8-bit encoding
3734  // of a zero vector, and the default encoding of zero is supposed to be the
3735  // 32-bit version.
3736  if (SplatBits == 0)
3737    SplatBitSize = 32;
3738
3739  switch (SplatBitSize) {
3740  case 8:
3741    if (type != VMOVModImm)
3742      return SDValue();
3743    // Any 1-byte value is OK.  Op=0, Cmode=1110.
3744    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3745    OpCmode = 0xe;
3746    Imm = SplatBits;
3747    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3748    break;
3749
3750  case 16:
3751    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3752    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3753    if ((SplatBits & ~0xff) == 0) {
3754      // Value = 0x00nn: Op=x, Cmode=100x.
3755      OpCmode = 0x8;
3756      Imm = SplatBits;
3757      break;
3758    }
3759    if ((SplatBits & ~0xff00) == 0) {
3760      // Value = 0xnn00: Op=x, Cmode=101x.
3761      OpCmode = 0xa;
3762      Imm = SplatBits >> 8;
3763      break;
3764    }
3765    return SDValue();
3766
3767  case 32:
3768    // NEON's 32-bit VMOV supports splat values where:
3769    // * only one byte is nonzero, or
3770    // * the least significant byte is 0xff and the second byte is nonzero, or
3771    // * the least significant 2 bytes are 0xff and the third is nonzero.
3772    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3773    if ((SplatBits & ~0xff) == 0) {
3774      // Value = 0x000000nn: Op=x, Cmode=000x.
3775      OpCmode = 0;
3776      Imm = SplatBits;
3777      break;
3778    }
3779    if ((SplatBits & ~0xff00) == 0) {
3780      // Value = 0x0000nn00: Op=x, Cmode=001x.
3781      OpCmode = 0x2;
3782      Imm = SplatBits >> 8;
3783      break;
3784    }
3785    if ((SplatBits & ~0xff0000) == 0) {
3786      // Value = 0x00nn0000: Op=x, Cmode=010x.
3787      OpCmode = 0x4;
3788      Imm = SplatBits >> 16;
3789      break;
3790    }
3791    if ((SplatBits & ~0xff000000) == 0) {
3792      // Value = 0xnn000000: Op=x, Cmode=011x.
3793      OpCmode = 0x6;
3794      Imm = SplatBits >> 24;
3795      break;
3796    }
3797
3798    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
3799    if (type == OtherModImm) return SDValue();
3800
3801    if ((SplatBits & ~0xffff) == 0 &&
3802        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3803      // Value = 0x0000nnff: Op=x, Cmode=1100.
3804      OpCmode = 0xc;
3805      Imm = SplatBits >> 8;
3806      SplatBits |= 0xff;
3807      break;
3808    }
3809
3810    if ((SplatBits & ~0xffffff) == 0 &&
3811        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3812      // Value = 0x00nnffff: Op=x, Cmode=1101.
3813      OpCmode = 0xd;
3814      Imm = SplatBits >> 16;
3815      SplatBits |= 0xffff;
3816      break;
3817    }
3818
3819    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3820    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3821    // VMOV.I32.  A (very) minor optimization would be to replicate the value
3822    // and fall through here to test for a valid 64-bit splat.  But, then the
3823    // caller would also need to check and handle the change in size.
3824    return SDValue();
3825
3826  case 64: {
3827    if (type != VMOVModImm)
3828      return SDValue();
3829    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
3830    uint64_t BitMask = 0xff;
3831    uint64_t Val = 0;
3832    unsigned ImmMask = 1;
3833    Imm = 0;
3834    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
3835      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
3836        Val |= BitMask;
3837        Imm |= ImmMask;
3838      } else if ((SplatBits & BitMask) != 0) {
3839        return SDValue();
3840      }
3841      BitMask <<= 8;
3842      ImmMask <<= 1;
3843    }
3844    // Op=1, Cmode=1110.
3845    OpCmode = 0x1e;
3846    SplatBits = Val;
3847    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3848    break;
3849  }
3850
3851  default:
3852    llvm_unreachable("unexpected size for isNEONModifiedImm");
3853  }
3854
3855  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
3856  return DAG.getTargetConstant(EncodedVal, MVT::i32);
3857}
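
// Standalone sketch of the 64-bit case above (hypothetical helper, ignoring
// undef bits): a splat is representable iff every byte is 0x00 or 0xff, and
// the per-byte choices pack into the 8-bit immediate, one bit per byte.
static inline bool modelVMOVI64(uint64_t Splat, unsigned &Imm8) {
  Imm8 = 0;
  for (unsigned Byte = 0; Byte != 8; ++Byte) {
    uint64_t B = (Splat >> (Byte * 8)) & 0xff;
    if (B == 0xff)
      Imm8 |= 1U << Byte;
    else if (B != 0)
      return false;
  }
  return true;
}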
3858
3859SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
3860                                           const ARMSubtarget *ST) const {
3861  if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
3862    return SDValue();
3863
3864  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
3865  assert(Op.getValueType() == MVT::f32 &&
3866         "ConstantFP custom lowering should only occur for f32.");
3867
3868  // Try splatting with a VMOV.f32...
3869  APFloat FPVal = CFP->getValueAPF();
3870  int ImmVal = ARM_AM::getFP32Imm(FPVal);
3871  if (ImmVal != -1) {
3872    DebugLoc DL = Op.getDebugLoc();
3873    SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
3874    SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
3875                                      NewVal);
3876    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
3877                       DAG.getConstant(0, MVT::i32));
3878  }
3879
3880  // If that fails, try a VMOV.i32
3881  EVT VMovVT;
3882  unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
3883  SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
3884                                     VMOVModImm);
3885  if (NewVal != SDValue()) {
3886    DebugLoc DL = Op.getDebugLoc();
3887    SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
3888                                      NewVal);
3889    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
3890                                       VecConstant);
3891    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
3892                       DAG.getConstant(0, MVT::i32));
3893  }
3894
3895  // Finally, try a VMVN.i32
3896  NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
3897                             VMVNModImm);
3898  if (NewVal != SDValue()) {
3899    DebugLoc DL = Op.getDebugLoc();
3900    SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
3901    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
3902                                       VecConstant);
3903    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
3904                       DAG.getConstant(0, MVT::i32));
3905  }
3906
3907  return SDValue();
3908}
3909
3910
3911static bool isVEXTMask(ArrayRef<int> M, EVT VT,
3912                       bool &ReverseVEXT, unsigned &Imm) {
3913  unsigned NumElts = VT.getVectorNumElements();
3914  ReverseVEXT = false;
3915
3916  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
3917  if (M[0] < 0)
3918    return false;
3919
3920  Imm = M[0];
3921
3922  // If this is a VEXT shuffle, the immediate value is the index of the first
3923  // element.  The other shuffle indices must be the successive elements after
3924  // the first one.
3925  unsigned ExpectedElt = Imm;
3926  for (unsigned i = 1; i < NumElts; ++i) {
3927    // Increment the expected index.  If it wraps around, it may still be
3928    // a VEXT but the source vectors must be swapped.
3929    ExpectedElt += 1;
3930    if (ExpectedElt == NumElts * 2) {
3931      ExpectedElt = 0;
3932      ReverseVEXT = true;
3933    }
3934
3935    if (M[i] < 0) continue; // ignore UNDEF indices
3936    if (ExpectedElt != static_cast<unsigned>(M[i]))
3937      return false;
3938  }
3939
3940  // Adjust the index value if the source operands will be swapped.
3941  if (ReverseVEXT)
3942    Imm -= NumElts;
3943
3944  return true;
3945}
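
// Worked example for <4 x i32>: the mask <3, 4, 5, 6> selects consecutive
// elements of the concatenation <V1, V2> starting at index 3, so it is a
// VEXT with Imm = 3.  The mask <6, 7, 0, 1> wraps past the end, so
// ReverseVEXT is set (the sources are swapped to <V2, V1>) and Imm is
// adjusted to 6 - 4 = 2.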
3946
3947/// isVREVMask - Check if a vector shuffle corresponds to a VREV
3948/// instruction with the specified blocksize.  (The order of the elements
3949/// within each block of the vector is reversed.)
3950static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
3951  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3952         "Only possible block sizes for VREV are: 16, 32, 64");
3953
3954  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3955  if (EltSz == 64)
3956    return false;
3957
3958  unsigned NumElts = VT.getVectorNumElements();
3959  unsigned BlockElts = M[0] + 1;
3960  // If the first shuffle index is UNDEF, be optimistic.
3961  if (M[0] < 0)
3962    BlockElts = BlockSize / EltSz;
3963
3964  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3965    return false;
3966
3967  for (unsigned i = 0; i < NumElts; ++i) {
3968    if (M[i] < 0) continue; // ignore UNDEF indices
3969    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3970      return false;
3971  }
3972
3973  return true;
3974}
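
// Worked examples of the check above for <4 x i16>: VREV64 (BlockElts = 4)
// accepts the mask <3, 2, 1, 0>, and VREV32 (BlockElts = 2) accepts
// <1, 0, 3, 2>; each block of BlockElts elements is mirrored in place.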
3975
3976static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
3977  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
3978  // range, then 0 is placed into the resulting vector. So pretty much any mask
3979  // of 8 elements can work here.
3980  return VT == MVT::v8i8 && M.size() == 8;
3981}
3982
3983static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
3984  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3985  if (EltSz == 64)
3986    return false;
3987
3988  unsigned NumElts = VT.getVectorNumElements();
3989  WhichResult = (M[0] == 0 ? 0 : 1);
3990  for (unsigned i = 0; i < NumElts; i += 2) {
3991    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3992        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
3993      return false;
3994  }
3995  return true;
3996}
3997
3998/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3999/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4000/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
4001static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
4002  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4003  if (EltSz == 64)
4004    return false;
4005
4006  unsigned NumElts = VT.getVectorNumElements();
4007  WhichResult = (M[0] == 0 ? 0 : 1);
4008  for (unsigned i = 0; i < NumElts; i += 2) {
4009    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
4010        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
4011      return false;
4012  }
4013  return true;
4014}
4015
4016static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4017  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4018  if (EltSz == 64)
4019    return false;
4020
4021  unsigned NumElts = VT.getVectorNumElements();
4022  WhichResult = (M[0] == 0 ? 0 : 1);
4023  for (unsigned i = 0; i != NumElts; ++i) {
4024    if (M[i] < 0) continue; // ignore UNDEF indices
4025    if ((unsigned) M[i] != 2 * i + WhichResult)
4026      return false;
4027  }
4028
4029  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4030  if (VT.is64BitVector() && EltSz == 32)
4031    return false;
4032
4033  return true;
4034}
4035
4036/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
4037/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4038/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
4039static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
4040  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4041  if (EltSz == 64)
4042    return false;
4043
4044  unsigned Half = VT.getVectorNumElements() / 2;
4045  WhichResult = (M[0] == 0 ? 0 : 1);
4046  for (unsigned j = 0; j != 2; ++j) {
4047    unsigned Idx = WhichResult;
4048    for (unsigned i = 0; i != Half; ++i) {
4049      int MIdx = M[i + j * Half];
4050      if (MIdx >= 0 && (unsigned) MIdx != Idx)
4051        return false;
4052      Idx += 2;
4053    }
4054  }
4055
4056  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4057  if (VT.is64BitVector() && EltSz == 32)
4058    return false;
4059
4060  return true;
4061}
4062
4063static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4064  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4065  if (EltSz == 64)
4066    return false;
4067
4068  unsigned NumElts = VT.getVectorNumElements();
4069  WhichResult = (M[0] == 0 ? 0 : 1);
4070  unsigned Idx = WhichResult * NumElts / 2;
4071  for (unsigned i = 0; i != NumElts; i += 2) {
4072    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
4073        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
4074      return false;
4075    Idx += 1;
4076  }
4077
4078  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4079  if (VT.is64BitVector() && EltSz == 32)
4080    return false;
4081
4082  return true;
4083}
4084
4085/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
4086/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4087/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
4088static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
4089  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4090  if (EltSz == 64)
4091    return false;
4092
4093  unsigned NumElts = VT.getVectorNumElements();
4094  WhichResult = (M[0] == 0 ? 0 : 1);
4095  unsigned Idx = WhichResult * NumElts / 2;
4096  for (unsigned i = 0; i != NumElts; i += 2) {
4097    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
4098        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
4099      return false;
4100    Idx += 1;
4101  }
4102
4103  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
4104  if (VT.is64BitVector() && EltSz == 32)
4105    return false;
4106
4107  return true;
4108}
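
// Example masks for <4 x i16> accepted by the predicates above, with
// WhichResult selecting the first or second output of the paired instruction:
//   VTRN: <0, 4, 2, 6> or <1, 5, 3, 7>
//   VUZP: <0, 2, 4, 6> or <1, 3, 5, 7>
//   VZIP: <0, 4, 1, 5> or <2, 6, 3, 7>
// The "_v_undef" variants accept the same patterns with the second source
// replaced by the first, e.g. <0, 0, 2, 2> for VTRN.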
4109
4110// If N is an integer constant that can be moved into a register in one
4111// instruction, return an SDValue of such a constant (will become a MOV
4112// instruction).  Otherwise return null.
4113static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
4114                                     const ARMSubtarget *ST, DebugLoc dl) {
4115  uint64_t Val;
4116  if (!isa<ConstantSDNode>(N))
4117    return SDValue();
4118  Val = cast<ConstantSDNode>(N)->getZExtValue();
4119
4120  if (ST->isThumb1Only()) {
4121    if (Val <= 255 || ~Val <= 255)
4122      return DAG.getConstant(Val, MVT::i32);
4123  } else {
4124    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
4125      return DAG.getConstant(Val, MVT::i32);
4126  }
4127  return SDValue();
4128}
4129
4130// If this is a case we can't handle, return null and let the default
4131// expansion code take care of it.
4132SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4133                                             const ARMSubtarget *ST) const {
4134  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
4135  DebugLoc dl = Op.getDebugLoc();
4136  EVT VT = Op.getValueType();
4137
4138  APInt SplatBits, SplatUndef;
4139  unsigned SplatBitSize;
4140  bool HasAnyUndefs;
4141  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
4142    if (SplatBitSize <= 64) {
4143      // Check if an immediate VMOV works.
4144      EVT VmovVT;
4145      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
4146                                      SplatUndef.getZExtValue(), SplatBitSize,
4147                                      DAG, VmovVT, VT.is128BitVector(),
4148                                      VMOVModImm);
4149      if (Val.getNode()) {
4150        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
4151        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4152      }
4153
4154      // Try an immediate VMVN.
4155      uint64_t NegatedImm = (~SplatBits).getZExtValue();
4156      Val = isNEONModifiedImm(NegatedImm,
4157                                      SplatUndef.getZExtValue(), SplatBitSize,
4158                                      DAG, VmovVT, VT.is128BitVector(),
4159                                      VMVNModImm);
4160      if (Val.getNode()) {
4161        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
4162        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4163      }
4164
4165      // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
4166      if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
4167        int ImmVal = ARM_AM::getFP32Imm(SplatBits);
4168        if (ImmVal != -1) {
4169          SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
4170          return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
4171        }
4172      }
4173    }
4174  }
4175
4176  // Scan through the operands to see if only one value is used.
4177  //
4178  // As an optimisation, even if more than one value is used it may be more
4179  // profitable to splat with one value and then change some lanes.
4180  //
4181  // Heuristically we decide to do this if the vector has a "dominant" value,
4182  // defined as splatted to more than half of the lanes.
4183  unsigned NumElts = VT.getVectorNumElements();
4184  bool isOnlyLowElement = true;
4185  bool usesOnlyOneValue = true;
4186  bool hasDominantValue = false;
4187  bool isConstant = true;
4188
4189  // Map of the number of times a particular SDValue appears in the
4190  // element list.
4191  DenseMap<SDValue, unsigned> ValueCounts;
4192  SDValue Value;
4193  for (unsigned i = 0; i < NumElts; ++i) {
4194    SDValue V = Op.getOperand(i);
4195    if (V.getOpcode() == ISD::UNDEF)
4196      continue;
4197    if (i > 0)
4198      isOnlyLowElement = false;
4199    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
4200      isConstant = false;
4201
4202    ValueCounts.insert(std::make_pair(V, 0));
4203    unsigned &Count = ValueCounts[V];
4204
4205    // Is this value dominant? (takes up more than half of the lanes)
4206    if (++Count > (NumElts / 2)) {
4207      hasDominantValue = true;
4208      Value = V;
4209    }
4210  }
4211  if (ValueCounts.size() != 1)
4212    usesOnlyOneValue = false;
4213  if (!Value.getNode() && ValueCounts.size() > 0)
4214    Value = ValueCounts.begin()->first;
4215
4216  if (ValueCounts.size() == 0)
4217    return DAG.getUNDEF(VT);
4218
4219  if (isOnlyLowElement)
4220    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
4221
4222  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4223
4224  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
4225  // i32 and try again.
4226  if (hasDominantValue && EltSize <= 32) {
4227    if (!isConstant) {
4228      SDValue N;
4229
4230      // If we are VDUPing a value that comes directly from a vector, that will
4231      // cause an unnecessary move to and from a GPR, where instead we could
4232      // just use VDUPLANE.
4233      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4234        // We need to create a new undef vector to use for the VDUPLANE if the
4235        // size of the vector from which we get the value is different than the
4236        // size of the vector that we need to create. We will insert the element
4237        // such that the register coalescer will remove unnecessary copies.
4238        if (VT != Value->getOperand(0).getValueType()) {
4239          ConstantSDNode *constIndex;
4240          constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
4241          assert(constIndex && "The index is not a constant!");
4242          unsigned index = constIndex->getAPIntValue().getLimitedValue() %
4243                             VT.getVectorNumElements();
4244          N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
4245                 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
4246                        Value, DAG.getConstant(index, MVT::i32)),
4247                           DAG.getConstant(index, MVT::i32));
4248        } else {
4249          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
4250                        Value->getOperand(0), Value->getOperand(1));
4251        }
4252      }
4253      else
4254        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
4255
4256      if (!usesOnlyOneValue) {
4257        // The dominant value was splatted as 'N', but we now have to insert
4258        // all differing elements.
4259        for (unsigned I = 0; I < NumElts; ++I) {
4260          if (Op.getOperand(I) == Value)
4261            continue;
4262          SmallVector<SDValue, 3> Ops;
4263          Ops.push_back(N);
4264          Ops.push_back(Op.getOperand(I));
4265          Ops.push_back(DAG.getConstant(I, MVT::i32));
4266          N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
4267        }
4268      }
4269      return N;
4270    }
4271    if (VT.getVectorElementType().isFloatingPoint()) {
4272      SmallVector<SDValue, 8> Ops;
4273      for (unsigned i = 0; i < NumElts; ++i)
4274        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
4275                                  Op.getOperand(i)));
4276      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
4277      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
4278      Val = LowerBUILD_VECTOR(Val, DAG, ST);
4279      if (Val.getNode())
4280        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4281    }
4282    if (usesOnlyOneValue) {
4283      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
4284      if (isConstant && Val.getNode())
4285        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
4286    }
4287  }
4288
4289  // If all elements are constants and the case above didn't get hit, fall back
4290  // to the default expansion, which will generate a load from the constant
4291  // pool.
4292  if (isConstant)
4293    return SDValue();
4294
4295  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
4296  if (NumElts >= 4) {
4297    SDValue shuffle = ReconstructShuffle(Op, DAG);
4298    if (shuffle != SDValue())
4299      return shuffle;
4300  }
4301
4302  // Vectors with 32- or 64-bit elements can be built by directly assigning
4303  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
4304  // will be legalized.
4305  if (EltSize >= 32) {
4306    // Do the expansion with floating-point types, since that is what the VFP
4307    // registers are defined to use, and since i64 is not legal.
4308    EVT EltVT = EVT::getFloatingPointVT(EltSize);
4309    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
4310    SmallVector<SDValue, 8> Ops;
4311    for (unsigned i = 0; i < NumElts; ++i)
4312      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
4313    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
4314    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4315  }
4316
4317  return SDValue();
4318}
4319
4320// Gather data to see if the operation can be modelled as a
4321// shuffle in combination with VEXTs.
4322SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
4323                                              SelectionDAG &DAG) const {
4324  DebugLoc dl = Op.getDebugLoc();
4325  EVT VT = Op.getValueType();
4326  unsigned NumElts = VT.getVectorNumElements();
4327
4328  SmallVector<SDValue, 2> SourceVecs;
4329  SmallVector<unsigned, 2> MinElts;
4330  SmallVector<unsigned, 2> MaxElts;
4331
4332  for (unsigned i = 0; i < NumElts; ++i) {
4333    SDValue V = Op.getOperand(i);
4334    if (V.getOpcode() == ISD::UNDEF)
4335      continue;
4336    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
4337      // A shuffle can only come from building a vector from various
4338      // elements of other vectors.
4339      return SDValue();
4340    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
4341               VT.getVectorElementType()) {
4342      // This code doesn't know how to handle shuffles where the vector
4343      // element types do not match (this happens because type legalization
4344      // promotes the return type of EXTRACT_VECTOR_ELT).
4345      // FIXME: It might be appropriate to extend this code to handle
4346      // mismatched types.
4347      return SDValue();
4348    }
4349
4350    // Record this extraction against the appropriate vector if possible...
4351    SDValue SourceVec = V.getOperand(0);
4352    // If the element number isn't a constant, we can't effectively
4353    // analyze what's going on.
4354    if (!isa<ConstantSDNode>(V.getOperand(1)))
4355      return SDValue();
4356    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
4357    bool FoundSource = false;
4358    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
4359      if (SourceVecs[j] == SourceVec) {
4360        if (MinElts[j] > EltNo)
4361          MinElts[j] = EltNo;
4362        if (MaxElts[j] < EltNo)
4363          MaxElts[j] = EltNo;
4364        FoundSource = true;
4365        break;
4366      }
4367    }
4368
4369    // Or record a new source if not...
4370    if (!FoundSource) {
4371      SourceVecs.push_back(SourceVec);
4372      MinElts.push_back(EltNo);
4373      MaxElts.push_back(EltNo);
4374    }
4375  }
4376
4377  // Currently we only do something sensible when at most two source vectors
4378  // are involved.
4379  if (SourceVecs.size() > 2)
4380    return SDValue();
4381
4382  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
4383  int VEXTOffsets[2] = {0, 0};
4384
4385  // This loop extracts the usage patterns of the source vectors
4386  // and prepares appropriate SDValues for a shuffle if possible.
4387  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
4388    if (SourceVecs[i].getValueType() == VT) {
4389      // No VEXT necessary
4390      ShuffleSrcs[i] = SourceVecs[i];
4391      VEXTOffsets[i] = 0;
4392      continue;
4393    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
4394      // It probably isn't worth padding out a smaller vector just to
4395      // break it down again in a shuffle.
4396      return SDValue();
4397    }
4398
4399    // Since only 64-bit and 128-bit vectors are legal on ARM and
4400    // we've eliminated the other cases...
4401    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
4402           "unexpected vector sizes in ReconstructShuffle");
4403
4404    if (MaxElts[i] - MinElts[i] >= NumElts) {
4405      // Span too large for a VEXT to cope
4406      return SDValue();
4407    }
4408
4409    if (MinElts[i] >= NumElts) {
4410      // The extraction can just take the second half
4411      VEXTOffsets[i] = NumElts;
4412      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4413                                   SourceVecs[i],
4414                                   DAG.getIntPtrConstant(NumElts));
4415    } else if (MaxElts[i] < NumElts) {
4416      // The extraction can just take the first half
4417      VEXTOffsets[i] = 0;
4418      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4419                                   SourceVecs[i],
4420                                   DAG.getIntPtrConstant(0));
4421    } else {
4422      // An actual VEXT is needed
4423      VEXTOffsets[i] = MinElts[i];
4424      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4425                                     SourceVecs[i],
4426                                     DAG.getIntPtrConstant(0));
4427      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
4428                                     SourceVecs[i],
4429                                     DAG.getIntPtrConstant(NumElts));
4430      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
4431                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
4432    }
4433  }
4434
4435  SmallVector<int, 8> Mask;
4436
4437  for (unsigned i = 0; i < NumElts; ++i) {
4438    SDValue Entry = Op.getOperand(i);
4439    if (Entry.getOpcode() == ISD::UNDEF) {
4440      Mask.push_back(-1);
4441      continue;
4442    }
4443
4444    SDValue ExtractVec = Entry.getOperand(0);
4445    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
4446                                          .getOperand(1))->getSExtValue();
4447    if (ExtractVec == SourceVecs[0]) {
4448      Mask.push_back(ExtractElt - VEXTOffsets[0]);
4449    } else {
4450      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
4451    }
4452  }
4453
4454  // Final check before we try to produce nonsense...
4455  if (isShuffleMaskLegal(Mask, VT))
4456    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
4457                                &Mask[0]);
4458
4459  return SDValue();
4460}
4461
4462/// isShuffleMaskLegal - Targets can use this to indicate that they only
4463/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4464/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4465/// are assumed to be legal.
4466bool
4467ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
4468                                      EVT VT) const {
4469  if (VT.getVectorNumElements() == 4 &&
4470      (VT.is128BitVector() || VT.is64BitVector())) {
4471    unsigned PFIndexes[4];
4472    for (unsigned i = 0; i != 4; ++i) {
4473      if (M[i] < 0)
4474        PFIndexes[i] = 8;
4475      else
4476        PFIndexes[i] = M[i];
4477    }
4478
4479    // Compute the index in the perfect shuffle table.
4480    unsigned PFTableIndex =
4481      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
4482    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
4483    unsigned Cost = (PFEntry >> 30);
4484
4485    if (Cost <= 4)
4486      return true;
4487  }
4488
4489  bool ReverseVEXT;
4490  unsigned Imm, WhichResult;
4491
4492  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4493  return (EltSize >= 32 ||
4494          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
4495          isVREVMask(M, VT, 64) ||
4496          isVREVMask(M, VT, 32) ||
4497          isVREVMask(M, VT, 16) ||
4498          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
4499          isVTBLMask(M, VT) ||
4500          isVTRNMask(M, VT, WhichResult) ||
4501          isVUZPMask(M, VT, WhichResult) ||
4502          isVZIPMask(M, VT, WhichResult) ||
4503          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
4504          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
4505          isVZIP_v_undef_Mask(M, VT, WhichResult));
4506}
4507
4508/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4509/// the specified operations to build the shuffle.
4510static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
4511                                      SDValue RHS, SelectionDAG &DAG,
4512                                      DebugLoc dl) {
4513  unsigned OpNum = (PFEntry >> 26) & 0x0F;
4514  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
4515  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
4516
4517  enum {
4518    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
4519    OP_VREV,
4520    OP_VDUP0,
4521    OP_VDUP1,
4522    OP_VDUP2,
4523    OP_VDUP3,
4524    OP_VEXT1,
4525    OP_VEXT2,
4526    OP_VEXT3,
4527    OP_VUZPL, // VUZP, left result
4528    OP_VUZPR, // VUZP, right result
4529    OP_VZIPL, // VZIP, left result
4530    OP_VZIPR, // VZIP, right result
4531    OP_VTRNL, // VTRN, left result
4532    OP_VTRNR  // VTRN, right result
4533  };
4534
4535  if (OpNum == OP_COPY) {
4536    if (LHSID == (1*9+2)*9+3) return LHS;
4537    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
4538    return RHS;
4539  }
4540
4541  SDValue OpLHS, OpRHS;
4542  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
4543  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
4544  EVT VT = OpLHS.getValueType();
4545
4546  switch (OpNum) {
4547  default: llvm_unreachable("Unknown shuffle opcode!");
4548  case OP_VREV:
4549    // VREV divides the vector in half and swaps within the half.
4550    if (VT.getVectorElementType() == MVT::i32 ||
4551        VT.getVectorElementType() == MVT::f32)
4552      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
4553    // vrev <4 x i16> -> VREV32
4554    if (VT.getVectorElementType() == MVT::i16)
4555      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
4556    // vrev <4 x i8> -> VREV16
4557    assert(VT.getVectorElementType() == MVT::i8);
4558    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
4559  case OP_VDUP0:
4560  case OP_VDUP1:
4561  case OP_VDUP2:
4562  case OP_VDUP3:
4563    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
4564                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
4565  case OP_VEXT1:
4566  case OP_VEXT2:
4567  case OP_VEXT3:
4568    return DAG.getNode(ARMISD::VEXT, dl, VT,
4569                       OpLHS, OpRHS,
4570                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
4571  case OP_VUZPL:
4572  case OP_VUZPR:
4573    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4574                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
4575  case OP_VZIPL:
4576  case OP_VZIPR:
4577    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4578                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
4579  case OP_VTRNL:
4580  case OP_VTRNR:
4581    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4582                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
4583  }
4584}
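
// Sketch of how the 13-bit LHSID/RHSID fields used above encode a
// four-element shuffle: four base-9 digits, most significant first, with the
// digit 8 standing for an undef lane (mirroring how PFTableIndex is built
// from PFIndexes).  Hypothetical helper for illustration only.
static inline void modelDecodeShuffleID(unsigned ID, unsigned Elts[4]) {
  for (int i = 3; i >= 0; --i) {
    Elts[i] = ID % 9;
    ID /= 9;
  }
}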
4585
4586static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
4587                                       ArrayRef<int> ShuffleMask,
4588                                       SelectionDAG &DAG) {
4589  // Check to see if we can use the VTBL instruction.
4590  SDValue V1 = Op.getOperand(0);
4591  SDValue V2 = Op.getOperand(1);
4592  DebugLoc DL = Op.getDebugLoc();
4593
4594  SmallVector<SDValue, 8> VTBLMask;
4595  for (ArrayRef<int>::iterator
4596         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
4597    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
4598
4599  if (V2.getNode()->getOpcode() == ISD::UNDEF)
4600    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
4601                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4602                                   &VTBLMask[0], 8));
4603
4604  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
4605                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4606                                 &VTBLMask[0], 8));
4607}
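
// Byte-level model of the VTBL semantics relied on above (illustrative only):
// each mask byte selects a byte from the table formed by the source
// registers, and any out-of-range index produces zero, which is why an
// arbitrary 8-entry mask is always legal here.
static inline unsigned char modelVTBLByte(const unsigned char *Table,
                                          unsigned TableLen,
                                          unsigned char Index) {
  return Index < TableLen ? Table[Index] : 0;
}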
4608
4609static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
4610  SDValue V1 = Op.getOperand(0);
4611  SDValue V2 = Op.getOperand(1);
4612  DebugLoc dl = Op.getDebugLoc();
4613  EVT VT = Op.getValueType();
4614  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4615
4616  // Convert shuffles that are directly supported on NEON to target-specific
4617  // DAG nodes, instead of keeping them as shuffles and matching them again
4618  // during code selection.  This is more efficient and avoids the possibility
4619  // of inconsistencies between legalization and selection.
4620  // FIXME: floating-point vectors should be canonicalized to integer vectors
4621  // of the same size so that they get CSEd properly.
4622  ArrayRef<int> ShuffleMask = SVN->getMask();
4623
4624  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4625  if (EltSize <= 32) {
4626    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
4627      int Lane = SVN->getSplatIndex();
4628      // If this is an undef splat, generate it via "just" vdup, if possible.
4629      if (Lane == -1) Lane = 0;
4630
4631      // Test if V1 is a SCALAR_TO_VECTOR.
4632      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4633        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
4634      }
4635      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
4636      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
4637      // reaches it).
4638      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
4639          !isa<ConstantSDNode>(V1.getOperand(0))) {
4640        bool IsScalarToVector = true;
4641        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
4642          if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
4643            IsScalarToVector = false;
4644            break;
4645          }
4646        if (IsScalarToVector)
4647          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
4648      }
4649      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
4650                         DAG.getConstant(Lane, MVT::i32));
4651    }
4652
4653    bool ReverseVEXT;
4654    unsigned Imm;
4655    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
4656      if (ReverseVEXT)
4657        std::swap(V1, V2);
4658      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
4659                         DAG.getConstant(Imm, MVT::i32));
4660    }
4661
4662    if (isVREVMask(ShuffleMask, VT, 64))
4663      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
4664    if (isVREVMask(ShuffleMask, VT, 32))
4665      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
4666    if (isVREVMask(ShuffleMask, VT, 16))
4667      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
4668
4669    // Check for Neon shuffles that modify both input vectors in place.
4670    // If both results are used, i.e., if there are two shuffles with the same
4671    // source operands and with masks corresponding to both results of one of
4672    // these operations, DAG memoization will ensure that a single node is
4673    // used for both shuffles.
4674    unsigned WhichResult;
4675    if (isVTRNMask(ShuffleMask, VT, WhichResult))
4676      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4677                         V1, V2).getValue(WhichResult);
4678    if (isVUZPMask(ShuffleMask, VT, WhichResult))
4679      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4680                         V1, V2).getValue(WhichResult);
4681    if (isVZIPMask(ShuffleMask, VT, WhichResult))
4682      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4683                         V1, V2).getValue(WhichResult);
4684
4685    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
4686      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4687                         V1, V1).getValue(WhichResult);
4688    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
4689      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4690                         V1, V1).getValue(WhichResult);
4691    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
4692      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4693                         V1, V1).getValue(WhichResult);
4694  }
4695
4696  // If the shuffle is not directly supported and it has 4 elements, use
4697  // the PerfectShuffle-generated table to synthesize it from other shuffles.
4698  unsigned NumElts = VT.getVectorNumElements();
4699  if (NumElts == 4) {
4700    unsigned PFIndexes[4];
4701    for (unsigned i = 0; i != 4; ++i) {
4702      if (ShuffleMask[i] < 0)
4703        PFIndexes[i] = 8;
4704      else
4705        PFIndexes[i] = ShuffleMask[i];
4706    }
4707
4708    // Compute the index in the perfect shuffle table.
4709    unsigned PFTableIndex =
4710      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
4711    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
4712    unsigned Cost = (PFEntry >> 30);
4713
4714    if (Cost <= 4)
4715      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
4716  }
4717
4718  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
4719  if (EltSize >= 32) {
4720    // Do the expansion with floating-point types, since that is what the VFP
4721    // registers are defined to use, and since i64 is not legal.
4722    EVT EltVT = EVT::getFloatingPointVT(EltSize);
4723    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
4724    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
4725    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
4726    SmallVector<SDValue, 8> Ops;
4727    for (unsigned i = 0; i < NumElts; ++i) {
4728      if (ShuffleMask[i] < 0)
4729        Ops.push_back(DAG.getUNDEF(EltVT));
4730      else
4731        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
4732                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
4733                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
4734                                                  MVT::i32)));
4735    }
4736    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
4737    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
4738  }
4739
4740  if (VT == MVT::v8i8) {
4741    SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
4742    if (NewOp.getNode())
4743      return NewOp;
4744  }
4745
4746  return SDValue();
4747}
4748
4749static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
4750  // INSERT_VECTOR_ELT is legal only for immediate indexes.
4751  SDValue Lane = Op.getOperand(2);
4752  if (!isa<ConstantSDNode>(Lane))
4753    return SDValue();
4754
4755  return Op;
4756}
4757
4758static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
4759  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
4760  SDValue Lane = Op.getOperand(1);
4761  if (!isa<ConstantSDNode>(Lane))
4762    return SDValue();
4763
4764  SDValue Vec = Op.getOperand(0);
4765  if (Op.getValueType() == MVT::i32 &&
4766      Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
4767    DebugLoc dl = Op.getDebugLoc();
4768    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
4769  }
4770
4771  return Op;
4772}
4773
4774static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
4775  // The only time a CONCAT_VECTORS operation can have legal types is when
4776  // two 64-bit vectors are concatenated to a 128-bit vector.
4777  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
4778         "unexpected CONCAT_VECTORS");
4779  DebugLoc dl = Op.getDebugLoc();
4780  SDValue Val = DAG.getUNDEF(MVT::v2f64);
4781  SDValue Op0 = Op.getOperand(0);
4782  SDValue Op1 = Op.getOperand(1);
4783  if (Op0.getOpcode() != ISD::UNDEF)
4784    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
4785                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
4786                      DAG.getIntPtrConstant(0));
4787  if (Op1.getOpcode() != ISD::UNDEF)
4788    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
4789                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
4790                      DAG.getIntPtrConstant(1));
4791  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
4792}
4793
4794/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
4795/// element has been zero/sign-extended, depending on the isSigned parameter,
4796/// from an integer type half its size.
4797static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
4798                                   bool isSigned) {
4799  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
4800  EVT VT = N->getValueType(0);
4801  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
4802    SDNode *BVN = N->getOperand(0).getNode();
4803    if (BVN->getValueType(0) != MVT::v4i32 ||
4804        BVN->getOpcode() != ISD::BUILD_VECTOR)
4805      return false;
4806    unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
4807    unsigned HiElt = 1 - LoElt;
4808    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
4809    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
4810    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
4811    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
4812    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
4813      return false;
4814    if (isSigned) {
4815      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
4816          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
4817        return true;
4818    } else {
4819      if (Hi0->isNullValue() && Hi1->isNullValue())
4820        return true;
4821    }
4822    return false;
4823  }
4824
4825  if (N->getOpcode() != ISD::BUILD_VECTOR)
4826    return false;
4827
4828  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
4829    SDNode *Elt = N->getOperand(i).getNode();
4830    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
4831      unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4832      unsigned HalfSize = EltSize / 2;
4833      if (isSigned) {
4834        if (!isIntN(HalfSize, C->getSExtValue()))
4835          return false;
4836      } else {
4837        if (!isUIntN(HalfSize, C->getZExtValue()))
4838          return false;
4839      }
4840      continue;
4841    }
4842    return false;
4843  }
4844
4845  return true;
4846}
4847
4848/// isSignExtended - Check if a node is a vector value that is sign-extended
4849/// or a constant BUILD_VECTOR with sign-extended elements.
4850static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
4851  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
4852    return true;
4853  if (isExtendedBUILD_VECTOR(N, DAG, true))
4854    return true;
4855  return false;
4856}
4857
4858/// isZeroExtended - Check if a node is a vector value that is zero-extended
4859/// or a constant BUILD_VECTOR with zero-extended elements.
4860static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
4861  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
4862    return true;
4863  if (isExtendedBUILD_VECTOR(N, DAG, false))
4864    return true;
4865  return false;
4866}
4867
4868/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
4869/// load, or BUILD_VECTOR with extended elements, return the unextended value.
4870static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
4871  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
4872    return N->getOperand(0);
4873  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
4874    return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
4875                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
4876                       LD->isNonTemporal(), LD->isInvariant(),
4877                       LD->getAlignment());
4878  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
4879  // have been legalized as a BITCAST from v4i32.
4880  if (N->getOpcode() == ISD::BITCAST) {
4881    SDNode *BVN = N->getOperand(0).getNode();
4882    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
4883           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
4884    unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
4885    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
4886                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
4887  }
4888  // Construct a new BUILD_VECTOR with elements truncated to half the size.
4889  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
4890  EVT VT = N->getValueType(0);
4891  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
4892  unsigned NumElts = VT.getVectorNumElements();
4893  MVT TruncVT = MVT::getIntegerVT(EltSize);
4894  SmallVector<SDValue, 8> Ops;
4895  for (unsigned i = 0; i != NumElts; ++i) {
4896    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
4897    const APInt &CInt = C->getAPIntValue();
4898    // Element types smaller than 32 bits are not legal, so use i32 elements.
4899    // The values are implicitly truncated so sext vs. zext doesn't matter.
4900    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
4901  }
4902  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
4903                     MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
4904}
4905
4906static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
4907  unsigned Opcode = N->getOpcode();
4908  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4909    SDNode *N0 = N->getOperand(0).getNode();
4910    SDNode *N1 = N->getOperand(1).getNode();
4911    return N0->hasOneUse() && N1->hasOneUse() &&
4912      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
4913  }
4914  return false;
4915}
4916
4917static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
4918  unsigned Opcode = N->getOpcode();
4919  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
4920    SDNode *N0 = N->getOperand(0).getNode();
4921    SDNode *N1 = N->getOperand(1).getNode();
4922    return N0->hasOneUse() && N1->hasOneUse() &&
4923      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
4924  }
4925  return false;
4926}
4927
4928static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
4929  // Multiplications are only custom-lowered for 128-bit vectors so that
4930  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
4931  EVT VT = Op.getValueType();
4932  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
4933  SDNode *N0 = Op.getOperand(0).getNode();
4934  SDNode *N1 = Op.getOperand(1).getNode();
4935  unsigned NewOpc = 0;
4936  bool isMLA = false;
4937  bool isN0SExt = isSignExtended(N0, DAG);
4938  bool isN1SExt = isSignExtended(N1, DAG);
4939  if (isN0SExt && isN1SExt)
4940    NewOpc = ARMISD::VMULLs;
4941  else {
4942    bool isN0ZExt = isZeroExtended(N0, DAG);
4943    bool isN1ZExt = isZeroExtended(N1, DAG);
4944    if (isN0ZExt && isN1ZExt)
4945      NewOpc = ARMISD::VMULLu;
4946    else if (isN1SExt || isN1ZExt) {
4947      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
4948      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
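      // If the add/sub is operand 0 we use it directly; if it is operand 1 and
      // operand 0 is the plain extend, the operands are swapped below so that
      // the MLA expansion only has to look at N0.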
4949      if (isN1SExt && isAddSubSExt(N0, DAG)) {
4950        NewOpc = ARMISD::VMULLs;
4951        isMLA = true;
4952      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
4953        NewOpc = ARMISD::VMULLu;
4954        isMLA = true;
4955      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
4956        std::swap(N0, N1);
4957        NewOpc = ARMISD::VMULLu;
4958        isMLA = true;
4959      }
4960    }
4961
4962    if (!NewOpc) {
4963      if (VT == MVT::v2i64)
4964        // Fall through to expand this.  It is not legal.
4965        return SDValue();
4966      else
4967        // Other vector multiplications are legal.
4968        return Op;
4969    }
4970  }
4971
4972  // Legalize to a VMULL instruction.
4973  DebugLoc DL = Op.getDebugLoc();
4974  SDValue Op0;
4975  SDValue Op1 = SkipExtension(N1, DAG);
4976  if (!isMLA) {
4977    Op0 = SkipExtension(N0, DAG);
4978    assert(Op0.getValueType().is64BitVector() &&
4979           Op1.getValueType().is64BitVector() &&
4980           "unexpected types for extended operands to VMULL");
4981    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
4982  }
4983
4984  // Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
4985  // isel lowering to take advantage of no-stall back-to-back vmul + vmla.
4986  //   vmull q0, d4, d6
4987  //   vmlal q0, d5, d6
4988  // is faster than
4989  //   vaddl q0, d4, d5
4990  //   vmovl q1, d6
4991  //   vmul  q0, q0, q1
4992  SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
4993  SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
4994  EVT Op1VT = Op1.getValueType();
4995  return DAG.getNode(N0->getOpcode(), DL, VT,
4996                     DAG.getNode(NewOpc, DL, VT,
4997                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
4998                     DAG.getNode(NewOpc, DL, VT,
4999                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
5000}
5001
5002static SDValue
5003LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
5004  // Convert to float
5005  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
5006  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
5007  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
5008  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
5009  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
5010  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
5011  // Get reciprocal estimate.
5012  // float4 recip = vrecpeq_f32(yf);
5013  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5014                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
5015  // Because char has a smaller range than uchar, we can actually get away
5016  // without any Newton steps.  This requires that we use a weird bias
5017  // of 0xb000, however (again, this has been exhaustively tested).
5018  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
5019  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
5020  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
5021  Y = DAG.getConstant(0xb000, MVT::i32);
5022  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
5023  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
5024  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
5025  // Convert back to short.
5026  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
5027  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
5028  return X;
5029}
5030
5031static SDValue
5032LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
5033  SDValue N2;
5034  // Convert to float.
5035  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
5036  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
5037  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
5038  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
5039  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
5040  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
5041
5042  // Use reciprocal estimate and one refinement step.
5043  // float4 recip = vrecpeq_f32(yf);
5044  // recip *= vrecpsq_f32(yf, recip);
5045  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5046                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
5047  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5048                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5049                   N1, N2);
5050  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
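  // (vrecps(y, r) computes 2.0f - y*r, so the multiply above performs one
  // Newton-Raphson refinement of the vrecpe estimate: r' = r * (2 - y*r).)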
5051  // Because short has a smaller range than ushort, we can actually get away
5052  // with only a single Newton step.  This requires that we use a weird bias
5053  // of 0x89, however (again, this has been exhaustively tested).
5054  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
5055  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
5056  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
5057  N1 = DAG.getConstant(0x89, MVT::i32);
5058  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
5059  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
5060  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
5061  // Convert back to integer and return.
5062  // return vmovn_s32(vcvt_s32_f32(result));
5063  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
5064  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
5065  return N0;
5066}
5067
5068static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
5069  EVT VT = Op.getValueType();
5070  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
5071         "unexpected type for custom-lowering ISD::SDIV");
5072
5073  DebugLoc dl = Op.getDebugLoc();
5074  SDValue N0 = Op.getOperand(0);
5075  SDValue N1 = Op.getOperand(1);
5076  SDValue N2, N3;
5077
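  // For v8i8 (summary of the code below): sign extend to v8i16, split into two
  // v4i16 halves, divide each half with the v4i8-range routine, concatenate
  // the results, and truncate back to v8i8.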
5078  if (VT == MVT::v8i8) {
5079    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
5080    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
5081
5082    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5083                     DAG.getIntPtrConstant(4));
5084    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5085                     DAG.getIntPtrConstant(4));
5086    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5087                     DAG.getIntPtrConstant(0));
5088    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5089                     DAG.getIntPtrConstant(0));
5090
5091    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
5092    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
5093
5094    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
5095    N0 = LowerCONCAT_VECTORS(N0, DAG);
5096
5097    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
5098    return N0;
5099  }
5100  return LowerSDIV_v4i16(N0, N1, dl, DAG);
5101}
5102
5103static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
5104  EVT VT = Op.getValueType();
5105  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
5106         "unexpected type for custom-lowering ISD::UDIV");
5107
5108  DebugLoc dl = Op.getDebugLoc();
5109  SDValue N0 = Op.getOperand(0);
5110  SDValue N1 = Op.getOperand(1);
5111  SDValue N2, N3;
5112
5113  if (VT == MVT::v8i8) {
5114    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
5115    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
5116
5117    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5118                     DAG.getIntPtrConstant(4));
5119    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5120                     DAG.getIntPtrConstant(4));
5121    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
5122                     DAG.getIntPtrConstant(0));
5123    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
5124                     DAG.getIntPtrConstant(0));
5125
5126    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
5127    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
5128
5129    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
5130    N0 = LowerCONCAT_VECTORS(N0, DAG);
5131
5132    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
5133                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
5134                     N0);
5135    return N0;
5136  }
5137
5138  // v4i16 udiv ... Convert to float.
5139  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
5140  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
5141  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
5142  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
5143  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
5144  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
5145
5146  // Use reciprocal estimate and two refinement steps.
5147  // float4 recip = vrecpeq_f32(yf);
5148  // recip *= vrecpsq_f32(yf, recip);
5149  // recip *= vrecpsq_f32(yf, recip);
5150  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5151                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
5152  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5153                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5154                   BN1, N2);
5155  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
5156  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
5157                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
5158                   BN1, N2);
5159  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
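  // Two refinement steps are used here (versus one in the signed v4i16 case
  // above) because unsigned 16-bit inputs cover the full 0..65535 range and
  // need the extra accuracy.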
5160  // Simply multiplying by the reciprocal estimate can leave us a few ulps
5161  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
5162  // and that it will never cause us to return an answer too large).
5163  // float4 result = as_float4(as_int4(xf*recip) + 2);
5164  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
5165  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
5166  N1 = DAG.getConstant(2, MVT::i32);
5167  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
5168  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
5169  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
5170  // Convert back to integer and return.
5171  // return vmovn_u32(vcvt_s32_f32(result));
5172  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
5173  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
5174  return N0;
5175}
5176
5177static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
5178  EVT VT = Op.getNode()->getValueType(0);
5179  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5180
5181  unsigned Opc;
5182  bool ExtraOp = false;
5183  switch (Op.getOpcode()) {
5184  default: llvm_unreachable("Invalid code");
5185  case ISD::ADDC: Opc = ARMISD::ADDC; break;
5186  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
5187  case ISD::SUBC: Opc = ARMISD::SUBC; break;
5188  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
5189  }
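  // Each of these nodes produces the result value plus a carry/borrow flag
  // (the extra MVT::i32 value in VTs); ADDE/SUBE additionally consume an
  // incoming flag as a third operand, hence ExtraOp.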
5190
5191  if (!ExtraOp)
5192    return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
5193                       Op.getOperand(1));
5194  return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
5195                     Op.getOperand(1), Op.getOperand(2));
5196}
5197
5198static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
5199  // Monotonic load/store is legal for all targets
5200  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
5201    return Op;
5202
5203  // Acquire/Release load/store is not legal for targets without a
5204  // dmb or equivalent available.
5205  return SDValue();
5206}
5207
5208
5209static void
5210ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
5211                    SelectionDAG &DAG, unsigned NewOp) {
5212  DebugLoc dl = Node->getDebugLoc();
5213  assert (Node->getValueType(0) == MVT::i64 &&
5214          "Only know how to expand i64 atomics");
5215
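  // Expand the i64 atomic into a target node operating on 32-bit halves:
  // chain, ptr, lo/hi of Val1 (and lo/hi of Val2 for cmpxchg); the i64 result
  // is reassembled from the two i32 results with BUILD_PAIR below.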
5216  SmallVector<SDValue, 6> Ops;
5217  Ops.push_back(Node->getOperand(0)); // Chain
5218  Ops.push_back(Node->getOperand(1)); // Ptr
5219  // Low part of Val1
5220  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5221                            Node->getOperand(2), DAG.getIntPtrConstant(0)));
5222  // High part of Val1
5223  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5224                            Node->getOperand(2), DAG.getIntPtrConstant(1)));
5225  if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
5226    // Low part of Val2
5227    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5228                              Node->getOperand(3), DAG.getIntPtrConstant(0)));
5229    // High part of Val2
5230    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
5231                              Node->getOperand(3), DAG.getIntPtrConstant(1)));
5232  }
5233  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
5234  SDValue Result =
5235    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
5236                            cast<MemSDNode>(Node)->getMemOperand());
5237  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
5238  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
5239  Results.push_back(Result.getValue(2));
5240}
5241
5242SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
5243  switch (Op.getOpcode()) {
5244  default: llvm_unreachable("Don't know how to custom lower this!");
5245  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
5246  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
5247  case ISD::GlobalAddress:
5248    return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
5249      LowerGlobalAddressELF(Op, DAG);
5250  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
5251  case ISD::SELECT:        return LowerSELECT(Op, DAG);
5252  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
5253  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
5254  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
5255  case ISD::VASTART:       return LowerVASTART(Op, DAG);
5256  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
5257  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
5258  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
5259  case ISD::SINT_TO_FP:
5260  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
5261  case ISD::FP_TO_SINT:
5262  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
5263  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
5264  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
5265  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
5266  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
5267  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
5268  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
5269  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
5270                                                               Subtarget);
5271  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
5272  case ISD::SHL:
5273  case ISD::SRL:
5274  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
5275  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
5276  case ISD::SRL_PARTS:
5277  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
5278  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
5279  case ISD::SETCC:         return LowerVSETCC(Op, DAG);
5280  case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
5281  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
5282  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
5283  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
5284  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
5285  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
5286  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
5287  case ISD::MUL:           return LowerMUL(Op, DAG);
5288  case ISD::SDIV:          return LowerSDIV(Op, DAG);
5289  case ISD::UDIV:          return LowerUDIV(Op, DAG);
5290  case ISD::ADDC:
5291  case ISD::ADDE:
5292  case ISD::SUBC:
5293  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
5294  case ISD::ATOMIC_LOAD:
5295  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
5296  }
5297}
5298
5299/// ReplaceNodeResults - Replace the results of a node with an illegal result
5300/// type with new values built out of custom code.
5301void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
5302                                           SmallVectorImpl<SDValue>&Results,
5303                                           SelectionDAG &DAG) const {
5304  SDValue Res;
5305  switch (N->getOpcode()) {
5306  default:
5307    llvm_unreachable("Don't know how to custom expand this!");
5308  case ISD::BITCAST:
5309    Res = ExpandBITCAST(N, DAG);
5310    break;
5311  case ISD::SRL:
5312  case ISD::SRA:
5313    Res = Expand64BitShift(N, DAG, Subtarget);
5314    break;
5315  case ISD::ATOMIC_LOAD_ADD:
5316    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
5317    return;
5318  case ISD::ATOMIC_LOAD_AND:
5319    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
5320    return;
5321  case ISD::ATOMIC_LOAD_NAND:
5322    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
5323    return;
5324  case ISD::ATOMIC_LOAD_OR:
5325    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
5326    return;
5327  case ISD::ATOMIC_LOAD_SUB:
5328    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
5329    return;
5330  case ISD::ATOMIC_LOAD_XOR:
5331    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
5332    return;
5333  case ISD::ATOMIC_SWAP:
5334    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
5335    return;
5336  case ISD::ATOMIC_CMP_SWAP:
5337    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
5338    return;
5339  }
5340  if (Res.getNode())
5341    Results.push_back(Res);
5342}
5343
5344//===----------------------------------------------------------------------===//
5345//                           ARM Scheduler Hooks
5346//===----------------------------------------------------------------------===//
5347
5348MachineBasicBlock *
5349ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
5350                                     MachineBasicBlock *BB,
5351                                     unsigned Size) const {
5352  unsigned dest    = MI->getOperand(0).getReg();
5353  unsigned ptr     = MI->getOperand(1).getReg();
5354  unsigned oldval  = MI->getOperand(2).getReg();
5355  unsigned newval  = MI->getOperand(3).getReg();
5356  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5357  DebugLoc dl = MI->getDebugLoc();
5358  bool isThumb2 = Subtarget->isThumb2();
5359
5360  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5361  unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
5362    (const TargetRegisterClass*)&ARM::rGPRRegClass :
5363    (const TargetRegisterClass*)&ARM::GPRRegClass);
5364
5365  if (isThumb2) {
5366    MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
5367    MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
5368    MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
5369  }
5370
5371  unsigned ldrOpc, strOpc;
5372  switch (Size) {
5373  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
5374  case 1:
5375    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5376    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5377    break;
5378  case 2:
5379    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5380    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5381    break;
5382  case 4:
5383    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5384    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5385    break;
5386  }
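  // Note: the Thumb2 t2LDREX/t2STREX machine instructions carry an extra
  // immediate offset operand, which is why a 0 is appended to them below.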
5387
5388  MachineFunction *MF = BB->getParent();
5389  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5390  MachineFunction::iterator It = BB;
5391  ++It; // insert the new blocks after the current block
5392
5393  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
5394  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
5395  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5396  MF->insert(It, loop1MBB);
5397  MF->insert(It, loop2MBB);
5398  MF->insert(It, exitMBB);
5399
5400  // Transfer the remainder of BB and its successor edges to exitMBB.
5401  exitMBB->splice(exitMBB->begin(), BB,
5402                  llvm::next(MachineBasicBlock::iterator(MI)),
5403                  BB->end());
5404  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5405
5406  //  thisMBB:
5407  //   ...
5408  //   fallthrough --> loop1MBB
5409  BB->addSuccessor(loop1MBB);
5410
5411  // loop1MBB:
5412  //   ldrex dest, [ptr]
5413  //   cmp dest, oldval
5414  //   bne exitMBB
5415  BB = loop1MBB;
5416  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5417  if (ldrOpc == ARM::t2LDREX)
5418    MIB.addImm(0);
5419  AddDefaultPred(MIB);
5420  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
5421                 .addReg(dest).addReg(oldval));
5422  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5423    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5424  BB->addSuccessor(loop2MBB);
5425  BB->addSuccessor(exitMBB);
5426
5427  // loop2MBB:
5428  //   strex scratch, newval, [ptr]
5429  //   cmp scratch, #0
5430  //   bne loop1MBB
5431  BB = loop2MBB;
5432  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
5433  if (strOpc == ARM::t2STREX)
5434    MIB.addImm(0);
5435  AddDefaultPred(MIB);
5436  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5437                 .addReg(scratch).addImm(0));
5438  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5439    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5440  BB->addSuccessor(loop1MBB);
5441  BB->addSuccessor(exitMBB);
5442
5443  //  exitMBB:
5444  //   ...
5445  BB = exitMBB;
5446
5447  MI->eraseFromParent();   // The instruction is gone now.
5448
5449  return BB;
5450}
5451
5452MachineBasicBlock *
5453ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
5454                                    unsigned Size, unsigned BinOpcode) const {
5455  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
5456  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5457
5458  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5459  MachineFunction *MF = BB->getParent();
5460  MachineFunction::iterator It = BB;
5461  ++It;
5462
5463  unsigned dest = MI->getOperand(0).getReg();
5464  unsigned ptr = MI->getOperand(1).getReg();
5465  unsigned incr = MI->getOperand(2).getReg();
5466  DebugLoc dl = MI->getDebugLoc();
5467  bool isThumb2 = Subtarget->isThumb2();
5468
5469  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5470  if (isThumb2) {
5471    MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
5472    MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
5473  }
5474
5475  unsigned ldrOpc, strOpc;
5476  switch (Size) {
5477  default: llvm_unreachable("unsupported size for AtomicBinary!");
5478  case 1:
5479    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5480    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5481    break;
5482  case 2:
5483    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5484    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5485    break;
5486  case 4:
5487    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5488    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5489    break;
5490  }
5491
5492  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5493  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5494  MF->insert(It, loopMBB);
5495  MF->insert(It, exitMBB);
5496
5497  // Transfer the remainder of BB and its successor edges to exitMBB.
5498  exitMBB->splice(exitMBB->begin(), BB,
5499                  llvm::next(MachineBasicBlock::iterator(MI)),
5500                  BB->end());
5501  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5502
5503  const TargetRegisterClass *TRC = isThumb2 ?
5504    (const TargetRegisterClass*)&ARM::rGPRRegClass :
5505    (const TargetRegisterClass*)&ARM::GPRRegClass;
5506  unsigned scratch = MRI.createVirtualRegister(TRC);
5507  unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
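  // For ATOMIC_SWAP (BinOpcode == 0) there is nothing to compute: the incoming
  // value is stored back directly, so no second scratch register is needed.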
5508
5509  //  thisMBB:
5510  //   ...
5511  //   fallthrough --> loopMBB
5512  BB->addSuccessor(loopMBB);
5513
5514  //  loopMBB:
5515  //   ldrex dest, ptr
5516  //   <binop> scratch2, dest, incr
5517  //   strex scratch, scratch2, ptr
5518  //   cmp scratch, #0
5519  //   bne- loopMBB
5520  //   fallthrough --> exitMBB
5521  BB = loopMBB;
5522  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5523  if (ldrOpc == ARM::t2LDREX)
5524    MIB.addImm(0);
5525  AddDefaultPred(MIB);
5526  if (BinOpcode) {
5527    // Operand order needs to go the other way for NAND.
5528    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
5529      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
5530                     addReg(incr).addReg(dest)).addReg(0);
5531    else
5532      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
5533                     addReg(dest).addReg(incr)).addReg(0);
5534  }
5535
5536  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
5537  if (strOpc == ARM::t2STREX)
5538    MIB.addImm(0);
5539  AddDefaultPred(MIB);
5540  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5541                 .addReg(scratch).addImm(0));
5542  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5543    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5544
5545  BB->addSuccessor(loopMBB);
5546  BB->addSuccessor(exitMBB);
5547
5548  //  exitMBB:
5549  //   ...
5550  BB = exitMBB;
5551
5552  MI->eraseFromParent();   // The instruction is gone now.
5553
5554  return BB;
5555}
5556
5557MachineBasicBlock *
5558ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
5559                                          MachineBasicBlock *BB,
5560                                          unsigned Size,
5561                                          bool signExtend,
5562                                          ARMCC::CondCodes Cond) const {
5563  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5564
5565  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5566  MachineFunction *MF = BB->getParent();
5567  MachineFunction::iterator It = BB;
5568  ++It;
5569
5570  unsigned dest = MI->getOperand(0).getReg();
5571  unsigned ptr = MI->getOperand(1).getReg();
5572  unsigned incr = MI->getOperand(2).getReg();
5573  unsigned oldval = dest;
5574  DebugLoc dl = MI->getDebugLoc();
5575  bool isThumb2 = Subtarget->isThumb2();
5576
5577  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5578  if (isThumb2) {
5579    MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
5580    MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
5581  }
5582
5583  unsigned ldrOpc, strOpc, extendOpc;
5584  switch (Size) {
5585  default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
5586  case 1:
5587    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
5588    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
5589    extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
5590    break;
5591  case 2:
5592    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
5593    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
5594    extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
5595    break;
5596  case 4:
5597    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
5598    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
5599    extendOpc = 0;
5600    break;
5601  }
5602
5603  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5604  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5605  MF->insert(It, loopMBB);
5606  MF->insert(It, exitMBB);
5607
5608  // Transfer the remainder of BB and its successor edges to exitMBB.
5609  exitMBB->splice(exitMBB->begin(), BB,
5610                  llvm::next(MachineBasicBlock::iterator(MI)),
5611                  BB->end());
5612  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5613
5614  const TargetRegisterClass *TRC = isThumb2 ?
5615    (const TargetRegisterClass*)&ARM::rGPRRegClass :
5616    (const TargetRegisterClass*)&ARM::GPRRegClass;
5617  unsigned scratch = MRI.createVirtualRegister(TRC);
5618  unsigned scratch2 = MRI.createVirtualRegister(TRC);
5619
5620  //  thisMBB:
5621  //   ...
5622  //   fallthrough --> loopMBB
5623  BB->addSuccessor(loopMBB);
5624
5625  //  loopMBB:
5626  //   ldrex dest, ptr
5627  //   (sign extend dest, if required)
5628  //   cmp dest, incr
5629  //   cmov.cond scratch2, incr, dest
5630  //   strex scratch, scratch2, ptr
5631  //   cmp scratch, #0
5632  //   bne- loopMBB
5633  //   fallthrough --> exitMBB
5634  BB = loopMBB;
5635  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
5636  if (ldrOpc == ARM::t2LDREX)
5637    MIB.addImm(0);
5638  AddDefaultPred(MIB);
5639
5640  // Sign extend the value, if necessary.
5641  if (signExtend && extendOpc) {
5642    oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
5643    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
5644                     .addReg(dest)
5645                     .addImm(0));
5646  }
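  // For 8- and 16-bit signed min/max the loaded value is sign extended above
  // so the 32-bit compare below orders values correctly; 32-bit operations
  // (extendOpc == 0) skip this step.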
5647
5648  // Build compare and cmov instructions.
5649  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
5650                 .addReg(oldval).addReg(incr));
5651  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
5652         .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
5653
5654  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
5655  if (strOpc == ARM::t2STREX)
5656    MIB.addImm(0);
5657  AddDefaultPred(MIB);
5658  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5659                 .addReg(scratch).addImm(0));
5660  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5661    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5662
5663  BB->addSuccessor(loopMBB);
5664  BB->addSuccessor(exitMBB);
5665
5666  //  exitMBB:
5667  //   ...
5668  BB = exitMBB;
5669
5670  MI->eraseFromParent();   // The instruction is gone now.
5671
5672  return BB;
5673}
5674
5675MachineBasicBlock *
5676ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
5677                                      unsigned Op1, unsigned Op2,
5678                                      bool NeedsCarry, bool IsCmpxchg) const {
5679  // This also handles ATOMIC_SWAP, indicated by Op1==0.
5680  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5681
5682  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5683  MachineFunction *MF = BB->getParent();
5684  MachineFunction::iterator It = BB;
5685  ++It;
5686
5687  unsigned destlo = MI->getOperand(0).getReg();
5688  unsigned desthi = MI->getOperand(1).getReg();
5689  unsigned ptr = MI->getOperand(2).getReg();
5690  unsigned vallo = MI->getOperand(3).getReg();
5691  unsigned valhi = MI->getOperand(4).getReg();
5692  DebugLoc dl = MI->getDebugLoc();
5693  bool isThumb2 = Subtarget->isThumb2();
5694
5695  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5696  if (isThumb2) {
5697    MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
5698    MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
5699    MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
5700  }
5701
5702  unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
5703  unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
5704
5705  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5706  MachineBasicBlock *contBB = 0, *cont2BB = 0;
5707  if (IsCmpxchg) {
5708    contBB = MF->CreateMachineBasicBlock(LLVM_BB);
5709    cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
5710  }
5711  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
5712  MF->insert(It, loopMBB);
5713  if (IsCmpxchg) {
5714    MF->insert(It, contBB);
5715    MF->insert(It, cont2BB);
5716  }
5717  MF->insert(It, exitMBB);
5718
5719  // Transfer the remainder of BB and its successor edges to exitMBB.
5720  exitMBB->splice(exitMBB->begin(), BB,
5721                  llvm::next(MachineBasicBlock::iterator(MI)),
5722                  BB->end());
5723  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5724
5725  const TargetRegisterClass *TRC = isThumb2 ?
5726    (const TargetRegisterClass*)&ARM::tGPRRegClass :
5727    (const TargetRegisterClass*)&ARM::GPRRegClass;
5728  unsigned storesuccess = MRI.createVirtualRegister(TRC);
5729
5730  //  thisMBB:
5731  //   ...
5732  //   fallthrough --> loopMBB
5733  BB->addSuccessor(loopMBB);
5734
5735  //  loopMBB:
5736  //   ldrexd r2, r3, ptr
5737  //   <binopa> r0, r2, incr
5738  //   <binopb> r1, r3, incr
5739  //   strexd storesuccess, r0, r1, ptr
5740  //   cmp storesuccess, #0
5741  //   bne- loopMBB
5742  //   fallthrough --> exitMBB
5743  //
5744  // Note that the registers are explicitly specified because there is no
5745  // way to force the register allocator to allocate a register pair.
5746  //
5747  // FIXME: The hardcoded registers are not necessary for Thumb2, but we
5748  // need to properly enforce the restriction that the two output registers
5749  // for ldrexd must be different.
5750  BB = loopMBB;
5751  // Load
5752  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
5753                 .addReg(ARM::R2, RegState::Define)
5754                 .addReg(ARM::R3, RegState::Define).addReg(ptr));
5755  // Copy r2/r3 into dest.  (This copy will normally be coalesced.)
5756  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
5757  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
5758
5759  if (IsCmpxchg) {
5760    // Add early exit
5761    for (unsigned i = 0; i < 2; i++) {
5762      AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
5763                                                         ARM::CMPrr))
5764                     .addReg(i == 0 ? destlo : desthi)
5765                     .addReg(i == 0 ? vallo : valhi));
5766      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5767        .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5768      BB->addSuccessor(exitMBB);
5769      BB->addSuccessor(i == 0 ? contBB : cont2BB);
5770      BB = (i == 0 ? contBB : cont2BB);
5771    }
5772
5773    // Copy to physregs for strexd
5774    unsigned setlo = MI->getOperand(5).getReg();
5775    unsigned sethi = MI->getOperand(6).getReg();
5776    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
5777    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
5778  } else if (Op1) {
5779    // Perform binary operation
5780    AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
5781                   .addReg(destlo).addReg(vallo))
5782        .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
5783    AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
5784                   .addReg(desthi).addReg(valhi)).addReg(0);
5785  } else {
5786    // Copy to physregs for strexd
5787    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
5788    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
5789  }
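  // When NeedsCarry is set, the low-half opcode defines CPSR so that a
  // carry-consuming high-half opcode (e.g. ADC/SBC) can chain off it;
  // otherwise a 0 register is passed for the optional flag-setting operand.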
5790
5791  // Store
5792  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
5793                 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
5794  // Cmp+jump
5795  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
5796                 .addReg(storesuccess).addImm(0));
5797  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
5798    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
5799
5800  BB->addSuccessor(loopMBB);
5801  BB->addSuccessor(exitMBB);
5802
5803  //  exitMBB:
5804  //   ...
5805  BB = exitMBB;
5806
5807  MI->eraseFromParent();   // The instruction is gone now.
5808
5809  return BB;
5810}
5811
5812/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
5813/// registers the function context.
5814void ARMTargetLowering::
5815SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
5816                       MachineBasicBlock *DispatchBB, int FI) const {
5817  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5818  DebugLoc dl = MI->getDebugLoc();
5819  MachineFunction *MF = MBB->getParent();
5820  MachineRegisterInfo *MRI = &MF->getRegInfo();
5821  MachineConstantPool *MCP = MF->getConstantPool();
5822  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
5823  const Function *F = MF->getFunction();
5824
5825  bool isThumb = Subtarget->isThumb();
5826  bool isThumb2 = Subtarget->isThumb2();
5827
5828  unsigned PCLabelId = AFI->createPICLabelUId();
5829  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
5830  ARMConstantPoolValue *CPV =
5831    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
5832  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
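  // The constant pool entry holds the address of DispatchBB, materialized
  // pc-relative below; PCAdj accounts for the PC read-ahead (4 bytes in Thumb
  // mode, 8 in ARM mode) when the PC-relative add is applied.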
5833
5834  const TargetRegisterClass *TRC = isThumb ?
5835    (const TargetRegisterClass*)&ARM::tGPRRegClass :
5836    (const TargetRegisterClass*)&ARM::GPRRegClass;
5837
5838  // Grab constant pool and fixed stack memory operands.
5839  MachineMemOperand *CPMMO =
5840    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
5841                             MachineMemOperand::MOLoad, 4, 4);
5842
5843  MachineMemOperand *FIMMOSt =
5844    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
5845                             MachineMemOperand::MOStore, 4, 4);
5846
5847  // Load the address of the dispatch MBB into the jump buffer.
5848  if (isThumb2) {
5849    // Incoming value: jbuf
5850    //   ldr.n  r5, LCPI1_1
5851    //   orr    r5, r5, #1
5852    //   add    r5, pc
5853    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
5854    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5855    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
5856                   .addConstantPoolIndex(CPI)
5857                   .addMemOperand(CPMMO));
5858    // Set the low bit because of thumb mode.
5859    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5860    AddDefaultCC(
5861      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
5862                     .addReg(NewVReg1, RegState::Kill)
5863                     .addImm(0x01)));
5864    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5865    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
5866      .addReg(NewVReg2, RegState::Kill)
5867      .addImm(PCLabelId);
5868    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
5869                   .addReg(NewVReg3, RegState::Kill)
5870                   .addFrameIndex(FI)
5871                   .addImm(36)  // &jbuf[1] :: pc
5872                   .addMemOperand(FIMMOSt));
5873  } else if (isThumb) {
5874    // Incoming value: jbuf
5875    //   ldr.n  r1, LCPI1_4
5876    //   add    r1, pc
5877    //   mov    r2, #1
5878    //   orrs   r1, r2
5879    //   add    r2, $jbuf, #+4 ; &jbuf[1]
5880    //   str    r1, [r2]
5881    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5882    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
5883                   .addConstantPoolIndex(CPI)
5884                   .addMemOperand(CPMMO));
5885    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5886    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
5887      .addReg(NewVReg1, RegState::Kill)
5888      .addImm(PCLabelId);
5889    // Set the low bit because of thumb mode.
5890    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
5891    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
5892                   .addReg(ARM::CPSR, RegState::Define)
5893                   .addImm(1));
5894    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
5895    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
5896                   .addReg(ARM::CPSR, RegState::Define)
5897                   .addReg(NewVReg2, RegState::Kill)
5898                   .addReg(NewVReg3, RegState::Kill));
5899    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
5900    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
5901                   .addFrameIndex(FI)
5902                   .addImm(36)); // &jbuf[1] :: pc
5903    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
5904                   .addReg(NewVReg4, RegState::Kill)
5905                   .addReg(NewVReg5, RegState::Kill)
5906                   .addImm(0)
5907                   .addMemOperand(FIMMOSt));
5908  } else {
5909    // Incoming value: jbuf
5910    //   ldr  r1, LCPI1_1
5911    //   add  r1, pc, r1
5912    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
5913    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
5914    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12),  NewVReg1)
5915                   .addConstantPoolIndex(CPI)
5916                   .addImm(0)
5917                   .addMemOperand(CPMMO));
5918    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
5919    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
5920                   .addReg(NewVReg1, RegState::Kill)
5921                   .addImm(PCLabelId));
5922    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
5923                   .addReg(NewVReg2, RegState::Kill)
5924                   .addFrameIndex(FI)
5925                   .addImm(36)  // &jbuf[1] :: pc
5926                   .addMemOperand(FIMMOSt));
5927  }
5928}
5929
5930MachineBasicBlock *ARMTargetLowering::
5931EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
5932  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5933  DebugLoc dl = MI->getDebugLoc();
5934  MachineFunction *MF = MBB->getParent();
5935  MachineRegisterInfo *MRI = &MF->getRegInfo();
5936  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
5937  MachineFrameInfo *MFI = MF->getFrameInfo();
5938  int FI = MFI->getFunctionContextIndex();
5939
5940  const TargetRegisterClass *TRC = Subtarget->isThumb() ?
5941    (const TargetRegisterClass*)&ARM::tGPRRegClass :
5942    (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
5943
5944  // Get a mapping of the call site numbers to all of the landing pads they're
5945  // associated with.
5946  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
5947  unsigned MaxCSNum = 0;
5948  MachineModuleInfo &MMI = MF->getMMI();
5949  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
5950       ++BB) {
5951    if (!BB->isLandingPad()) continue;
5952
5953    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
5954    // pad.
5955    for (MachineBasicBlock::iterator
5956           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
5957      if (!II->isEHLabel()) continue;
5958
5959      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
5960      if (!MMI.hasCallSiteLandingPad(Sym)) continue;
5961
5962      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
5963      for (SmallVectorImpl<unsigned>::iterator
5964             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
5965           CSI != CSE; ++CSI) {
5966        CallSiteNumToLPad[*CSI].push_back(BB);
5967        MaxCSNum = std::max(MaxCSNum, *CSI);
5968      }
5969      break;
5970    }
5971  }
5972
5973  // Get an ordered list of the machine basic blocks for the jump table.
5974  std::vector<MachineBasicBlock*> LPadList;
5975  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
5976  LPadList.reserve(CallSiteNumToLPad.size());
5977  for (unsigned I = 1; I <= MaxCSNum; ++I) {
5978    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
5979    for (SmallVectorImpl<MachineBasicBlock*>::iterator
5980           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
5981      LPadList.push_back(*II);
5982      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
5983    }
5984  }
5985
5986  assert(!LPadList.empty() &&
5987         "No landing pad destinations for the dispatch jump table!");
5988
5989  // Create the jump table and associated information.
5990  MachineJumpTableInfo *JTI =
5991    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
5992  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
5993  unsigned UId = AFI->createJumpTableUId();
5994
5995  // Create the MBBs for the dispatch code.
5996
5997  // Shove the dispatch's address into the return slot in the function context.
5998  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
5999  DispatchBB->setIsLandingPad();
6000
6001  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
6002  BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
6003  DispatchBB->addSuccessor(TrapBB);
6004
6005  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
6006  DispatchBB->addSuccessor(DispContBB);
6007
6008  // Insert the new MBBs into the function.
6009  MF->insert(MF->end(), DispatchBB);
6010  MF->insert(MF->end(), DispContBB);
6011  MF->insert(MF->end(), TrapBB);
6012
6013  // Insert code into the entry block that creates and registers the function
6014  // context.
6015  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
6016
6017  MachineMemOperand *FIMMOLd =
6018    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
6019                             MachineMemOperand::MOLoad |
6020                             MachineMemOperand::MOVolatile, 4, 4);
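  // This slot in the function context holds the call-site index used below to
  // select a landing pad; it is loaded volatile because it can be written
  // outside the control flow visible to the optimizer at this point.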
6021
6022  if (AFI->isThumb1OnlyFunction())
6023    BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
6024  else if (!Subtarget->hasVFP2())
6025    BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
6026  else
6027    BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
6028
6029  unsigned NumLPads = LPadList.size();
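  // Each of the three paths below (Thumb2, Thumb1, ARM) performs the same
  // steps: load the call-site index from the function context, compare it
  // against NumLPads (branching to TrapBB if it is out of range), then index
  // the jump table and branch to the selected landing pad.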
6030  if (Subtarget->isThumb2()) {
6031    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6032    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
6033                   .addFrameIndex(FI)
6034                   .addImm(4)
6035                   .addMemOperand(FIMMOLd));
6036
6037    if (NumLPads < 256) {
6038      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
6039                     .addReg(NewVReg1)
6040                     .addImm(LPadList.size()));
6041    } else {
6042      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6043      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
6044                     .addImm(NumLPads & 0xFFFF));
6045
6046      unsigned VReg2 = VReg1;
6047      if ((NumLPads & 0xFFFF0000) != 0) {
6048        VReg2 = MRI->createVirtualRegister(TRC);
6049        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
6050                       .addReg(VReg1)
6051                       .addImm(NumLPads >> 16));
6052      }
6053
6054      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
6055                     .addReg(NewVReg1)
6056                     .addReg(VReg2));
6057    }
6058
6059    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
6060      .addMBB(TrapBB)
6061      .addImm(ARMCC::HI)
6062      .addReg(ARM::CPSR);
6063
6064    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6065    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
6066                   .addJumpTableIndex(MJTI)
6067                   .addImm(UId));
6068
6069    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6070    AddDefaultCC(
6071      AddDefaultPred(
6072        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
6073        .addReg(NewVReg3, RegState::Kill)
6074        .addReg(NewVReg1)
6075        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
6076
6077    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
6078      .addReg(NewVReg4, RegState::Kill)
6079      .addReg(NewVReg1)
6080      .addJumpTableIndex(MJTI)
6081      .addImm(UId);
6082  } else if (Subtarget->isThumb()) {
6083    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6084    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
6085                   .addFrameIndex(FI)
6086                   .addImm(1)
6087                   .addMemOperand(FIMMOLd));
6088
6089    if (NumLPads < 256) {
6090      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
6091                     .addReg(NewVReg1)
6092                     .addImm(NumLPads));
6093    } else {
6094      MachineConstantPool *ConstantPool = MF->getConstantPool();
6095      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
6096      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
6097
6098      // MachineConstantPool wants an explicit alignment.
6099      unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
6100      if (Align == 0)
6101        Align = getDataLayout()->getTypeAllocSize(C->getType());
6102      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
6103
6104      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6105      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
6106                     .addReg(VReg1, RegState::Define)
6107                     .addConstantPoolIndex(Idx));
6108      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
6109                     .addReg(NewVReg1)
6110                     .addReg(VReg1));
6111    }
6112
6113    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
6114      .addMBB(TrapBB)
6115      .addImm(ARMCC::HI)
6116      .addReg(ARM::CPSR);
6117
6118    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
6119    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
6120                   .addReg(ARM::CPSR, RegState::Define)
6121                   .addReg(NewVReg1)
6122                   .addImm(2));
6123
6124    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6125    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
6126                   .addJumpTableIndex(MJTI)
6127                   .addImm(UId));
6128
6129    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6130    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
6131                   .addReg(ARM::CPSR, RegState::Define)
6132                   .addReg(NewVReg2, RegState::Kill)
6133                   .addReg(NewVReg3));
6134
6135    MachineMemOperand *JTMMOLd =
6136      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
6137                               MachineMemOperand::MOLoad, 4, 4);
6138
6139    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6140    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
6141                   .addReg(NewVReg4, RegState::Kill)
6142                   .addImm(0)
6143                   .addMemOperand(JTMMOLd));
6144
6145    unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
6146    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
6147                   .addReg(ARM::CPSR, RegState::Define)
6148                   .addReg(NewVReg5, RegState::Kill)
6149                   .addReg(NewVReg3));
6150
6151    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
6152      .addReg(NewVReg6, RegState::Kill)
6153      .addJumpTableIndex(MJTI)
6154      .addImm(UId);
6155  } else {
6156    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
6157    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
6158                   .addFrameIndex(FI)
6159                   .addImm(4)
6160                   .addMemOperand(FIMMOLd));
6161
6162    if (NumLPads < 256) {
6163      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
6164                     .addReg(NewVReg1)
6165                     .addImm(NumLPads));
6166    } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
6167      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6168      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
6169                     .addImm(NumLPads & 0xFFFF));
6170
6171      unsigned VReg2 = VReg1;
6172      if ((NumLPads & 0xFFFF0000) != 0) {
6173        VReg2 = MRI->createVirtualRegister(TRC);
6174        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
6175                       .addReg(VReg1)
6176                       .addImm(NumLPads >> 16));
6177      }
6178
6179      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
6180                     .addReg(NewVReg1)
6181                     .addReg(VReg2));
6182    } else {
6183      MachineConstantPool *ConstantPool = MF->getConstantPool();
6184      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
6185      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
6186
6187      // MachineConstantPool wants an explicit alignment.
6188      unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
6189      if (Align == 0)
6190        Align = getDataLayout()->getTypeAllocSize(C->getType());
6191      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
6192
6193      unsigned VReg1 = MRI->createVirtualRegister(TRC);
6194      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
6195                     .addReg(VReg1, RegState::Define)
6196                     .addConstantPoolIndex(Idx)
6197                     .addImm(0));
6198      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
6199                     .addReg(NewVReg1)
6200                     .addReg(VReg1, RegState::Kill));
6201    }
6202
6203    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
6204      .addMBB(TrapBB)
6205      .addImm(ARMCC::HI)
6206      .addReg(ARM::CPSR);
6207
6208    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
6209    AddDefaultCC(
6210      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
6211                     .addReg(NewVReg1)
6212                     .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
6213    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
6214    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
6215                   .addJumpTableIndex(MJTI)
6216                   .addImm(UId));
6217
6218    MachineMemOperand *JTMMOLd =
6219      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
6220                               MachineMemOperand::MOLoad, 4, 4);
6221    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
6222    AddDefaultPred(
6223      BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
6224      .addReg(NewVReg3, RegState::Kill)
6225      .addReg(NewVReg4)
6226      .addImm(0)
6227      .addMemOperand(JTMMOLd));
6228
6229    BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
6230      .addReg(NewVReg5, RegState::Kill)
6231      .addReg(NewVReg4)
6232      .addJumpTableIndex(MJTI)
6233      .addImm(UId);
6234  }
6235
6236  // Add the jump table entries as successors to the MBB.
6237  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
6238  for (std::vector<MachineBasicBlock*>::iterator
6239         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
6240    MachineBasicBlock *CurMBB = *I;
6241    if (SeenMBBs.insert(CurMBB))
6242      DispContBB->addSuccessor(CurMBB);
6243  }
6244
6245  // N.B. the order the invoke BBs are processed in doesn't matter here.
6246  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
6247  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
6248  const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
6249  SmallVector<MachineBasicBlock*, 64> MBBLPads;
6250  for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
6251         I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
6252    MachineBasicBlock *BB = *I;
6253
6254    // Remove the landing pad successor from the invoke block and replace it
6255    // with the new dispatch block.
6256    SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
6257                                                  BB->succ_end());
6258    while (!Successors.empty()) {
6259      MachineBasicBlock *SMBB = Successors.pop_back_val();
6260      if (SMBB->isLandingPad()) {
6261        BB->removeSuccessor(SMBB);
6262        MBBLPads.push_back(SMBB);
6263      }
6264    }
6265
6266    BB->addSuccessor(DispatchBB);
6267
6268    // Find the invoke call and mark all of the callee-saved registers as
6269    // 'implicitly defined' so that they're spilled. This prevents later passes
6270    // from moving instructions to before the EH block, where they would never
6271    // be executed.
6272    for (MachineBasicBlock::reverse_iterator
6273           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
6274      if (!II->isCall()) continue;
6275
6276      DenseMap<unsigned, bool> DefRegs;
6277      for (MachineInstr::mop_iterator
6278             OI = II->operands_begin(), OE = II->operands_end();
6279           OI != OE; ++OI) {
6280        if (!OI->isReg()) continue;
6281        DefRegs[OI->getReg()] = true;
6282      }
6283
6284      MachineInstrBuilder MIB(&*II);
6285
6286      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
6287        unsigned Reg = SavedRegs[i];
6288        if (Subtarget->isThumb2() &&
6289            !ARM::tGPRRegClass.contains(Reg) &&
6290            !ARM::hGPRRegClass.contains(Reg))
6291          continue;
6292        if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
6293          continue;
6294        if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
6295          continue;
6296        if (!DefRegs[Reg])
6297          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
6298      }
6299
6300      break;
6301    }
6302  }
6303
6304  // Mark all former landing pads as non-landing pads. The dispatch is the only
6305  // landing pad now.
6306  for (SmallVectorImpl<MachineBasicBlock*>::iterator
6307         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
6308    (*I)->setIsLandingPad(false);
6309
6310  // The instruction is gone now.
6311  MI->eraseFromParent();
6312
6313  return MBB;
6314}
6315
6316static
6317MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
6318  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
6319       E = MBB->succ_end(); I != E; ++I)
6320    if (*I != Succ)
6321      return *I;
6322  llvm_unreachable("Expecting a BB with two successors!");
6323}
6324
6325MachineBasicBlock *ARMTargetLowering::
6326EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
6327  // This pseudo instruction has 4 operands: dst, src, size, alignment.
6328  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
6329  // Otherwise, we will generate unrolled scalar copies.
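  // For example (sizes here are illustrative, not taken from this file): with
  // a 64-byte threshold, a 48-byte copy would be fully unrolled by the code
  // below, while a 256-byte copy would use the loop expansion further down.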
6330  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6331  const BasicBlock *LLVM_BB = BB->getBasicBlock();
6332  MachineFunction::iterator It = BB;
6333  ++It;
6334
6335  unsigned dest = MI->getOperand(0).getReg();
6336  unsigned src = MI->getOperand(1).getReg();
6337  unsigned SizeVal = MI->getOperand(2).getImm();
6338  unsigned Align = MI->getOperand(3).getImm();
6339  DebugLoc dl = MI->getDebugLoc();
6340
6341  bool isThumb2 = Subtarget->isThumb2();
6342  MachineFunction *MF = BB->getParent();
6343  MachineRegisterInfo &MRI = MF->getRegInfo();
6344  unsigned ldrOpc, strOpc, UnitSize = 0;
6345
6346  const TargetRegisterClass *TRC = isThumb2 ?
6347    (const TargetRegisterClass*)&ARM::tGPRRegClass :
6348    (const TargetRegisterClass*)&ARM::GPRRegClass;
6349  const TargetRegisterClass *TRC_Vec = 0;
6350
6351  if (Align & 1) {
6352    ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
6353    strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
6354    UnitSize = 1;
6355  } else if (Align & 2) {
6356    ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
6357    strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
6358    UnitSize = 2;
6359  } else {
6360    // Check whether we can use NEON instructions.
6361    if (!MF->getFunction()->getFnAttributes().
6362          hasAttribute(Attributes::NoImplicitFloat) &&
6363        Subtarget->hasNEON()) {
6364      if ((Align % 16 == 0) && SizeVal >= 16) {
6365        ldrOpc = ARM::VLD1q32wb_fixed;
6366        strOpc = ARM::VST1q32wb_fixed;
6367        UnitSize = 16;
6368        TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
6369      }
6370      else if ((Align % 8 == 0) && SizeVal >= 8) {
6371        ldrOpc = ARM::VLD1d32wb_fixed;
6372        strOpc = ARM::VST1d32wb_fixed;
6373        UnitSize = 8;
6374        TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
6375      }
6376    }
6377    // Can't use NEON instructions.
6378    if (UnitSize == 0) {
6379      ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6380      strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
6381      UnitSize = 4;
6382    }
6383  }
6384
6385  unsigned BytesLeft = SizeVal % UnitSize;
6386  unsigned LoopSize = SizeVal - BytesLeft;
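  // Worked example (values chosen for illustration): SizeVal == 10 with
  // UnitSize == 4 gives LoopSize == 8 (two word-sized copies) and
  // BytesLeft == 2, which the byte-at-a-time epilogue below handles.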
6387
6388  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
6389    // Use LDR and STR to copy.
6390    // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
6391    // [destOut] = STR_POST(scratch, destIn, UnitSize)
6392    unsigned srcIn = src;
6393    unsigned destIn = dest;
6394    for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
6395      unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
6396      unsigned srcOut = MRI.createVirtualRegister(TRC);
6397      unsigned destOut = MRI.createVirtualRegister(TRC);
6398      if (UnitSize >= 8) {
6399        AddDefaultPred(BuildMI(*BB, MI, dl,
6400          TII->get(ldrOpc), scratch)
6401          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
6402
6403        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
6404          .addReg(destIn).addImm(0).addReg(scratch));
6405      } else if (isThumb2) {
6406        AddDefaultPred(BuildMI(*BB, MI, dl,
6407          TII->get(ldrOpc), scratch)
6408          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
6409
6410        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
6411          .addReg(scratch).addReg(destIn)
6412          .addImm(UnitSize));
6413      } else {
6414        AddDefaultPred(BuildMI(*BB, MI, dl,
6415          TII->get(ldrOpc), scratch)
6416          .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
6417          .addImm(UnitSize));
6418
6419        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
6420          .addReg(scratch).addReg(destIn)
6421          .addReg(0).addImm(UnitSize));
6422      }
6423      srcIn = srcOut;
6424      destIn = destOut;
6425    }
6426
6427    // Handle the leftover bytes with LDRB and STRB.
6428    // [scratch, srcOut] = LDRB_POST(srcIn, 1)
6429    // [destOut] = STRB_POST(scratch, destIn, 1)
6430    ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
6431    strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
6432    for (unsigned i = 0; i < BytesLeft; i++) {
6433      unsigned scratch = MRI.createVirtualRegister(TRC);
6434      unsigned srcOut = MRI.createVirtualRegister(TRC);
6435      unsigned destOut = MRI.createVirtualRegister(TRC);
6436      if (isThumb2) {
6437        AddDefaultPred(BuildMI(*BB, MI, dl,
6438          TII->get(ldrOpc),scratch)
6439          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
6440
6441        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
6442          .addReg(scratch).addReg(destIn)
6443          .addReg(0).addImm(1));
6444      } else {
6445        AddDefaultPred(BuildMI(*BB, MI, dl,
6446          TII->get(ldrOpc),scratch)
6447          .addReg(srcOut, RegState::Define).addReg(srcIn)
6448          .addReg(0).addImm(1));
6449
6450        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
6451          .addReg(scratch).addReg(destIn)
6452          .addReg(0).addImm(1));
6453      }
6454      srcIn = srcOut;
6455      destIn = destOut;
6456    }
6457    MI->eraseFromParent();   // The instruction is gone now.
6458    return BB;
6459  }
6460
6461  // Expand the pseudo op to a loop.
6462  // thisMBB:
6463  //   ...
6464  //   movw varEnd, # --> with thumb2
6465  //   movt varEnd, #
6466  //   ldrcp varEnd, idx --> without thumb2
6467  //   fallthrough --> loopMBB
6468  // loopMBB:
6469  //   PHI varPhi, varEnd, varLoop
6470  //   PHI srcPhi, src, srcLoop
6471  //   PHI destPhi, dst, destLoop
6472  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
6473  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
6474  //   subs varLoop, varPhi, #UnitSize
6475  //   bne loopMBB
6476  //   fallthrough --> exitMBB
6477  // exitMBB:
6478  //   epilogue to handle left-over bytes
6479  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
6480  //   [destOut] = STRB_POST(scratch, destLoop, 1)
6481  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6482  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6483  MF->insert(It, loopMBB);
6484  MF->insert(It, exitMBB);
6485
6486  // Transfer the remainder of BB and its successor edges to exitMBB.
6487  exitMBB->splice(exitMBB->begin(), BB,
6488                  llvm::next(MachineBasicBlock::iterator(MI)),
6489                  BB->end());
6490  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
6491
6492  // Load an immediate to varEnd.
6493  unsigned varEnd = MRI.createVirtualRegister(TRC);
6494  if (isThumb2) {
6495    unsigned VReg1 = varEnd;
6496    if ((LoopSize & 0xFFFF0000) != 0)
6497      VReg1 = MRI.createVirtualRegister(TRC);
6498    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
6499                   .addImm(LoopSize & 0xFFFF));
6500
6501    if ((LoopSize & 0xFFFF0000) != 0)
6502      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
6503                     .addReg(VReg1)
6504                     .addImm(LoopSize >> 16));
6505  } else {
6506    MachineConstantPool *ConstantPool = MF->getConstantPool();
6507    Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
6508    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
6509
6510    // MachineConstantPool wants an explicit alignment.
6511    unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
6512    if (Align == 0)
6513      Align = getDataLayout()->getTypeAllocSize(C->getType());
6514    unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
6515
6516    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
6517                   .addReg(varEnd, RegState::Define)
6518                   .addConstantPoolIndex(Idx)
6519                   .addImm(0));
6520  }
6521  BB->addSuccessor(loopMBB);
6522
6523  // Generate the loop body:
6524  //   varPhi = PHI(varLoop, varEnd)
6525  //   srcPhi = PHI(srcLoop, src)
6526  //   destPhi = PHI(destLoop, dst)
6527  MachineBasicBlock *entryBB = BB;
6528  BB = loopMBB;
6529  unsigned varLoop = MRI.createVirtualRegister(TRC);
6530  unsigned varPhi = MRI.createVirtualRegister(TRC);
6531  unsigned srcLoop = MRI.createVirtualRegister(TRC);
6532  unsigned srcPhi = MRI.createVirtualRegister(TRC);
6533  unsigned destLoop = MRI.createVirtualRegister(TRC);
6534  unsigned destPhi = MRI.createVirtualRegister(TRC);
6535
6536  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
6537    .addReg(varLoop).addMBB(loopMBB)
6538    .addReg(varEnd).addMBB(entryBB);
6539  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
6540    .addReg(srcLoop).addMBB(loopMBB)
6541    .addReg(src).addMBB(entryBB);
6542  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
6543    .addReg(destLoop).addMBB(loopMBB)
6544    .addReg(dest).addMBB(entryBB);
6545
6546  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
6547  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
6548  unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
6549  if (UnitSize >= 8) {
6550    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
6551      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
6552
6553    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
6554      .addReg(destPhi).addImm(0).addReg(scratch));
6555  } else if (isThumb2) {
6556    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
6557      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
6558
6559    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
6560      .addReg(scratch).addReg(destPhi)
6561      .addImm(UnitSize));
6562  } else {
6563    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
6564      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
6565      .addImm(UnitSize));
6566
6567    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
6568      .addReg(scratch).addReg(destPhi)
6569      .addReg(0).addImm(UnitSize));
6570  }
6571
6572  // Decrement loop variable by UnitSize.
6573  MachineInstrBuilder MIB = BuildMI(BB, dl,
6574    TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
6575  AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
6576  MIB->getOperand(5).setReg(ARM::CPSR);
6577  MIB->getOperand(5).setIsDef(true);
6578
6579  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
6580    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
6581
6582  // loopMBB can loop back to loopMBB or fall through to exitMBB.
6583  BB->addSuccessor(loopMBB);
6584  BB->addSuccessor(exitMBB);
6585
6586  // Add epilogue to handle BytesLeft.
6587  BB = exitMBB;
6588  MachineInstr *StartOfExit = exitMBB->begin();
6589  ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
6590  strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
6591
6592  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
6593  //   [destOut] = STRB_POST(scratch, destLoop, 1)
6594  unsigned srcIn = srcLoop;
6595  unsigned destIn = destLoop;
6596  for (unsigned i = 0; i < BytesLeft; i++) {
6597    unsigned scratch = MRI.createVirtualRegister(TRC);
6598    unsigned srcOut = MRI.createVirtualRegister(TRC);
6599    unsigned destOut = MRI.createVirtualRegister(TRC);
6600    if (isThumb2) {
6601      AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
6602        TII->get(ldrOpc),scratch)
6603        .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
6604
6605      AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
6606        .addReg(scratch).addReg(destIn)
6607        .addImm(1));
6608    } else {
6609      AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
6610        TII->get(ldrOpc),scratch)
6611        .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
6612
6613      AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
6614        .addReg(scratch).addReg(destIn)
6615        .addReg(0).addImm(1));
6616    }
6617    srcIn = srcOut;
6618    destIn = destOut;
6619  }
6620
6621  MI->eraseFromParent();   // The instruction is gone now.
6622  return BB;
6623}
6624
6625MachineBasicBlock *
6626ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
6627                                               MachineBasicBlock *BB) const {
6628  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6629  DebugLoc dl = MI->getDebugLoc();
6630  bool isThumb2 = Subtarget->isThumb2();
6631  switch (MI->getOpcode()) {
6632  default: {
6633    MI->dump();
6634    llvm_unreachable("Unexpected instr type to insert");
6635  }
6636  // The Thumb2 pre-indexed stores have the same MI operands; they just
6637  // define them differently from the isel patterns in the .td files, so
6638  // they need pseudos.
6639  case ARM::t2STR_preidx:
6640    MI->setDesc(TII->get(ARM::t2STR_PRE));
6641    return BB;
6642  case ARM::t2STRB_preidx:
6643    MI->setDesc(TII->get(ARM::t2STRB_PRE));
6644    return BB;
6645  case ARM::t2STRH_preidx:
6646    MI->setDesc(TII->get(ARM::t2STRH_PRE));
6647    return BB;
6648
6649  case ARM::STRi_preidx:
6650  case ARM::STRBi_preidx: {
6651    unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
6652      ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
6653    // Decode the offset.
6654    unsigned Offset = MI->getOperand(4).getImm();
6655    bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
6656    Offset = ARM_AM::getAM2Offset(Offset);
6657    if (isSub)
6658      Offset = -Offset;
6659
6660    MachineMemOperand *MMO = *MI->memoperands_begin();
6661    BuildMI(*BB, MI, dl, TII->get(NewOpc))
6662      .addOperand(MI->getOperand(0))  // Rn_wb
6663      .addOperand(MI->getOperand(1))  // Rt
6664      .addOperand(MI->getOperand(2))  // Rn
6665      .addImm(Offset)                 // offset (skip GPR==zero_reg)
6666      .addOperand(MI->getOperand(5))  // pred
6667      .addOperand(MI->getOperand(6))
6668      .addMemOperand(MMO);
6669    MI->eraseFromParent();
6670    return BB;
6671  }
6672  case ARM::STRr_preidx:
6673  case ARM::STRBr_preidx:
6674  case ARM::STRH_preidx: {
6675    unsigned NewOpc;
6676    switch (MI->getOpcode()) {
6677    default: llvm_unreachable("unexpected opcode!");
6678    case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
6679    case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
6680    case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
6681    }
6682    MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
6683    for (unsigned i = 0; i < MI->getNumOperands(); ++i)
6684      MIB.addOperand(MI->getOperand(i));
6685    MI->eraseFromParent();
6686    return BB;
6687  }
6688  case ARM::ATOMIC_LOAD_ADD_I8:
6689     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6690  case ARM::ATOMIC_LOAD_ADD_I16:
6691     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6692  case ARM::ATOMIC_LOAD_ADD_I32:
6693     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
6694
6695  case ARM::ATOMIC_LOAD_AND_I8:
6696     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6697  case ARM::ATOMIC_LOAD_AND_I16:
6698     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6699  case ARM::ATOMIC_LOAD_AND_I32:
6700     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6701
6702  case ARM::ATOMIC_LOAD_OR_I8:
6703     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6704  case ARM::ATOMIC_LOAD_OR_I16:
6705     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6706  case ARM::ATOMIC_LOAD_OR_I32:
6707     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6708
6709  case ARM::ATOMIC_LOAD_XOR_I8:
6710     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6711  case ARM::ATOMIC_LOAD_XOR_I16:
6712     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6713  case ARM::ATOMIC_LOAD_XOR_I32:
6714     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6715
6716  case ARM::ATOMIC_LOAD_NAND_I8:
6717     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6718  case ARM::ATOMIC_LOAD_NAND_I16:
6719     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6720  case ARM::ATOMIC_LOAD_NAND_I32:
6721     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
6722
6723  case ARM::ATOMIC_LOAD_SUB_I8:
6724     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6725  case ARM::ATOMIC_LOAD_SUB_I16:
6726     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6727  case ARM::ATOMIC_LOAD_SUB_I32:
6728     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
6729
6730  case ARM::ATOMIC_LOAD_MIN_I8:
6731     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
6732  case ARM::ATOMIC_LOAD_MIN_I16:
6733     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
6734  case ARM::ATOMIC_LOAD_MIN_I32:
6735     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
6736
6737  case ARM::ATOMIC_LOAD_MAX_I8:
6738     return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
6739  case ARM::ATOMIC_LOAD_MAX_I16:
6740     return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
6741  case ARM::ATOMIC_LOAD_MAX_I32:
6742     return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
6743
6744  case ARM::ATOMIC_LOAD_UMIN_I8:
6745     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
6746  case ARM::ATOMIC_LOAD_UMIN_I16:
6747     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
6748  case ARM::ATOMIC_LOAD_UMIN_I32:
6749     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
6750
6751  case ARM::ATOMIC_LOAD_UMAX_I8:
6752     return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
6753  case ARM::ATOMIC_LOAD_UMAX_I16:
6754     return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
6755  case ARM::ATOMIC_LOAD_UMAX_I32:
6756     return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
6757
6758  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
6759  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
6760  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
6761
6762  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
6763  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
6764  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
6765
6766
6767  case ARM::ATOMADD6432:
6768    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
6769                              isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
6770                              /*NeedsCarry*/ true);
6771  case ARM::ATOMSUB6432:
6772    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
6773                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
6774                              /*NeedsCarry*/ true);
6775  case ARM::ATOMOR6432:
6776    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
6777                              isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
6778  case ARM::ATOMXOR6432:
6779    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
6780                              isThumb2 ? ARM::t2EORrr : ARM::EORrr);
6781  case ARM::ATOMAND6432:
6782    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
6783                              isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
6784  case ARM::ATOMSWAP6432:
6785    return EmitAtomicBinary64(MI, BB, 0, 0, false);
6786  case ARM::ATOMCMPXCHG6432:
6787    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
6788                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
6789                              /*NeedsCarry*/ false, /*IsCmpxchg*/true);
6790
6791  case ARM::tMOVCCr_pseudo: {
6792    // To "insert" a SELECT_CC instruction, we actually have to insert the
6793    // diamond control-flow pattern.  The incoming instruction knows the
6794    // destination vreg to set, the condition code register to branch on, the
6795    // true/false values to select between, and a branch opcode to use.
6796    const BasicBlock *LLVM_BB = BB->getBasicBlock();
6797    MachineFunction::iterator It = BB;
6798    ++It;
6799
6800    //  thisMBB:
6801    //  ...
6802    //   TrueVal = ...
6803    //   cmpTY ccX, r1, r2
6804    //   bCC copy1MBB
6805    //   fallthrough --> copy0MBB
6806    MachineBasicBlock *thisMBB  = BB;
6807    MachineFunction *F = BB->getParent();
6808    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
6809    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
6810    F->insert(It, copy0MBB);
6811    F->insert(It, sinkMBB);
6812
6813    // Transfer the remainder of BB and its successor edges to sinkMBB.
6814    sinkMBB->splice(sinkMBB->begin(), BB,
6815                    llvm::next(MachineBasicBlock::iterator(MI)),
6816                    BB->end());
6817    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
6818
6819    BB->addSuccessor(copy0MBB);
6820    BB->addSuccessor(sinkMBB);
6821
6822    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
6823      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
6824
6825    //  copy0MBB:
6826    //   %FalseValue = ...
6827    //   # fallthrough to sinkMBB
6828    BB = copy0MBB;
6829
6830    // Update machine-CFG edges
6831    BB->addSuccessor(sinkMBB);
6832
6833    //  sinkMBB:
6834    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
6835    //  ...
6836    BB = sinkMBB;
6837    BuildMI(*BB, BB->begin(), dl,
6838            TII->get(ARM::PHI), MI->getOperand(0).getReg())
6839      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
6840      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
6841
6842    MI->eraseFromParent();   // The pseudo instruction is gone now.
6843    return BB;
6844  }
6845
6846  case ARM::BCCi64:
6847  case ARM::BCCZi64: {
6848    // If there is an unconditional branch to the other successor, remove it.
6849    BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
6850
6851    // Compare both parts that make up the double comparison separately for
6852    // equality.
6853    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
6854
6855    unsigned LHS1 = MI->getOperand(1).getReg();
6856    unsigned LHS2 = MI->getOperand(2).getReg();
6857    if (RHSisZero) {
6858      AddDefaultPred(BuildMI(BB, dl,
6859                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
6860                     .addReg(LHS1).addImm(0));
6861      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
6862        .addReg(LHS2).addImm(0)
6863        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
6864    } else {
6865      unsigned RHS1 = MI->getOperand(3).getReg();
6866      unsigned RHS2 = MI->getOperand(4).getReg();
6867      AddDefaultPred(BuildMI(BB, dl,
6868                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
6869                     .addReg(LHS1).addReg(RHS1));
6870      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
6871        .addReg(LHS2).addReg(RHS2)
6872        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
6873    }
6874
6875    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
6876    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
6877    if (MI->getOperand(0).getImm() == ARMCC::NE)
6878      std::swap(destMBB, exitMBB);
6879
6880    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
6881      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
6882    if (isThumb2)
6883      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
6884    else
6885      BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
6886
6887    MI->eraseFromParent();   // The pseudo instruction is gone now.
6888    return BB;
6889  }
6890
6891  case ARM::Int_eh_sjlj_setjmp:
6892  case ARM::Int_eh_sjlj_setjmp_nofp:
6893  case ARM::tInt_eh_sjlj_setjmp:
6894  case ARM::t2Int_eh_sjlj_setjmp:
6895  case ARM::t2Int_eh_sjlj_setjmp_nofp:
6896    EmitSjLjDispatchBlock(MI, BB);
6897    return BB;
6898
6899  case ARM::ABS:
6900  case ARM::t2ABS: {
6901    // To insert an ABS instruction, we have to insert the
6902    // diamond control-flow pattern.  The incoming instruction knows the
6903    // source vreg to test against 0 and the destination vreg to set.
6906    // It transforms
6907    //     V1 = ABS V0
6908    // into
6909    //     V2 = MOVS V0
6910    //     BCC                      (branch to SinkBB if V0 >= 0)
6911    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
6912    //     SinkBB: V1 = PHI(V2, V3)
6913    const BasicBlock *LLVM_BB = BB->getBasicBlock();
6914    MachineFunction::iterator BBI = BB;
6915    ++BBI;
6916    MachineFunction *Fn = BB->getParent();
6917    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
6918    MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
6919    Fn->insert(BBI, RSBBB);
6920    Fn->insert(BBI, SinkBB);
6921
6922    unsigned int ABSSrcReg = MI->getOperand(1).getReg();
6923    unsigned int ABSDstReg = MI->getOperand(0).getReg();
6924    bool isThumb2 = Subtarget->isThumb2();
6925    MachineRegisterInfo &MRI = Fn->getRegInfo();
6926    // In Thumb mode, S must not be specified if the source register is SP or PC,
6927    // nor if the destination register is SP, so restrict the register class.
6928    unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
6929      (const TargetRegisterClass*)&ARM::rGPRRegClass :
6930      (const TargetRegisterClass*)&ARM::GPRRegClass);
6931
6932    // Transfer the remainder of BB and its successor edges to sinkMBB.
6933    SinkBB->splice(SinkBB->begin(), BB,
6934      llvm::next(MachineBasicBlock::iterator(MI)),
6935      BB->end());
6936    SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6937
6938    BB->addSuccessor(RSBBB);
6939    BB->addSuccessor(SinkBB);
6940
6941    // fall through to SinkMBB
6942    RSBBB->addSuccessor(SinkBB);
6943
6944    // insert a cmp at the end of BB
6945    AddDefaultPred(BuildMI(BB, dl,
6946                           TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
6947                   .addReg(ABSSrcReg).addImm(0));
6948
6949    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
6950    BuildMI(BB, dl,
6951      TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
6952      .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
6953
6954    // insert rsbri in RSBBB
6955    // Note: BCC and rsbri will be converted into predicated rsbmi
6956    // by the if-conversion pass.
6957    BuildMI(*RSBBB, RSBBB->begin(), dl,
6958      TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
6959      .addReg(ABSSrcReg, RegState::Kill)
6960      .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
6961
6962    // insert PHI in SinkBB,
6963    // reuse ABSDstReg to not change uses of ABS instruction
6964    BuildMI(*SinkBB, SinkBB->begin(), dl,
6965      TII->get(ARM::PHI), ABSDstReg)
6966      .addReg(NewRsbDstReg).addMBB(RSBBB)
6967      .addReg(ABSSrcReg).addMBB(BB);
6968
6969    // remove ABS instruction
6970    MI->eraseFromParent();
6971
6972    // return last added BB
6973    return SinkBB;
6974  }
6975  case ARM::COPY_STRUCT_BYVAL_I32:
6976    ++NumLoopByVals;
6977    return EmitStructByval(MI, BB);
6978  }
6979}
6980
6981void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
6982                                                      SDNode *Node) const {
6983  if (!MI->hasPostISelHook()) {
6984    assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
6985           "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
6986    return;
6987  }
6988
6989  const MCInstrDesc *MCID = &MI->getDesc();
6990  // After isel, adjust instructions that may set the 's' bit, i.e. ADC, SBC, RSB,
6991  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
6992  // operand is still set to noreg. If needed, set the optional operand's
6993  // register to CPSR, and remove the redundant implicit def.
6994  //
6995  // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
6996
6997  // Rename pseudo opcodes.
6998  unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
6999  if (NewOpc) {
7000    const ARMBaseInstrInfo *TII =
7001      static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
7002    MCID = &TII->get(NewOpc);
7003
7004    assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
7005           "converted opcode should be the same except for cc_out");
7006
7007    MI->setDesc(*MCID);
7008
7009    // Add the optional cc_out operand
7010    MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
7011  }
7012  unsigned ccOutIdx = MCID->getNumOperands() - 1;
7013
7014  // Any ARM instruction that sets the 's' bit should specify an optional
7015  // "cc_out" operand in the last operand position.
7016  if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
7017    assert(!NewOpc && "Optional cc_out operand required");
7018    return;
7019  }
7020  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
7021  // since we already have an optional CPSR def.
7022  bool definesCPSR = false;
7023  bool deadCPSR = false;
7024  for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
7025       i != e; ++i) {
7026    const MachineOperand &MO = MI->getOperand(i);
7027    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
7028      definesCPSR = true;
7029      if (MO.isDead())
7030        deadCPSR = true;
7031      MI->RemoveOperand(i);
7032      break;
7033    }
7034  }
7035  if (!definesCPSR) {
7036    assert(!NewOpc && "Optional cc_out operand required");
7037    return;
7038  }
7039  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
7040  if (deadCPSR) {
7041    assert(!MI->getOperand(ccOutIdx).getReg() &&
7042           "expect uninitialized optional cc_out operand");
7043    return;
7044  }
7045
7046  // If this instruction was defined with an optional CPSR def and its dag node
7047  // had a live implicit CPSR def, then activate the optional CPSR def.
7048  MachineOperand &MO = MI->getOperand(ccOutIdx);
7049  MO.setReg(ARM::CPSR);
7050  MO.setIsDef(true);
7051}
7052
7053//===----------------------------------------------------------------------===//
7054//                           ARM Optimization Hooks
7055//===----------------------------------------------------------------------===//
7056
7057// Helper function that checks if N is a null or all ones constant.
7058static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
7059  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
7060  if (!C)
7061    return false;
7062  return AllOnes ? C->isAllOnesValue() : C->isNullValue();
7063}
7064
7065// Return true if N is conditionally 0 or all ones.
7066// Detects these expressions where cc is an i1 value:
7067//
7068//   (select cc 0, y)   [AllOnes=0]
7069//   (select cc y, 0)   [AllOnes=0]
7070//   (zext cc)          [AllOnes=0]
7071//   (sext cc)          [AllOnes=0/1]
7072//   (select cc -1, y)  [AllOnes=1]
7073//   (select cc y, -1)  [AllOnes=1]
7074//
7075  // Invert is set when N is the null/all-ones constant in the CC == false case.
7076// OtherOp is set to the alternative value of N.
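// For example (a sketch of the AllOnes=0 case): for N = (select cc, 0, y) this
// returns true with CC = cc, Invert = false, and OtherOp = y; for
// N = (select cc, y, 0) it returns Invert = true and OtherOp = y.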
7077static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
7078                                       SDValue &CC, bool &Invert,
7079                                       SDValue &OtherOp,
7080                                       SelectionDAG &DAG) {
7081  switch (N->getOpcode()) {
7082  default: return false;
7083  case ISD::SELECT: {
7084    CC = N->getOperand(0);
7085    SDValue N1 = N->getOperand(1);
7086    SDValue N2 = N->getOperand(2);
7087    if (isZeroOrAllOnes(N1, AllOnes)) {
7088      Invert = false;
7089      OtherOp = N2;
7090      return true;
7091    }
7092    if (isZeroOrAllOnes(N2, AllOnes)) {
7093      Invert = true;
7094      OtherOp = N1;
7095      return true;
7096    }
7097    return false;
7098  }
7099  case ISD::ZERO_EXTEND:
7100    // (zext cc) can never be the all ones value.
7101    if (AllOnes)
7102      return false;
7103    // Fall through.
7104  case ISD::SIGN_EXTEND: {
7105    EVT VT = N->getValueType(0);
7106    CC = N->getOperand(0);
7107    if (CC.getValueType() != MVT::i1)
7108      return false;
7109    Invert = !AllOnes;
7110    if (AllOnes)
7111      // When looking for an AllOnes constant, N is an sext, and the 'other'
7112      // value is 0.
7113      OtherOp = DAG.getConstant(0, VT);
7114    else if (N->getOpcode() == ISD::ZERO_EXTEND)
7115      // When looking for a 0 constant, N can be zext or sext.
7116      OtherOp = DAG.getConstant(1, VT);
7117    else
7118      OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
7119    return true;
7120  }
7121  }
7122}
7123
7124// Combine a constant select operand into its use:
7125//
7126//   (add (select cc, 0, c), x)  -> (select cc, x, (add, x, c))
7127//   (sub x, (select cc, 0, c))  -> (select cc, x, (sub, x, c))
7128//   (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))  [AllOnes=1]
7129//   (or  (select cc, 0, c), x)  -> (select cc, x, (or, x, c))
7130//   (xor (select cc, 0, c), x)  -> (select cc, x, (xor, x, c))
7131//
7132// The transform is rejected if the select doesn't have a constant operand that
7133// is null, or all ones when AllOnes is set.
7134//
7135// Also recognize sext/zext from i1:
7136//
7137//   (add (zext cc), x) -> (select cc (add x, 1), x)
7138//   (add (sext cc), x) -> (select cc (add x, -1), x)
7139//
7140// These transformations eventually create predicated instructions.
7141//
7142// @param N       The node to transform.
7143// @param Slct    The N operand that is a select.
7144// @param OtherOp The other N operand (x above).
7145// @param DCI     Context.
7146// @param AllOnes Require the select constant to be all ones instead of null.
7147// @returns The new node, or SDValue() on failure.
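// A worked instance of the first transform above (the constant 7 is chosen
// purely for illustration): N = (add (select cc, 0, 7), x), Slct = the select,
// OtherOp = x yields (select cc, x, (add x, 7)).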
7148static
7149SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
7150                            TargetLowering::DAGCombinerInfo &DCI,
7151                            bool AllOnes = false) {
7152  SelectionDAG &DAG = DCI.DAG;
7153  EVT VT = N->getValueType(0);
7154  SDValue NonConstantVal;
7155  SDValue CCOp;
7156  bool SwapSelectOps;
7157  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
7158                                  NonConstantVal, DAG))
7159    return SDValue();
7160
7161  // Slct is now known to be the desired identity constant when CC is true.
7162  SDValue TrueVal = OtherOp;
7163  SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
7164                                 OtherOp, NonConstantVal);
7165  // Unless SwapSelectOps says CC should be false.
7166  if (SwapSelectOps)
7167    std::swap(TrueVal, FalseVal);
7168
7169  return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
7170                     CCOp, TrueVal, FalseVal);
7171}
7172
7173// Attempt combineSelectAndUse on each operand of a commutative operator N.
7174static
7175SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
7176                                       TargetLowering::DAGCombinerInfo &DCI) {
7177  SDValue N0 = N->getOperand(0);
7178  SDValue N1 = N->getOperand(1);
7179  if (N0.getNode()->hasOneUse()) {
7180    SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
7181    if (Result.getNode())
7182      return Result;
7183  }
7184  if (N1.getNode()->hasOneUse()) {
7185    SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
7186    if (Result.getNode())
7187      return Result;
7188  }
7189  return SDValue();
7190}
7191
7192  // AddCombineToVPADDL - For a pair-wise add on NEON, use the VPADDL instruction
7193  // (only after legalization).
7194static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
7195                                 TargetLowering::DAGCombinerInfo &DCI,
7196                                 const ARMSubtarget *Subtarget) {
7197
7198  // Only perform the optimization after legalization, and only if NEON is
7199  // available. We also expect both operands to be BUILD_VECTORs.
7200  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
7201      || N0.getOpcode() != ISD::BUILD_VECTOR
7202      || N1.getOpcode() != ISD::BUILD_VECTOR)
7203    return SDValue();
7204
7205  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
7206  EVT VT = N->getValueType(0);
7207  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
7208    return SDValue();
7209
7210  // Check that the vector operands are of the right form.
7211  // N0 and N1 are BUILD_VECTOR nodes whose operands are all EXTRACT_VECTOR_ELT
7212  // nodes, one per element of the result vector.  Each EXTRACT_VECTOR_ELT
7213  // should read the same input vector, with N0 taking the even indices and
7214  // N1 the odd ones, so that we have a pair-wise add pattern.
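  // For example (an illustrative sketch; types chosen for exposition): with a
  // v4i16 vector V,
  //   N0 = BUILD_VECTOR (extractelt V, 0), (extractelt V, 2)
  //   N1 = BUILD_VECTOR (extractelt V, 1), (extractelt V, 3)
  // so (add N0, N1) sums adjacent lanes of V, i.e. vpaddl(V) truncated back to
  // the original element width.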
7215
7216  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
7217  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7218    return SDValue();
7219  SDValue Vec = N0->getOperand(0)->getOperand(0);
7220  SDNode *V = Vec.getNode();
7221  unsigned nextIndex = 0;
7222
7223  // For each operand of the ADD that is a BUILD_VECTOR, check that each of
7224  // its operands is an EXTRACT_VECTOR_ELT from the same vector with the
7225  // appropriate index.
7226  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
7227    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
7228        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
7229
7230      SDValue ExtVec0 = N0->getOperand(i);
7231      SDValue ExtVec1 = N1->getOperand(i);
7232
7233      // The first operand is the vector; verify it's the same.
7234      if (V != ExtVec0->getOperand(0).getNode() ||
7235          V != ExtVec1->getOperand(0).getNode())
7236        return SDValue();
7237
7238      // The second operand is the constant index; verify it's correct.
7239      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
7240      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
7241
7242      // For the constants, we expect the even index in N0 and the odd index in N1.
7243      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
7244          || C1->getZExtValue() != nextIndex+1)
7245        return SDValue();
7246
7247      // Increment index.
7248      nextIndex+=2;
7249    } else
7250      return SDValue();
7251  }
7252
7253  // Create VPADDL node.
7254  SelectionDAG &DAG = DCI.DAG;
7255  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7256
7257  // Build operand list.
7258  SmallVector<SDValue, 8> Ops;
7259  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
7260                                TLI.getPointerTy()));
7261
7262  // Input is the vector.
7263  Ops.push_back(Vec);
7264
7265  // Get widened type and narrowed type.
7266  MVT widenType;
7267  unsigned numElem = VT.getVectorNumElements();
7268  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
7269    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
7270    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
7271    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
7272    default:
7273      llvm_unreachable("Invalid vector element type for padd optimization.");
7274  }
7275
7276  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
7277                            widenType, &Ops[0], Ops.size());
7278  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
7279}
7280
7281static SDValue findMUL_LOHI(SDValue V) {
7282  if (V->getOpcode() == ISD::UMUL_LOHI ||
7283      V->getOpcode() == ISD::SMUL_LOHI)
7284    return V;
7285  return SDValue();
7286}
7287
7288static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
7289                                     TargetLowering::DAGCombinerInfo &DCI,
7290                                     const ARMSubtarget *Subtarget) {
7291
7292  if (Subtarget->isThumb1Only()) return SDValue();
7293
7294  // Only perform the checks after legalize when the pattern is available.
7295  if (DCI.isBeforeLegalize()) return SDValue();
7296
7297  // Look for multiply add opportunities.
7298  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
7299  // each add node consumes a value from the ISD::UMUL_LOHI and there is
7300  // a glue link from the first add to the second add.
7301  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
7302  // a S/UMLAL instruction.
7303  //          loAdd   UMUL_LOHI
7304  //            \    / :lo    \ :hi
7305  //             \  /          \          [no multiline comment]
7306  //              ADDC         |  hiAdd
7307  //                 \ :glue  /  /
7308  //                  \      /  /
7309  //                    ADDE
7310  //
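  // Concretely (an illustrative sketch): for a 64-bit accumulate where
  //   {lo, hi} = UMUL_LOHI(a, b), ADDC(lo, addLo), ADDE(hi, addHi, glue),
  // the three nodes are replaced by one ARMISD::UMLAL (or SMLAL for the
  // signed form) that produces both 32-bit result halves.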
7311  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
7312  SDValue AddcOp0 = AddcNode->getOperand(0);
7313  SDValue AddcOp1 = AddcNode->getOperand(1);
7314
7315  // Check if the two operands are from the same mul_lohi node.
7316  if (AddcOp0.getNode() == AddcOp1.getNode())
7317    return SDValue();
7318
7319  assert(AddcNode->getNumValues() == 2 &&
7320         AddcNode->getValueType(0) == MVT::i32 &&
7321         AddcNode->getValueType(1) == MVT::Glue &&
7322         "Expect ADDC with two result values: i32, glue");
7323
7324  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
7325  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
7326      AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
7327      AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
7328      AddcOp1->getOpcode() != ISD::SMUL_LOHI)
7329    return SDValue();
7330
7331  // Look for the glued ADDE.
7332  SDNode* AddeNode = AddcNode->getGluedUser();
7333  if (AddeNode == NULL)
7334    return SDValue();
7335
7336  // Make sure it is really an ADDE.
7337  if (AddeNode->getOpcode() != ISD::ADDE)
7338    return SDValue();
7339
7340  assert(AddeNode->getNumOperands() == 3 &&
7341         AddeNode->getOperand(2).getValueType() == MVT::Glue &&
7342         "ADDE node has the wrong inputs");
7343
7344  // Check for the triangle shape.
7345  SDValue AddeOp0 = AddeNode->getOperand(0);
7346  SDValue AddeOp1 = AddeNode->getOperand(1);
7347
7348  // Make sure that the ADDE operands are not coming from the same node.
7349  if (AddeOp0.getNode() == AddeOp1.getNode())
7350    return SDValue();
7351
7352  // Find the MUL_LOHI node walking up ADDE's operands.
7353  bool IsLeftOperandMUL = false;
7354  SDValue MULOp = findMUL_LOHI(AddeOp0);
7355  if (MULOp == SDValue())
7356   MULOp = findMUL_LOHI(AddeOp1);
7357  else
7358    IsLeftOperandMUL = true;
7359  if (MULOp == SDValue())
7360     return SDValue();
7361
7362  // Figure out the right opcode.
7363  unsigned Opc = MULOp->getOpcode();
7364  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
7365
7366  // Figure out the high and low input values to the MLAL node.
7367  SDValue* HiMul = &MULOp;
7368  SDValue* HiAdd = NULL;
7369  SDValue* LoMul = NULL;
7370  SDValue* LowAdd = NULL;
7371
7372  if (IsLeftOperandMUL)
7373    HiAdd = &AddeOp1;
7374  else
7375    HiAdd = &AddeOp0;
7376
7377
7378  if (AddcOp0->getOpcode() == Opc) {
7379    LoMul = &AddcOp0;
7380    LowAdd = &AddcOp1;
7381  }
7382  if (AddcOp1->getOpcode() == Opc) {
7383    LoMul = &AddcOp1;
7384    LowAdd = &AddcOp0;
7385  }
7386
7387  if (LoMul == NULL)
7388    return SDValue();
7389
7390  if (LoMul->getNode() != HiMul->getNode())
7391    return SDValue();
7392
7393  // Create the merged node.
7394  SelectionDAG &DAG = DCI.DAG;
7395
7396  // Build operand list.
7397  SmallVector<SDValue, 8> Ops;
7398  Ops.push_back(LoMul->getOperand(0));
7399  Ops.push_back(LoMul->getOperand(1));
7400  Ops.push_back(*LowAdd);
7401  Ops.push_back(*HiAdd);
7402
7403  SDValue MLALNode =  DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
7404                                 DAG.getVTList(MVT::i32, MVT::i32),
7405                                 &Ops[0], Ops.size());
7406
7407  // Replace the ADD nodes' uses with the MLAL node's values.
7408  SDValue HiMLALResult(MLALNode.getNode(), 1);
7409  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
7410
7411  SDValue LoMLALResult(MLALNode.getNode(), 0);
7412  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
7413
7414  // Return original node to notify the driver to stop replacing.
7415  SDValue resNode(AddcNode, 0);
7416  return resNode;
7417}
7418
7419/// PerformADDCCombine - Target-specific dag combine transform from
7420/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
7421static SDValue PerformADDCCombine(SDNode *N,
7422                                 TargetLowering::DAGCombinerInfo &DCI,
7423                                 const ARMSubtarget *Subtarget) {
7424
7425  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
7426
7427}
7428
7429/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
7430/// operands N0 and N1.  This is a helper for PerformADDCombine that is
7431/// called with the default operands, and if that fails, with commuted
7432/// operands.
7433static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
7434                                          TargetLowering::DAGCombinerInfo &DCI,
7435                                          const ARMSubtarget *Subtarget){
7436
7437  // Attempt to create vpaddl for this add.
7438  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
7439  if (Result.getNode())
7440    return Result;
7441
7442  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
7443  if (N0.getNode()->hasOneUse()) {
7444    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
7445    if (Result.getNode()) return Result;
7446  }
7447  return SDValue();
7448}
7449
7450/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
7451///
7452static SDValue PerformADDCombine(SDNode *N,
7453                                 TargetLowering::DAGCombinerInfo &DCI,
7454                                 const ARMSubtarget *Subtarget) {
7455  SDValue N0 = N->getOperand(0);
7456  SDValue N1 = N->getOperand(1);
7457
7458  // First try with the default operand order.
7459  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
7460  if (Result.getNode())
7461    return Result;
7462
7463  // If that didn't work, try again with the operands commuted.
7464  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
7465}
7466
7467/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
7468///
7469static SDValue PerformSUBCombine(SDNode *N,
7470                                 TargetLowering::DAGCombinerInfo &DCI) {
7471  SDValue N0 = N->getOperand(0);
7472  SDValue N1 = N->getOperand(1);
7473
7474  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
7475  if (N1.getNode()->hasOneUse()) {
7476    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
7477    if (Result.getNode()) return Result;
7478  }
7479
7480  return SDValue();
7481}
7482
7483/// PerformVMULCombine
7484/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
7485/// special multiplier accumulator forwarding.
7486///   vmul d3, d0, d2
7487///   vmla d3, d1, d2
7488/// is faster than
7489///   vadd d3, d0, d1
7490///   vmul d3, d3, d2
7491static SDValue PerformVMULCombine(SDNode *N,
7492                                  TargetLowering::DAGCombinerInfo &DCI,
7493                                  const ARMSubtarget *Subtarget) {
7494  if (!Subtarget->hasVMLxForwarding())
7495    return SDValue();
7496
7497  SelectionDAG &DAG = DCI.DAG;
7498  SDValue N0 = N->getOperand(0);
7499  SDValue N1 = N->getOperand(1);
7500  unsigned Opcode = N0.getOpcode();
7501  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
7502      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
7503    Opcode = N1.getOpcode();
7504    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
7505        Opcode != ISD::FADD && Opcode != ISD::FSUB)
7506      return SDValue();
7507    std::swap(N0, N1);
7508  }
7509
7510  EVT VT = N->getValueType(0);
7511  DebugLoc DL = N->getDebugLoc();
7512  SDValue N00 = N0->getOperand(0);
7513  SDValue N01 = N0->getOperand(1);
7514  return DAG.getNode(Opcode, DL, VT,
7515                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
7516                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
7517}
7518
7519static SDValue PerformMULCombine(SDNode *N,
7520                                 TargetLowering::DAGCombinerInfo &DCI,
7521                                 const ARMSubtarget *Subtarget) {
7522  SelectionDAG &DAG = DCI.DAG;
7523
7524  if (Subtarget->isThumb1Only())
7525    return SDValue();
7526
7527  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
7528    return SDValue();
7529
7530  EVT VT = N->getValueType(0);
7531  if (VT.is64BitVector() || VT.is128BitVector())
7532    return PerformVMULCombine(N, DCI, Subtarget);
7533  if (VT != MVT::i32)
7534    return SDValue();
7535
7536  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
7537  if (!C)
7538    return SDValue();
7539
7540  int64_t MulAmt = C->getSExtValue();
7541  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
7542
7543  ShiftAmt = ShiftAmt & (32 - 1);
7544  SDValue V = N->getOperand(0);
7545  DebugLoc DL = N->getDebugLoc();
7546
7547  SDValue Res;
7548  MulAmt >>= ShiftAmt;
7549
7550  if (MulAmt >= 0) {
7551    if (isPowerOf2_32(MulAmt - 1)) {
7552      // (mul x, 2^N + 1) => (add (shl x, N), x)
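      // For instance, (mul x, 9) => (add (shl x, 3), x), since 9 == 2^3 + 1.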
7553      Res = DAG.getNode(ISD::ADD, DL, VT,
7554                        V,
7555                        DAG.getNode(ISD::SHL, DL, VT,
7556                                    V,
7557                                    DAG.getConstant(Log2_32(MulAmt - 1),
7558                                                    MVT::i32)));
7559    } else if (isPowerOf2_32(MulAmt + 1)) {
7560      // (mul x, 2^N - 1) => (sub (shl x, N), x)
7561      Res = DAG.getNode(ISD::SUB, DL, VT,
7562                        DAG.getNode(ISD::SHL, DL, VT,
7563                                    V,
7564                                    DAG.getConstant(Log2_32(MulAmt + 1),
7565                                                    MVT::i32)),
7566                        V);
7567    } else
7568      return SDValue();
7569  } else {
7570    uint64_t MulAmtAbs = -MulAmt;
7571    if (isPowerOf2_32(MulAmtAbs + 1)) {
7572      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
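      // For instance, (mul x, -7) => (sub x, (shl x, 3)), since x - 8*x == -7*x.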
7573      Res = DAG.getNode(ISD::SUB, DL, VT,
7574                        V,
7575                        DAG.getNode(ISD::SHL, DL, VT,
7576                                    V,
7577                                    DAG.getConstant(Log2_32(MulAmtAbs + 1),
7578                                                    MVT::i32)));
7579    } else if (isPowerOf2_32(MulAmtAbs - 1)) {
7580      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
7581      Res = DAG.getNode(ISD::ADD, DL, VT,
7582                        V,
7583                        DAG.getNode(ISD::SHL, DL, VT,
7584                                    V,
7585                                    DAG.getConstant(Log2_32(MulAmtAbs-1),
7586                                                    MVT::i32)));
7587      Res = DAG.getNode(ISD::SUB, DL, VT,
7588                        DAG.getConstant(0, MVT::i32),Res);
7589
7590    } else
7591      return SDValue();
7592  }
7593
7594  if (ShiftAmt != 0)
7595    Res = DAG.getNode(ISD::SHL, DL, VT,
7596                      Res, DAG.getConstant(ShiftAmt, MVT::i32));
7597
7598  // Do not add new nodes to DAG combiner worklist.
7599  DCI.CombineTo(N, Res, false);
7600  return SDValue();
7601}
7602
7603static SDValue PerformANDCombine(SDNode *N,
7604                                 TargetLowering::DAGCombinerInfo &DCI,
7605                                 const ARMSubtarget *Subtarget) {
7606
7607  // Attempt to use immediate-form VBIC
7608  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
7609  DebugLoc dl = N->getDebugLoc();
7610  EVT VT = N->getValueType(0);
7611  SelectionDAG &DAG = DCI.DAG;
7612
7613  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7614    return SDValue();
7615
7616  APInt SplatBits, SplatUndef;
7617  unsigned SplatBitSize;
7618  bool HasAnyUndefs;
7619  if (BVN &&
7620      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7621    if (SplatBitSize <= 64) {
7622      EVT VbicVT;
7623      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
7624                                      SplatUndef.getZExtValue(), SplatBitSize,
7625                                      DAG, VbicVT, VT.is128BitVector(),
7626                                      OtherModImm);
7627      if (Val.getNode()) {
7628        SDValue Input =
7629          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
7630        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
7631        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
7632      }
7633    }
7634  }
7635
7636  if (!Subtarget->isThumb1Only()) {
7637    // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
7638    SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
7639    if (Result.getNode())
7640      return Result;
7641  }
7642
7643  return SDValue();
7644}
7645
7646/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
7647static SDValue PerformORCombine(SDNode *N,
7648                                TargetLowering::DAGCombinerInfo &DCI,
7649                                const ARMSubtarget *Subtarget) {
7650  // Attempt to use immediate-form VORR
7651  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
7652  DebugLoc dl = N->getDebugLoc();
7653  EVT VT = N->getValueType(0);
7654  SelectionDAG &DAG = DCI.DAG;
7655
7656  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7657    return SDValue();
7658
7659  APInt SplatBits, SplatUndef;
7660  unsigned SplatBitSize;
7661  bool HasAnyUndefs;
7662  if (BVN && Subtarget->hasNEON() &&
7663      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7664    if (SplatBitSize <= 64) {
7665      EVT VorrVT;
7666      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
7667                                      SplatUndef.getZExtValue(), SplatBitSize,
7668                                      DAG, VorrVT, VT.is128BitVector(),
7669                                      OtherModImm);
7670      if (Val.getNode()) {
7671        SDValue Input =
7672          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
7673        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
7674        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
7675      }
7676    }
7677  }
7678
7679  if (!Subtarget->isThumb1Only()) {
7680    // fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
7681    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
7682    if (Result.getNode())
7683      return Result;
7684  }
7685
7686  // The code below optimizes (or (and X, Y), Z).
7687  // The AND operand needs to have a single user to make these optimizations
7688  // profitable.
7689  SDValue N0 = N->getOperand(0);
7690  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
7691    return SDValue();
7692  SDValue N1 = N->getOperand(1);
7693
7694  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
7695  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
7696      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
7697    APInt SplatUndef;
7698    unsigned SplatBitSize;
7699    bool HasAnyUndefs;
7700
7701    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
7702    APInt SplatBits0;
7703    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
7704                                  HasAnyUndefs) && !HasAnyUndefs) {
7705      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
7706      APInt SplatBits1;
7707      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
7708                                    HasAnyUndefs) && !HasAnyUndefs &&
7709          SplatBits0 == ~SplatBits1) {
7710        // Canonicalize the vector type to make instruction selection simpler.
7711        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
7712        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
7713                                     N0->getOperand(1), N0->getOperand(0),
7714                                     N1->getOperand(0));
7715        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
7716      }
7717    }
7718  }
7719
7720  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
7721  // reasonable.
7722
7723  // BFI is only available on V6T2+
7724  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
7725    return SDValue();
7726
7727  DebugLoc DL = N->getDebugLoc();
7728  // 1) or (and A, mask), val => ARMbfi A, val, mask
7729  //      iff (val & mask) == val
7730  //
7731  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
7732  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
7733  //          && mask == ~mask2
7734  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
7735  //          && ~mask == mask2
7736  //  (i.e., copy a bitfield value into another bitfield of the same width)
7737
7738  if (VT != MVT::i32)
7739    return SDValue();
7740
7741  SDValue N00 = N0.getOperand(0);
7742
7743  // The value and the mask need to be constants so we can verify this is
7744  // actually a bitfield set. If the mask is 0xffff, we can do better
7745  // via a movt instruction, so don't use BFI in that case.
7746  SDValue MaskOp = N0.getOperand(1);
7747  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
7748  if (!MaskC)
7749    return SDValue();
7750  unsigned Mask = MaskC->getZExtValue();
7751  if (Mask == 0xffff)
7752    return SDValue();
7753  SDValue Res;
7754  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
7755  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7756  if (N1C) {
7757    unsigned Val = N1C->getZExtValue();
7758    if ((Val & ~Mask) != Val)
7759      return SDValue();
7760
7761    if (ARM::isBitFieldInvertedMask(Mask)) {
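      // BFI inserts into the field described by ~Mask and expects the value in
      // the low bits, so shift Val down to the start of that field.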
7762      Val >>= CountTrailingZeros_32(~Mask);
7763
7764      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
7765                        DAG.getConstant(Val, MVT::i32),
7766                        DAG.getConstant(Mask, MVT::i32));
7767
7768      // Do not add new nodes to DAG combiner worklist.
7769      DCI.CombineTo(N, Res, false);
7770      return SDValue();
7771    }
7772  } else if (N1.getOpcode() == ISD::AND) {
7773    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
7774    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7775    if (!N11C)
7776      return SDValue();
7777    unsigned Mask2 = N11C->getZExtValue();
7778
7779    // Mask and ~Mask2 (or vice versa) must be equivalent for the BFI pattern
7780    // to match as-is.
7781    if (ARM::isBitFieldInvertedMask(Mask) &&
7782        (Mask == ~Mask2)) {
7783      // The pack halfword instruction works better for masks that fit it,
7784      // so use that when it's available.
7785      if (Subtarget->hasT2ExtractPack() &&
7786          (Mask == 0xffff || Mask == 0xffff0000))
7787        return SDValue();
7788      // 2a
7789      unsigned amt = CountTrailingZeros_32(Mask2);
7790      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
7791                        DAG.getConstant(amt, MVT::i32));
7792      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
7793                        DAG.getConstant(Mask, MVT::i32));
7794      // Do not add new nodes to DAG combiner worklist.
7795      DCI.CombineTo(N, Res, false);
7796      return SDValue();
7797    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
7798               (~Mask == Mask2)) {
7799      // The pack halfword instruction works better for masks that fit it,
7800      // so use that when it's available.
7801      if (Subtarget->hasT2ExtractPack() &&
7802          (Mask2 == 0xffff || Mask2 == 0xffff0000))
7803        return SDValue();
7804      // 2b
7805      unsigned lsb = CountTrailingZeros_32(Mask);
7806      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
7807                        DAG.getConstant(lsb, MVT::i32));
7808      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
7809                        DAG.getConstant(Mask2, MVT::i32));
7810      // Do not add new nodes to DAG combiner worklist.
7811      DCI.CombineTo(N, Res, false);
7812      return SDValue();
7813    }
7814  }
7815
7816  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
7817      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
7818      ARM::isBitFieldInvertedMask(~Mask)) {
7819    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
7820    // where lsb(mask) == #shamt and masked bits of B are known zero.
7821    SDValue ShAmt = N00.getOperand(1);
7822    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7823    unsigned LSB = CountTrailingZeros_32(Mask);
7824    if (ShAmtC != LSB)
7825      return SDValue();
7826
7827    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
7828                      DAG.getConstant(~Mask, MVT::i32));
7829
7830    // Do not add new nodes to DAG combiner worklist.
7831    DCI.CombineTo(N, Res, false);
7832  }
7833
7834  return SDValue();
7835}
7836
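/// PerformXORCombine - Target-specific dag combine xforms for ISD::XOR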
7837static SDValue PerformXORCombine(SDNode *N,
7838                                 TargetLowering::DAGCombinerInfo &DCI,
7839                                 const ARMSubtarget *Subtarget) {
7840  EVT VT = N->getValueType(0);
7841  SelectionDAG &DAG = DCI.DAG;
7842
7843  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7844    return SDValue();
7845
7846  if (!Subtarget->isThumb1Only()) {
7847    // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
7848    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
7849    if (Result.getNode())
7850      return Result;
7851  }
7852
7853  return SDValue();
7854}
7855
7856/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
7857/// the bits being cleared by the AND are not demanded by the BFI.
7858static SDValue PerformBFICombine(SDNode *N,
7859                                 TargetLowering::DAGCombinerInfo &DCI) {
7860  SDValue N1 = N->getOperand(1);
7861  if (N1.getOpcode() == ISD::AND) {
7862    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7863    if (!N11C)
7864      return SDValue();
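    // Recover the position and width of the inserted bitfield from the BFI's
    // inverted mask operand, then check that the AND does not clear any of the
    // low bits that the BFI actually inserts.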
7865    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
7866    unsigned LSB = CountTrailingZeros_32(~InvMask);
7867    unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
7868    unsigned Mask = (1 << Width)-1;
7869    unsigned Mask2 = N11C->getZExtValue();
7870    if ((Mask & (~Mask2)) == 0)
7871      return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
7872                             N->getOperand(0), N1.getOperand(0),
7873                             N->getOperand(2));
7874  }
7875  return SDValue();
7876}
7877
7878/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
7879/// ARMISD::VMOVRRD.
7880static SDValue PerformVMOVRRDCombine(SDNode *N,
7881                                     TargetLowering::DAGCombinerInfo &DCI) {
7882  // vmovrrd(vmovdrr x, y) -> x,y
7883  SDValue InDouble = N->getOperand(0);
7884  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
7885    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
7886
7887  // vmovrrd(load f64) -> (load i32), (load i32)
7888  SDNode *InNode = InDouble.getNode();
7889  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
7890      InNode->getValueType(0) == MVT::f64 &&
7891      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
7892      !cast<LoadSDNode>(InNode)->isVolatile()) {
7893    // TODO: Should this be done for non-FrameIndex operands?
7894    LoadSDNode *LD = cast<LoadSDNode>(InNode);
7895
7896    SelectionDAG &DAG = DCI.DAG;
7897    DebugLoc DL = LD->getDebugLoc();
7898    SDValue BasePtr = LD->getBasePtr();
7899    SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
7900                                 LD->getPointerInfo(), LD->isVolatile(),
7901                                 LD->isNonTemporal(), LD->isInvariant(),
7902                                 LD->getAlignment());
7903
7904    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
7905                                    DAG.getConstant(4, MVT::i32));
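    // Load the second word from BasePtr+4 with a conservatively reduced
    // alignment.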
7906    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
7907                                 LD->getPointerInfo(), LD->isVolatile(),
7908                                 LD->isNonTemporal(), LD->isInvariant(),
7909                                 std::min(4U, LD->getAlignment() / 2));
7910
7911    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
7912    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
7913    DCI.RemoveFromWorklist(LD);
7914    DAG.DeleteNode(LD);
7915    return Result;
7916  }
7917
7918  return SDValue();
7919}
7920
7921/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
7922/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
7923static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
7924  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
7925  SDValue Op0 = N->getOperand(0);
7926  SDValue Op1 = N->getOperand(1);
7927  if (Op0.getOpcode() == ISD::BITCAST)
7928    Op0 = Op0.getOperand(0);
7929  if (Op1.getOpcode() == ISD::BITCAST)
7930    Op1 = Op1.getOperand(0);
7931  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
7932      Op0.getNode() == Op1.getNode() &&
7933      Op0.getResNo() == 0 && Op1.getResNo() == 1)
7934    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
7935                       N->getValueType(0), Op0.getOperand(0));
7936  return SDValue();
7937}
7938
7939/// PerformSTORECombine - Target-specific dag combine xforms for
7940/// ISD::STORE.
7941static SDValue PerformSTORECombine(SDNode *N,
7942                                   TargetLowering::DAGCombinerInfo &DCI) {
7943  StoreSDNode *St = cast<StoreSDNode>(N);
7944  if (St->isVolatile())
7945    return SDValue();
7946
7947  // Optimize trunc store (of multiple scalars) to shuffle and store.  First,
7948  // pack all of the elements in one place.  Next, store to memory in fewer
7949  // chunks.
7950  SDValue StVal = St->getValue();
7951  EVT VT = StVal.getValueType();
7952  if (St->isTruncatingStore() && VT.isVector()) {
7953    SelectionDAG &DAG = DCI.DAG;
7954    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7955    EVT StVT = St->getMemoryVT();
7956    unsigned NumElems = VT.getVectorNumElements();
7957    assert(StVT != VT && "Cannot truncate to the same type");
7958    unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
7959    unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
7960
7961    // The From/To element sizes and the element count must be powers of two.
7962    if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
7963
7964    // The total size of the original vector value must be a multiple of the
7965    // truncated element size.
7966    if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
7967
7968    unsigned SizeRatio  = FromEltSz / ToEltSz;
7969    assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
7970
7971    // Create a type on which we perform the shuffle.
7972    EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
7973                                     NumElems*SizeRatio);
7974    assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
7975
7976    DebugLoc DL = St->getDebugLoc();
7977    SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
7978    SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
7979    for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
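    // For example, truncating <4 x i32> to <4 x i8> gives SizeRatio == 4 and a
    // mask of <0,4,8,12,-1,...>, picking the low byte of each element (assuming
    // a little-endian layout).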
7980
7981    // Can't shuffle using an illegal type.
7982    if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
7983
7984    SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
7985                                DAG.getUNDEF(WideVec.getValueType()),
7986                                ShuffleVec.data());
7987    // At this point all of the data is stored at the bottom of the
7988    // register. We now need to save it to mem.
7989
7990    // Find the largest store unit
7991    MVT StoreType = MVT::i8;
7992    for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
7993         tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
7994      MVT Tp = (MVT::SimpleValueType)tp;
7995      if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
7996        StoreType = Tp;
7997    }
7998    // Didn't find a legal store type.
7999    if (!TLI.isTypeLegal(StoreType))
8000      return SDValue();
8001
8002    // Bitcast the original vector into a vector of store-size units
8003    EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
8004            StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
8005    assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
8006    SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
8007    SmallVector<SDValue, 8> Chains;
8008    SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
8009                                        TLI.getPointerTy());
8010    SDValue BasePtr = St->getBasePtr();
8011
8012    // Perform one or more big stores into memory.
8013    unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
8014    for (unsigned I = 0; I < E; I++) {
8015      SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
8016                                   StoreType, ShuffWide,
8017                                   DAG.getIntPtrConstant(I));
8018      SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
8019                                St->getPointerInfo(), St->isVolatile(),
8020                                St->isNonTemporal(), St->getAlignment());
8021      BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8022                            Increment);
8023      Chains.push_back(Ch);
8024    }
8025    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
8026                       Chains.size());
8027  }
8028
8029  if (!ISD::isNormalStore(St))
8030    return SDValue();
8031
8032  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
8033  // ARM stores of arguments in the same cache line.
8034  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
8035      StVal.getNode()->hasOneUse()) {
8036    SelectionDAG  &DAG = DCI.DAG;
8037    DebugLoc DL = St->getDebugLoc();
8038    SDValue BasePtr = St->getBasePtr();
8039    SDValue NewST1 = DAG.getStore(St->getChain(), DL,
8040                                  StVal.getNode()->getOperand(0), BasePtr,
8041                                  St->getPointerInfo(), St->isVolatile(),
8042                                  St->isNonTemporal(), St->getAlignment());
8043
8044    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
8045                                    DAG.getConstant(4, MVT::i32));
8046    return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
8047                        OffsetPtr, St->getPointerInfo(), St->isVolatile(),
8048                        St->isNonTemporal(),
8049                        std::min(4U, St->getAlignment() / 2));
8050  }
8051
8052  if (StVal.getValueType() != MVT::i64 ||
8053      StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
8054    return SDValue();
8055
8056  // Bitcast an i64 store extracted from a vector to f64.
8057  // Otherwise, the i64 value will be legalized to a pair of i32 values.
8058  SelectionDAG &DAG = DCI.DAG;
8059  DebugLoc dl = StVal.getDebugLoc();
8060  SDValue IntVec = StVal.getOperand(0);
8061  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
8062                                 IntVec.getValueType().getVectorNumElements());
8063  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
8064  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
8065                               Vec, StVal.getOperand(1));
8066  dl = N->getDebugLoc();
8067  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
8068  // Make the DAGCombiner fold the bitcasts.
8069  DCI.AddToWorklist(Vec.getNode());
8070  DCI.AddToWorklist(ExtElt.getNode());
8071  DCI.AddToWorklist(V.getNode());
8072  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
8073                      St->getPointerInfo(), St->isVolatile(),
8074                      St->isNonTemporal(), St->getAlignment(),
8075                      St->getTBAAInfo());
8076}
8077
8078/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
8079/// are normal, non-volatile loads.  If so, it is profitable to bitcast an
8080/// i64 vector to have f64 elements, since the value can then be loaded
8081/// directly into a VFP register.
8082static bool hasNormalLoadOperand(SDNode *N) {
8083  unsigned NumElts = N->getValueType(0).getVectorNumElements();
8084  for (unsigned i = 0; i < NumElts; ++i) {
8085    SDNode *Elt = N->getOperand(i).getNode();
8086    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
8087      return true;
8088  }
8089  return false;
8090}
8091
8092/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
8093/// ISD::BUILD_VECTOR.
8094static SDValue PerformBUILD_VECTORCombine(SDNode *N,
8095                                          TargetLowering::DAGCombinerInfo &DCI){
8096  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
8097  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
8098  // into a pair of GPRs, which is fine when the value is used as a scalar,
8099  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
8100  SelectionDAG &DAG = DCI.DAG;
8101  if (N->getNumOperands() == 2) {
8102    SDValue RV = PerformVMOVDRRCombine(N, DAG);
8103    if (RV.getNode())
8104      return RV;
8105  }
8106
8107  // Load i64 elements as f64 values so that type legalization does not split
8108  // them up into i32 values.
8109  EVT VT = N->getValueType(0);
8110  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
8111    return SDValue();
8112  DebugLoc dl = N->getDebugLoc();
8113  SmallVector<SDValue, 8> Ops;
8114  unsigned NumElts = VT.getVectorNumElements();
8115  for (unsigned i = 0; i < NumElts; ++i) {
8116    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
8117    Ops.push_back(V);
8118    // Make the DAGCombiner fold the bitcast.
8119    DCI.AddToWorklist(V.getNode());
8120  }
8121  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
8122  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
8123  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
8124}
8125
8126/// PerformInsertEltCombine - Target-specific dag combine xforms for
8127/// ISD::INSERT_VECTOR_ELT.
8128static SDValue PerformInsertEltCombine(SDNode *N,
8129                                       TargetLowering::DAGCombinerInfo &DCI) {
8130  // Bitcast an i64 load inserted into a vector to f64.
8131  // Otherwise, the i64 value will be legalized to a pair of i32 values.
8132  EVT VT = N->getValueType(0);
8133  SDNode *Elt = N->getOperand(1).getNode();
8134  if (VT.getVectorElementType() != MVT::i64 ||
8135      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
8136    return SDValue();
8137
8138  SelectionDAG &DAG = DCI.DAG;
8139  DebugLoc dl = N->getDebugLoc();
8140  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
8141                                 VT.getVectorNumElements());
8142  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
8143  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
8144  // Make the DAGCombiner fold the bitcasts.
8145  DCI.AddToWorklist(Vec.getNode());
8146  DCI.AddToWorklist(V.getNode());
8147  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
8148                               Vec, V, N->getOperand(2));
8149  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
8150}
8151
8152/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
8153/// ISD::VECTOR_SHUFFLE.
8154static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
8155  // The LLVM shufflevector instruction does not require the shuffle mask
8156  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
8157  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
8158  // operands do not match the mask length, they are extended by concatenating
8159  // them with undef vectors.  That is probably the right thing for other
8160  // targets, but for NEON it is better to concatenate two double-register
8161  // size vector operands into a single quad-register size vector.  Do that
8162  // transformation here:
8163  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
8164  //   shuffle(concat(v1, v2), undef)
8165  SDValue Op0 = N->getOperand(0);
8166  SDValue Op1 = N->getOperand(1);
8167  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
8168      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
8169      Op0.getNumOperands() != 2 ||
8170      Op1.getNumOperands() != 2)
8171    return SDValue();
8172  SDValue Concat0Op1 = Op0.getOperand(1);
8173  SDValue Concat1Op1 = Op1.getOperand(1);
8174  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
8175      Concat1Op1.getOpcode() != ISD::UNDEF)
8176    return SDValue();
8177  // Skip the transformation if any of the types are illegal.
8178  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8179  EVT VT = N->getValueType(0);
8180  if (!TLI.isTypeLegal(VT) ||
8181      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
8182      !TLI.isTypeLegal(Concat1Op1.getValueType()))
8183    return SDValue();
8184
8185  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
8186                                  Op0.getOperand(0), Op1.getOperand(0));
8187  // Translate the shuffle mask.
8188  SmallVector<int, 16> NewMask;
8189  unsigned NumElts = VT.getVectorNumElements();
8190  unsigned HalfElts = NumElts/2;
8191  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
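  // Indices into the first operand's real half keep their value; indices into
  // the second operand's real half are remapped to follow them; everything else
  // (the undef halves) becomes -1.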
8192  for (unsigned n = 0; n < NumElts; ++n) {
8193    int MaskElt = SVN->getMaskElt(n);
8194    int NewElt = -1;
8195    if (MaskElt < (int)HalfElts)
8196      NewElt = MaskElt;
8197    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
8198      NewElt = HalfElts + MaskElt - NumElts;
8199    NewMask.push_back(NewElt);
8200  }
8201  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
8202                              DAG.getUNDEF(VT), NewMask.data());
8203}
8204
8205/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
8206/// NEON load/store intrinsics to merge base address updates.
8207static SDValue CombineBaseUpdate(SDNode *N,
8208                                 TargetLowering::DAGCombinerInfo &DCI) {
8209  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
8210    return SDValue();
8211
8212  SelectionDAG &DAG = DCI.DAG;
8213  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
8214                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
8215  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
8216  SDValue Addr = N->getOperand(AddrOpIdx);
8217
8218  // Search for a use of the address operand that is an increment.
8219  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
8220         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
8221    SDNode *User = *UI;
8222    if (User->getOpcode() != ISD::ADD ||
8223        UI.getUse().getResNo() != Addr.getResNo())
8224      continue;
8225
8226    // Check that the add is independent of the load/store.  Otherwise, folding
8227    // it would create a cycle.
8228    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
8229      continue;
8230
8231    // Find the new opcode for the updating load/store.
8232    bool isLoad = true;
8233    bool isLaneOp = false;
8234    unsigned NewOpc = 0;
8235    unsigned NumVecs = 0;
8236    if (isIntrinsic) {
8237      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8238      switch (IntNo) {
8239      default: llvm_unreachable("unexpected intrinsic for Neon base update");
8240      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
8241        NumVecs = 1; break;
8242      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
8243        NumVecs = 2; break;
8244      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
8245        NumVecs = 3; break;
8246      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
8247        NumVecs = 4; break;
8248      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
8249        NumVecs = 2; isLaneOp = true; break;
8250      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
8251        NumVecs = 3; isLaneOp = true; break;
8252      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
8253        NumVecs = 4; isLaneOp = true; break;
8254      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
8255        NumVecs = 1; isLoad = false; break;
8256      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
8257        NumVecs = 2; isLoad = false; break;
8258      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
8259        NumVecs = 3; isLoad = false; break;
8260      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
8261        NumVecs = 4; isLoad = false; break;
8262      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
8263        NumVecs = 2; isLoad = false; isLaneOp = true; break;
8264      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
8265        NumVecs = 3; isLoad = false; isLaneOp = true; break;
8266      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
8267        NumVecs = 4; isLoad = false; isLaneOp = true; break;
8268      }
8269    } else {
8270      isLaneOp = true;
8271      switch (N->getOpcode()) {
8272      default: llvm_unreachable("unexpected opcode for Neon base update");
8273      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
8274      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
8275      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
8276      }
8277    }
8278
8279    // Find the size of memory referenced by the load/store.
8280    EVT VecTy;
8281    if (isLoad)
8282      VecTy = N->getValueType(0);
8283    else
8284      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
8285    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
8286    if (isLaneOp)
8287      NumBytes /= VecTy.getVectorNumElements();
8288
8289    // If the increment is a constant, it must match the memory ref size.
8290    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
8291    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
8292      uint64_t IncVal = CInc->getZExtValue();
8293      if (IncVal != NumBytes)
8294        continue;
8295    } else if (NumBytes >= 3 * 16) {
8296      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
8297      // separate instructions that make it harder to use a non-constant update.
8298      continue;
8299    }
8300
8301    // Create the new updating load/store node.
8302    EVT Tys[6];
8303    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
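    // Result list: the loaded vectors (none for stores), then the updated base
    // address (i32), then the chain.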
8304    unsigned n;
8305    for (n = 0; n < NumResultVecs; ++n)
8306      Tys[n] = VecTy;
8307    Tys[n++] = MVT::i32;
8308    Tys[n] = MVT::Other;
8309    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
8310    SmallVector<SDValue, 8> Ops;
8311    Ops.push_back(N->getOperand(0)); // incoming chain
8312    Ops.push_back(N->getOperand(AddrOpIdx));
8313    Ops.push_back(Inc);
8314    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
8315      Ops.push_back(N->getOperand(i));
8316    }
8317    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
8318    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
8319                                           Ops.data(), Ops.size(),
8320                                           MemInt->getMemoryVT(),
8321                                           MemInt->getMemOperand());
8322
8323    // Update the uses.
8324    std::vector<SDValue> NewResults;
8325    for (unsigned i = 0; i < NumResultVecs; ++i) {
8326      NewResults.push_back(SDValue(UpdN.getNode(), i));
8327    }
8328    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
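    // Result NumResultVecs of the new node is the post-incremented address, so
    // rewrite the ADD user to use it directly.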
8329    DCI.CombineTo(N, NewResults);
8330    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
8331
8332    break;
8333  }
8334  return SDValue();
8335}
8336
8337/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
8338/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
8339/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
8340/// return true.
8341static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
8342  SelectionDAG &DAG = DCI.DAG;
8343  EVT VT = N->getValueType(0);
8344  // vldN-dup instructions only support 64-bit vectors for N > 1.
8345  if (!VT.is64BitVector())
8346    return false;
8347
8348  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
8349  SDNode *VLD = N->getOperand(0).getNode();
8350  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
8351    return false;
8352  unsigned NumVecs = 0;
8353  unsigned NewOpc = 0;
8354  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
8355  if (IntNo == Intrinsic::arm_neon_vld2lane) {
8356    NumVecs = 2;
8357    NewOpc = ARMISD::VLD2DUP;
8358  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
8359    NumVecs = 3;
8360    NewOpc = ARMISD::VLD3DUP;
8361  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
8362    NumVecs = 4;
8363    NewOpc = ARMISD::VLD4DUP;
8364  } else {
8365    return false;
8366  }
8367
8368  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
8369  // numbers match the load.
8370  unsigned VLDLaneNo =
8371    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
8372  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
8373       UI != UE; ++UI) {
8374    // Ignore uses of the chain result.
8375    if (UI.getUse().getResNo() == NumVecs)
8376      continue;
8377    SDNode *User = *UI;
8378    if (User->getOpcode() != ARMISD::VDUPLANE ||
8379        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
8380      return false;
8381  }
8382
8383  // Create the vldN-dup node.
8384  EVT Tys[5];
8385  unsigned n;
8386  for (n = 0; n < NumVecs; ++n)
8387    Tys[n] = VT;
8388  Tys[n] = MVT::Other;
8389  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
8390  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
8391  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
8392  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
8393                                           Ops, 2, VLDMemInt->getMemoryVT(),
8394                                           VLDMemInt->getMemOperand());
8395
8396  // Update the uses.
8397  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
8398       UI != UE; ++UI) {
8399    unsigned ResNo = UI.getUse().getResNo();
8400    // Ignore uses of the chain result.
8401    if (ResNo == NumVecs)
8402      continue;
8403    SDNode *User = *UI;
8404    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
8405  }
8406
8407  // Now the vldN-lane intrinsic is dead except for its chain result.
8408  // Update uses of the chain.
8409  std::vector<SDValue> VLDDupResults;
8410  for (unsigned n = 0; n < NumVecs; ++n)
8411    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
8412  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
8413  DCI.CombineTo(VLD, VLDDupResults);
8414
8415  return true;
8416}
8417
8418/// PerformVDUPLANECombine - Target-specific dag combine xforms for
8419/// ARMISD::VDUPLANE.
8420static SDValue PerformVDUPLANECombine(SDNode *N,
8421                                      TargetLowering::DAGCombinerInfo &DCI) {
8422  SDValue Op = N->getOperand(0);
8423
8424  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
8425  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
8426  if (CombineVLDDUP(N, DCI))
8427    return SDValue(N, 0);
8428
8429  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
8430  // redundant.  Ignore bit_converts for now; element sizes are checked below.
8431  while (Op.getOpcode() == ISD::BITCAST)
8432    Op = Op.getOperand(0);
8433  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
8434    return SDValue();
8435
8436  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
8437  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
8438  // The canonical VMOV for a zero vector uses a 32-bit element size.
8439  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8440  unsigned EltBits;
8441  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
8442    EltSize = 8;
8443  EVT VT = N->getValueType(0);
8444  if (EltSize > VT.getVectorElementType().getSizeInBits())
8445    return SDValue();
8446
8447  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
8448}
8449
8450// isConstVecPow2 - Return true if each vector element is a power of 2, all
8451// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
8452static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
8453{
8454  integerPart cN;
8455  integerPart c0 = 0;
8456  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
8457       I != E; I++) {
8458    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
8459    if (!C)
8460      return false;
8461
8462    bool isExact;
8463    APFloat APF = C->getValueAPF();
8464    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
8465        != APFloat::opOK || !isExact)
8466      return false;
8467
8468    c0 = (I == 0) ? cN : c0;
8469    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
8470      return false;
8471  }
8472  C = c0;
8473  return true;
8474}
8475
8476/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
8477/// can replace combinations of VMUL and VCVT (floating-point to integer)
8478/// when the VMUL has a constant operand that is a power of 2.
8479///
8480/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
8481///  vmul.f32        d16, d17, d16
8482///  vcvt.s32.f32    d16, d16
8483/// becomes:
8484///  vcvt.s32.f32    d16, d16, #3
8485static SDValue PerformVCVTCombine(SDNode *N,
8486                                  TargetLowering::DAGCombinerInfo &DCI,
8487                                  const ARMSubtarget *Subtarget) {
8488  SelectionDAG &DAG = DCI.DAG;
8489  SDValue Op = N->getOperand(0);
8490
8491  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
8492      Op.getOpcode() != ISD::FMUL)
8493    return SDValue();
8494
8495  uint64_t C;
8496  SDValue N0 = Op->getOperand(0);
8497  SDValue ConstVec = Op->getOperand(1);
8498  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
8499
8500  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
8501      !isConstVecPow2(ConstVec, isSigned, C))
8502    return SDValue();
8503
8504  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
8505    Intrinsic::arm_neon_vcvtfp2fxu;
8506  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
8507                     N->getValueType(0),
8508                     DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
8509                     DAG.getConstant(Log2_64(C), MVT::i32));
8510}
8511
8512/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
8513/// can replace combinations of VCVT (integer to floating-point) and VDIV
8514/// when the VDIV has a constant operand that is a power of 2.
8515///
8516/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
8517///  vcvt.f32.s32    d16, d16
8518///  vdiv.f32        d16, d17, d16
8519/// becomes:
8520///  vcvt.f32.s32    d16, d16, #3
8521static SDValue PerformVDIVCombine(SDNode *N,
8522                                  TargetLowering::DAGCombinerInfo &DCI,
8523                                  const ARMSubtarget *Subtarget) {
8524  SelectionDAG &DAG = DCI.DAG;
8525  SDValue Op = N->getOperand(0);
8526  unsigned OpOpcode = Op.getNode()->getOpcode();
8527
8528  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
8529      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
8530    return SDValue();
8531
8532  uint64_t C;
8533  SDValue ConstVec = N->getOperand(1);
8534  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
8535
8536  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
8537      !isConstVecPow2(ConstVec, isSigned, C))
8538    return SDValue();
8539
8540  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
8541    Intrinsic::arm_neon_vcvtfxu2fp;
8542  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
8543                     Op.getValueType(),
8544                     DAG.getConstant(IntrinsicOpcode, MVT::i32),
8545                     Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
8546}
8547
8548/// getVShiftImm - Check if this is a valid build_vector for the immediate
8549/// operand of a vector shift operation, where all the elements of the
8550/// build_vector must have the same constant integer value.
8551static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
8552  // Ignore bit_converts.
8553  while (Op.getOpcode() == ISD::BITCAST)
8554    Op = Op.getOperand(0);
8555  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8556  APInt SplatBits, SplatUndef;
8557  unsigned SplatBitSize;
8558  bool HasAnyUndefs;
8559  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
8560                                      HasAnyUndefs, ElementBits) ||
8561      SplatBitSize > ElementBits)
8562    return false;
8563  Cnt = SplatBits.getSExtValue();
8564  return true;
8565}
8566
8567/// isVShiftLImm - Check if this is a valid build_vector for the immediate
8568/// operand of a vector shift left operation.  That value must be in the range:
8569///   0 <= Value < ElementBits for a left shift; or
8570///   0 <= Value <= ElementBits for a long left shift.
8571static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
8572  assert(VT.isVector() && "vector shift count is not a vector type");
8573  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
8574  if (! getVShiftImm(Op, ElementBits, Cnt))
8575    return false;
8576  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
8577}
8578
8579/// isVShiftRImm - Check if this is a valid build_vector for the immediate
8580/// operand of a vector shift right operation.  For a shift opcode, the value
8581/// is positive, but for an intrinsic the count must be negative. The
8582/// absolute value must be in the range:
8583///   1 <= |Value| <= ElementBits for a right shift; or
8584///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
8585static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
8586                         int64_t &Cnt) {
8587  assert(VT.isVector() && "vector shift count is not a vector type");
8588  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
8589  if (! getVShiftImm(Op, ElementBits, Cnt))
8590    return false;
8591  if (isIntrinsic)
8592    Cnt = -Cnt;
8593  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
8594}
8595
8596/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
8597static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
8598  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
8599  switch (IntNo) {
8600  default:
8601    // Don't do anything for most intrinsics.
8602    break;
8603
8604  // Vector shifts: check for immediate versions and lower them.
8605  // Note: This is done during DAG combining instead of DAG legalizing because
8606  // the build_vectors for 64-bit vector element shift counts are generally
8607  // not legal, and it is hard to see their values after they get legalized to
8608  // loads from a constant pool.
8609  case Intrinsic::arm_neon_vshifts:
8610  case Intrinsic::arm_neon_vshiftu:
8611  case Intrinsic::arm_neon_vshiftls:
8612  case Intrinsic::arm_neon_vshiftlu:
8613  case Intrinsic::arm_neon_vshiftn:
8614  case Intrinsic::arm_neon_vrshifts:
8615  case Intrinsic::arm_neon_vrshiftu:
8616  case Intrinsic::arm_neon_vrshiftn:
8617  case Intrinsic::arm_neon_vqshifts:
8618  case Intrinsic::arm_neon_vqshiftu:
8619  case Intrinsic::arm_neon_vqshiftsu:
8620  case Intrinsic::arm_neon_vqshiftns:
8621  case Intrinsic::arm_neon_vqshiftnu:
8622  case Intrinsic::arm_neon_vqshiftnsu:
8623  case Intrinsic::arm_neon_vqrshiftns:
8624  case Intrinsic::arm_neon_vqrshiftnu:
8625  case Intrinsic::arm_neon_vqrshiftnsu: {
8626    EVT VT = N->getOperand(1).getValueType();
8627    int64_t Cnt;
8628    unsigned VShiftOpc = 0;
8629
8630    switch (IntNo) {
8631    case Intrinsic::arm_neon_vshifts:
8632    case Intrinsic::arm_neon_vshiftu:
8633      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
8634        VShiftOpc = ARMISD::VSHL;
8635        break;
8636      }
8637      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
8638        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
8639                     ARMISD::VSHRs : ARMISD::VSHRu);
8640        break;
8641      }
8642      return SDValue();
8643
8644    case Intrinsic::arm_neon_vshiftls:
8645    case Intrinsic::arm_neon_vshiftlu:
8646      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
8647        break;
8648      llvm_unreachable("invalid shift count for vshll intrinsic");
8649
8650    case Intrinsic::arm_neon_vrshifts:
8651    case Intrinsic::arm_neon_vrshiftu:
8652      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
8653        break;
8654      return SDValue();
8655
8656    case Intrinsic::arm_neon_vqshifts:
8657    case Intrinsic::arm_neon_vqshiftu:
8658      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
8659        break;
8660      return SDValue();
8661
8662    case Intrinsic::arm_neon_vqshiftsu:
8663      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
8664        break;
8665      llvm_unreachable("invalid shift count for vqshlu intrinsic");
8666
8667    case Intrinsic::arm_neon_vshiftn:
8668    case Intrinsic::arm_neon_vrshiftn:
8669    case Intrinsic::arm_neon_vqshiftns:
8670    case Intrinsic::arm_neon_vqshiftnu:
8671    case Intrinsic::arm_neon_vqshiftnsu:
8672    case Intrinsic::arm_neon_vqrshiftns:
8673    case Intrinsic::arm_neon_vqrshiftnu:
8674    case Intrinsic::arm_neon_vqrshiftnsu:
8675      // Narrowing shifts require an immediate right shift.
8676      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
8677        break;
8678      llvm_unreachable("invalid shift count for narrowing vector shift "
8679                       "intrinsic");
8680
8681    default:
8682      llvm_unreachable("unhandled vector shift");
8683    }
8684
8685    switch (IntNo) {
8686    case Intrinsic::arm_neon_vshifts:
8687    case Intrinsic::arm_neon_vshiftu:
8688      // Opcode already set above.
8689      break;
8690    case Intrinsic::arm_neon_vshiftls:
8691    case Intrinsic::arm_neon_vshiftlu:
8692      if (Cnt == VT.getVectorElementType().getSizeInBits())
8693        VShiftOpc = ARMISD::VSHLLi;
8694      else
8695        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
8696                     ARMISD::VSHLLs : ARMISD::VSHLLu);
8697      break;
8698    case Intrinsic::arm_neon_vshiftn:
8699      VShiftOpc = ARMISD::VSHRN; break;
8700    case Intrinsic::arm_neon_vrshifts:
8701      VShiftOpc = ARMISD::VRSHRs; break;
8702    case Intrinsic::arm_neon_vrshiftu:
8703      VShiftOpc = ARMISD::VRSHRu; break;
8704    case Intrinsic::arm_neon_vrshiftn:
8705      VShiftOpc = ARMISD::VRSHRN; break;
8706    case Intrinsic::arm_neon_vqshifts:
8707      VShiftOpc = ARMISD::VQSHLs; break;
8708    case Intrinsic::arm_neon_vqshiftu:
8709      VShiftOpc = ARMISD::VQSHLu; break;
8710    case Intrinsic::arm_neon_vqshiftsu:
8711      VShiftOpc = ARMISD::VQSHLsu; break;
8712    case Intrinsic::arm_neon_vqshiftns:
8713      VShiftOpc = ARMISD::VQSHRNs; break;
8714    case Intrinsic::arm_neon_vqshiftnu:
8715      VShiftOpc = ARMISD::VQSHRNu; break;
8716    case Intrinsic::arm_neon_vqshiftnsu:
8717      VShiftOpc = ARMISD::VQSHRNsu; break;
8718    case Intrinsic::arm_neon_vqrshiftns:
8719      VShiftOpc = ARMISD::VQRSHRNs; break;
8720    case Intrinsic::arm_neon_vqrshiftnu:
8721      VShiftOpc = ARMISD::VQRSHRNu; break;
8722    case Intrinsic::arm_neon_vqrshiftnsu:
8723      VShiftOpc = ARMISD::VQRSHRNsu; break;
8724    }
8725
8726    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
8727                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
8728  }
8729
8730  case Intrinsic::arm_neon_vshiftins: {
8731    EVT VT = N->getOperand(1).getValueType();
8732    int64_t Cnt;
8733    unsigned VShiftOpc = 0;
8734
8735    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
8736      VShiftOpc = ARMISD::VSLI;
8737    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
8738      VShiftOpc = ARMISD::VSRI;
8739    else {
8740      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
8741    }
8742
8743    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
8744                       N->getOperand(1), N->getOperand(2),
8745                       DAG.getConstant(Cnt, MVT::i32));
8746  }
8747
8748  case Intrinsic::arm_neon_vqrshifts:
8749  case Intrinsic::arm_neon_vqrshiftu:
8750    // No immediate versions of these to check for.
8751    break;
8752  }
8753
8754  return SDValue();
8755}
8756
8757/// PerformShiftCombine - Checks for immediate versions of vector shifts and
8758/// lowers them.  As with the vector shift intrinsics, this is done during DAG
8759/// combining instead of DAG legalizing because the build_vectors for 64-bit
8760/// vector element shift counts are generally not legal, and it is hard to see
8761/// their values after they get legalized to loads from a constant pool.
8762static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
8763                                   const ARMSubtarget *ST) {
8764  EVT VT = N->getValueType(0);
8765  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
8766    // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
8767    // 16 bits of x are zero. This optimizes rev + lsr 16 to rev16.
8768    SDValue N1 = N->getOperand(1);
8769    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
8770      SDValue N0 = N->getOperand(0);
8771      if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
8772          DAG.MaskedValueIsZero(N0.getOperand(0),
8773                                APInt::getHighBitsSet(32, 16)))
8774        return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
8775    }
8776  }
8777
8778  // Nothing to be done for scalar shifts.
8779  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8780  if (!VT.isVector() || !TLI.isTypeLegal(VT))
8781    return SDValue();
8782
8783  assert(ST->hasNEON() && "unexpected vector shift");
8784  int64_t Cnt;
8785
8786  switch (N->getOpcode()) {
8787  default: llvm_unreachable("unexpected shift opcode");
8788
8789  case ISD::SHL:
8790    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
8791      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
8792                         DAG.getConstant(Cnt, MVT::i32));
8793    break;
8794
8795  case ISD::SRA:
8796  case ISD::SRL:
8797    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
8798      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
8799                            ARMISD::VSHRs : ARMISD::VSHRu);
8800      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
8801                         DAG.getConstant(Cnt, MVT::i32));
8802    }
8803  }
8804  return SDValue();
8805}
8806
8807/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
8808/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
8809static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
8810                                    const ARMSubtarget *ST) {
8811  SDValue N0 = N->getOperand(0);
8812
8813  // Check for sign- and zero-extensions of vector extract operations of 8-
8814  // and 16-bit vector elements.  NEON supports these directly.  They are
8815  // handled during DAG combining because type legalization will promote them
8816  // to 32-bit types and it is messy to recognize the operations after that.
8817  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
8818    SDValue Vec = N0.getOperand(0);
8819    SDValue Lane = N0.getOperand(1);
8820    EVT VT = N->getValueType(0);
8821    EVT EltVT = N0.getValueType();
8822    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8823
8824    if (VT == MVT::i32 &&
8825        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
8826        TLI.isTypeLegal(Vec.getValueType()) &&
8827        isa<ConstantSDNode>(Lane)) {
8828
8829      unsigned Opc = 0;
8830      switch (N->getOpcode()) {
8831      default: llvm_unreachable("unexpected opcode");
8832      case ISD::SIGN_EXTEND:
8833        Opc = ARMISD::VGETLANEs;
8834        break;
8835      case ISD::ZERO_EXTEND:
8836      case ISD::ANY_EXTEND:
8837        Opc = ARMISD::VGETLANEu;
8838        break;
8839      }
8840      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
8841    }
8842  }
8843
8844  return SDValue();
8845}
8846
8847/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
8848/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
8849static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
8850                                       const ARMSubtarget *ST) {
8851  // If the target supports NEON, try to use vmax/vmin instructions for f32
8852  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
8853  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
8854  // a NaN; only do the transformation when it matches that behavior.
8855
8856  // For now only do this when using NEON for FP operations; if using VFP, it
8857  // is not obvious that the benefit outweighs the cost of switching to the
8858  // NEON pipeline.
8859  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
8860      N->getValueType(0) != MVT::f32)
8861    return SDValue();
8862
8863  SDValue CondLHS = N->getOperand(0);
8864  SDValue CondRHS = N->getOperand(1);
8865  SDValue LHS = N->getOperand(2);
8866  SDValue RHS = N->getOperand(3);
8867  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
8868
8869  unsigned Opcode = 0;
8870  bool IsReversed;
8871  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
8872    IsReversed = false; // x CC y ? x : y
8873  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
8874    IsReversed = true;  // x CC y ? y : x
8875  } else {
8876    return SDValue();
8877  }
8878
8879  bool IsUnordered;
8880  switch (CC) {
8881  default: break;
8882  case ISD::SETOLT:
8883  case ISD::SETOLE:
8884  case ISD::SETLT:
8885  case ISD::SETLE:
8886  case ISD::SETULT:
8887  case ISD::SETULE:
8888    // If LHS is NaN, an ordered comparison will be false and the result will
8889    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
8890    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
8891    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
8892    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
8893      break;
8894    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
8895    // will return -0, so vmin can only be used for unsafe math or if one of
8896    // the operands is known to be nonzero.
8897    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
8898        !DAG.getTarget().Options.UnsafeFPMath &&
8899        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
8900      break;
8901    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
8902    break;
8903
8904  case ISD::SETOGT:
8905  case ISD::SETOGE:
8906  case ISD::SETGT:
8907  case ISD::SETGE:
8908  case ISD::SETUGT:
8909  case ISD::SETUGE:
8910    // If LHS is NaN, an ordered comparison will be false and the result will
8911    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
8912    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
8913    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
8914    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
8915      break;
8916    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
8917    // will return +0, so vmax can only be used for unsafe math or if one of
8918    // the operands is known to be nonzero.
8919    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
8920        !DAG.getTarget().Options.UnsafeFPMath &&
8921        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
8922      break;
8923    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
8924    break;
8925  }
8926
8927  if (!Opcode)
8928    return SDValue();
8929  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
8930}
8931
8932/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
8933SDValue
8934ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
8935  SDValue Cmp = N->getOperand(4);
8936  if (Cmp.getOpcode() != ARMISD::CMPZ)
8937    // Only looking at EQ and NE cases.
8938    return SDValue();
8939
8940  EVT VT = N->getValueType(0);
8941  DebugLoc dl = N->getDebugLoc();
8942  SDValue LHS = Cmp.getOperand(0);
8943  SDValue RHS = Cmp.getOperand(1);
8944  SDValue FalseVal = N->getOperand(0);
8945  SDValue TrueVal = N->getOperand(1);
8946  SDValue ARMcc = N->getOperand(2);
8947  ARMCC::CondCodes CC =
8948    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
8949
8950  // Simplify
8951  //   mov     r1, r0
8952  //   cmp     r1, x
8953  //   mov     r0, y
8954  //   moveq   r0, x
8955  // to
8956  //   cmp     r0, x
8957  //   movne   r0, y
8958  //
8959  //   mov     r1, r0
8960  //   cmp     r1, x
8961  //   mov     r0, x
8962  //   movne   r0, y
8963  // to
8964  //   cmp     r0, x
8965  //   movne   r0, y
8966  // FIXME: Turn this into a target-neutral optimization?
8967  SDValue Res;
8968  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
8969    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
8970                      N->getOperand(3), Cmp);
8971  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
8972    SDValue ARMcc;
8973    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
8974    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
8975                      N->getOperand(3), NewCmp);
8976  }
8977
8978  if (Res.getNode()) {
8979    APInt KnownZero, KnownOne;
8980    DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
8981    // Capture demanded bits information that would be otherwise lost.
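    // For example, if the CMOV can only produce 0 or 1 (both operands are i1
    // values zero-extended to i32), KnownZero is 0xfffffffe and the AssertZext
    // i1 emitted below lets later combines drop redundant masking of the
    // result.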
8982    if (KnownZero == 0xfffffffe)
8983      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8984                        DAG.getValueType(MVT::i1));
8985    else if (KnownZero == 0xffffff00)
8986      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8987                        DAG.getValueType(MVT::i8));
8988    else if (KnownZero == 0xffff0000)
8989      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
8990                        DAG.getValueType(MVT::i16));
8991  }
8992
8993  return Res;
8994}
8995
8996SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
8997                                             DAGCombinerInfo &DCI) const {
8998  switch (N->getOpcode()) {
8999  default: break;
9000  case ISD::ADDC:       return PerformADDCCombine(N, DCI, Subtarget);
9001  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
9002  case ISD::SUB:        return PerformSUBCombine(N, DCI);
9003  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
9004  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
9005  case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
9006  case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
9007  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
9008  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
9009  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
9010  case ISD::STORE:      return PerformSTORECombine(N, DCI);
9011  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
9012  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
9013  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
9014  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
9015  case ISD::FP_TO_SINT:
9016  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
9017  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
9018  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
9019  case ISD::SHL:
9020  case ISD::SRA:
9021  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
9022  case ISD::SIGN_EXTEND:
9023  case ISD::ZERO_EXTEND:
9024  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
9025  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
9026  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
9027  case ARMISD::VLD2DUP:
9028  case ARMISD::VLD3DUP:
9029  case ARMISD::VLD4DUP:
9030    return CombineBaseUpdate(N, DCI);
9031  case ISD::INTRINSIC_VOID:
9032  case ISD::INTRINSIC_W_CHAIN:
9033    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9034    case Intrinsic::arm_neon_vld1:
9035    case Intrinsic::arm_neon_vld2:
9036    case Intrinsic::arm_neon_vld3:
9037    case Intrinsic::arm_neon_vld4:
9038    case Intrinsic::arm_neon_vld2lane:
9039    case Intrinsic::arm_neon_vld3lane:
9040    case Intrinsic::arm_neon_vld4lane:
9041    case Intrinsic::arm_neon_vst1:
9042    case Intrinsic::arm_neon_vst2:
9043    case Intrinsic::arm_neon_vst3:
9044    case Intrinsic::arm_neon_vst4:
9045    case Intrinsic::arm_neon_vst2lane:
9046    case Intrinsic::arm_neon_vst3lane:
9047    case Intrinsic::arm_neon_vst4lane:
9048      return CombineBaseUpdate(N, DCI);
9049    default: break;
9050    }
9051    break;
9052  }
9053  return SDValue();
9054}
9055
9056bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
9057                                                          EVT VT) const {
9058  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
9059}
9060
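/// allowsUnalignedMemoryAccesses - Return true if unaligned accesses of type
/// VT are fast on this subtarget: i8/i16/i32 when strict alignment checking
/// (SCTLR.A) is disabled, and f64/v2f64 on NEON targets that are little-endian
/// or have unaligned accesses enabled.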
9061bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
9062  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
9063  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
9064
9065  switch (VT.getSimpleVT().SimpleTy) {
9066  default:
9067    return false;
9068  case MVT::i8:
9069  case MVT::i16:
9070  case MVT::i32:
9071    // Unaligned access can use (for example) LDRB, LDRH, LDR.
9072    return AllowsUnaligned;
9073  case MVT::f64:
9074  case MVT::v2f64:
9075    // For little-endian targets with NEON, we can support unaligned ld/st
9076    // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
9077    // A big-endian target may also explicitly support unaligned accesses.
9078    return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian());
9079  }
9080}
9081
9082static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
9083                       unsigned AlignCheck) {
9084  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
9085          (DstAlign == 0 || DstAlign % AlignCheck == 0));
9086}
9087
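/// getOptimalMemOpType - Choose the widest type to use when expanding an
/// inline memcpy / memset. For example (illustrative), a 32-byte memset of
/// zero to a 16-byte aligned destination on a NEON target returns MVT::v4i32
/// so the expansion can use 128-bit stores; smaller or less aligned operations
/// fall back to i32, i16, or the target-independent default.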
9088EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
9089                                           unsigned DstAlign, unsigned SrcAlign,
9090                                           bool IsZeroVal,
9091                                           bool MemcpyStrSrc,
9092                                           MachineFunction &MF) const {
9093  const Function *F = MF.getFunction();
9094
9095  // See if we can use NEON instructions for this...
9096  if (IsZeroVal &&
9097      !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) &&
9098      Subtarget->hasNEON()) {
9099    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
9100      return MVT::v4i32;
9101    } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
9102      return MVT::v2i32;
9103    }
9104  }
9105
9106  // Lower to i32 / i16 if the size permits.
9107  if (Size >= 4) {
9108    return MVT::i32;
9109  } else if (Size >= 2) {
9110    return MVT::i16;
9111  }
9112
9113  // Let the target-independent logic figure it out.
9114  return MVT::Other;
9115}
9116
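/// isLegalT1AddressImmediate - Return true if V is a legal Thumb1 immediate
/// offset for a load / store of the given type. The offset must be positive
/// and a multiple of the access size, with the scaled value fitting in 5 bits:
/// for example, i32 allows word-aligned offsets 0-124, i16 allows
/// halfword-aligned offsets 0-62, and i8/i1 allow 0-31.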
9117static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
9118  if (V < 0)
9119    return false;
9120
9121  unsigned Scale = 1;
9122  switch (VT.getSimpleVT().SimpleTy) {
9123  default: return false;
9124  case MVT::i1:
9125  case MVT::i8:
9126    // Scale == 1;
9127    break;
9128  case MVT::i16:
9129    // Scale == 2;
9130    Scale = 2;
9131    break;
9132  case MVT::i32:
9133    // Scale == 4;
9134    Scale = 4;
9135    break;
9136  }
9137
9138  if ((V & (Scale - 1)) != 0)
9139    return false;
9140  V /= Scale;
9141  return V == (V & ((1LL << 5) - 1));
9142}
9143
9144static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
9145                                      const ARMSubtarget *Subtarget) {
9146  bool isNeg = false;
9147  if (V < 0) {
9148    isNeg = true;
9149    V = - V;
9150  }
9151
9152  switch (VT.getSimpleVT().SimpleTy) {
9153  default: return false;
9154  case MVT::i1:
9155  case MVT::i8:
9156  case MVT::i16:
9157  case MVT::i32:
9158    // + imm12 or - imm8
9159    if (isNeg)
9160      return V == (V & ((1LL << 8) - 1));
9161    return V == (V & ((1LL << 12) - 1));
9162  case MVT::f32:
9163  case MVT::f64:
9164    // Same as ARM mode. FIXME: NEON?
9165    if (!Subtarget->hasVFP2())
9166      return false;
9167    if ((V & 3) != 0)
9168      return false;
9169    V >>= 2;
9170    return V == (V & ((1LL << 8) - 1));
9171  }
9172}
9173
9174/// isLegalAddressImmediate - Return true if the integer value can be used
9175/// as the offset of the target addressing mode for load / store of the
9176/// given type.
9177static bool isLegalAddressImmediate(int64_t V, EVT VT,
9178                                    const ARMSubtarget *Subtarget) {
9179  if (V == 0)
9180    return true;
9181
9182  if (!VT.isSimple())
9183    return false;
9184
9185  if (Subtarget->isThumb1Only())
9186    return isLegalT1AddressImmediate(V, VT);
9187  else if (Subtarget->isThumb2())
9188    return isLegalT2AddressImmediate(V, VT, Subtarget);
9189
9190  // ARM mode.
9191  if (V < 0)
9192    V = - V;
9193  switch (VT.getSimpleVT().SimpleTy) {
9194  default: return false;
9195  case MVT::i1:
9196  case MVT::i8:
9197  case MVT::i32:
9198    // +- imm12
9199    return V == (V & ((1LL << 12) - 1));
9200  case MVT::i16:
9201    // +- imm8
9202    return V == (V & ((1LL << 8) - 1));
9203  case MVT::f32:
9204  case MVT::f64:
9205    if (!Subtarget->hasVFP2()) // FIXME: NEON?
9206      return false;
9207    if ((V & 3) != 0)
9208      return false;
9209    V >>= 2;
9210    return V == (V & ((1LL << 8) - 1));
9211  }
9212}
9213
9214bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
9215                                                      EVT VT) const {
9216  int Scale = AM.Scale;
9217  if (Scale < 0)
9218    return false;
9219
9220  switch (VT.getSimpleVT().SimpleTy) {
9221  default: return false;
9222  case MVT::i1:
9223  case MVT::i8:
9224  case MVT::i16:
9225  case MVT::i32:
9226    if (Scale == 1)
9227      return true;
9228    // r + r << imm
9229    Scale = Scale & ~1;
9230    return Scale == 2 || Scale == 4 || Scale == 8;
9231  case MVT::i64:
9232    // r + r
9233    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
9234      return true;
9235    return false;
9236  case MVT::isVoid:
9237    // Note, we allow "void" uses (basically, uses that aren't loads or
9238    // stores), because ARM allows folding a scale into many arithmetic
9239    // operations.  This should be made more precise and revisited later.
9240
9241    // Allow r << imm, but the imm has to be a multiple of two.
9242    if (Scale & 1) return false;
9243    return isPowerOf2_32(Scale);
9244  }
9245}
9246
9247/// isLegalAddressingMode - Return true if the addressing mode represented
9248/// by AM is legal for this target, for a load/store of the specified type.
9249bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
9250                                              Type *Ty) const {
9251  EVT VT = getValueType(Ty, true);
9252  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
9253    return false;
9254
9255  // Can never fold addr of global into load/store.
9256  if (AM.BaseGV)
9257    return false;
9258
9259  switch (AM.Scale) {
9260  case 0:  // no scale reg, must be "r+i" or "r", or "i".
9261    break;
9262  case 1:
9263    if (Subtarget->isThumb1Only())
9264      return false;
9265    // FALL THROUGH.
9266  default:
9267    // ARM doesn't support any R+R*scale+imm addr modes.
9268    if (AM.BaseOffs)
9269      return false;
9270
9271    if (!VT.isSimple())
9272      return false;
9273
9274    if (Subtarget->isThumb2())
9275      return isLegalT2ScaledAddressingMode(AM, VT);
9276
9277    int Scale = AM.Scale;
9278    switch (VT.getSimpleVT().SimpleTy) {
9279    default: return false;
9280    case MVT::i1:
9281    case MVT::i8:
9282    case MVT::i32:
9283      if (Scale < 0) Scale = -Scale;
9284      if (Scale == 1)
9285        return true;
9286      // r + r << imm
9287      return isPowerOf2_32(Scale & ~1);
9288    case MVT::i16:
9289    case MVT::i64:
9290      // r + r
9291      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
9292        return true;
9293      return false;
9294
9295    case MVT::isVoid:
9296      // Note, we allow "void" uses (basically, uses that aren't loads or
9297      // stores), because ARM allows folding a scale into many arithmetic
9298      // operations.  This should be made more precise and revisited later.
9299
9300      // Allow r << imm, but the imm has to be a multiple of two.
9301      if (Scale & 1) return false;
9302      return isPowerOf2_32(Scale);
9303    }
9304  }
9305  return true;
9306}
9307
9308/// isLegalICmpImmediate - Return true if the specified immediate is legal
9309/// icmp immediate, that is the target has icmp instructions which can compare
9310/// a register against the immediate without having to materialize the
9311/// immediate into a register.
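/// For example, -255 is legal in ARM and Thumb2 mode because the comparison
/// can be done with "cmn rN, #255", but not in Thumb1, which has no cmn.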
9312bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
9313  // Thumb2 and ARM modes can use cmn for negative immediates.
9314  if (!Subtarget->isThumb())
9315    return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
9316  if (Subtarget->isThumb2())
9317    return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
9318  // Thumb1 doesn't have cmn; it only has cmp with an 8-bit unsigned immediate.
9319  return Imm >= 0 && Imm <= 255;
9320}
9321
9322/// isLegalAddImmediate - Return true if the specified immediate is a legal add
9323/// *or sub* immediate, that is, the target has add or sub instructions which
9324/// can add the immediate to a register without having to materialize the
9325/// immediate into a register.
9326bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
9327  // Same encoding for add/sub, just flip the sign.
9328  int64_t AbsImm = llvm::abs64(Imm);
9329  if (!Subtarget->isThumb())
9330    return ARM_AM::getSOImmVal(AbsImm) != -1;
9331  if (Subtarget->isThumb2())
9332    return ARM_AM::getT2SOImmVal(AbsImm) != -1;
9333  // Thumb1 only has 8-bit unsigned immediate.
9334  return AbsImm >= 0 && AbsImm <= 255;
9335}
9336
9337static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
9338                                      bool isSEXTLoad, SDValue &Base,
9339                                      SDValue &Offset, bool &isInc,
9340                                      SelectionDAG &DAG) {
9341  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
9342    return false;
9343
9344  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
9345    // AddressingMode 3
9346    Base = Ptr->getOperand(0);
9347    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
9348      int RHSC = (int)RHS->getZExtValue();
9349      if (RHSC < 0 && RHSC > -256) {
9350        assert(Ptr->getOpcode() == ISD::ADD);
9351        isInc = false;
9352        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
9353        return true;
9354      }
9355    }
9356    isInc = (Ptr->getOpcode() == ISD::ADD);
9357    Offset = Ptr->getOperand(1);
9358    return true;
9359  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
9360    // AddressingMode 2
9361    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
9362      int RHSC = (int)RHS->getZExtValue();
9363      if (RHSC < 0 && RHSC > -0x1000) {
9364        assert(Ptr->getOpcode() == ISD::ADD);
9365        isInc = false;
9366        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
9367        Base = Ptr->getOperand(0);
9368        return true;
9369      }
9370    }
9371
9372    if (Ptr->getOpcode() == ISD::ADD) {
9373      isInc = true;
9374      ARM_AM::ShiftOpc ShOpcVal =
9375        ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
9376      if (ShOpcVal != ARM_AM::no_shift) {
9377        Base = Ptr->getOperand(1);
9378        Offset = Ptr->getOperand(0);
9379      } else {
9380        Base = Ptr->getOperand(0);
9381        Offset = Ptr->getOperand(1);
9382      }
9383      return true;
9384    }
9385
9386    isInc = (Ptr->getOpcode() == ISD::ADD);
9387    Base = Ptr->getOperand(0);
9388    Offset = Ptr->getOperand(1);
9389    return true;
9390  }
9391
9392  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
9393  return false;
9394}
9395
9396static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
9397                                     bool isSEXTLoad, SDValue &Base,
9398                                     SDValue &Offset, bool &isInc,
9399                                     SelectionDAG &DAG) {
9400  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
9401    return false;
9402
9403  Base = Ptr->getOperand(0);
9404  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
9405    int RHSC = (int)RHS->getZExtValue();
9406    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
9407      assert(Ptr->getOpcode() == ISD::ADD);
9408      isInc = false;
9409      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
9410      return true;
9411    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
9412      isInc = Ptr->getOpcode() == ISD::ADD;
9413      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
9414      return true;
9415    }
9416  }
9417
9418  return false;
9419}
9420
9421/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
9422/// offset, and addressing mode by reference, if the node's address can be
9423/// legally represented as a pre-indexed load / store address.
9424bool
9425ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
9426                                             SDValue &Offset,
9427                                             ISD::MemIndexedMode &AM,
9428                                             SelectionDAG &DAG) const {
9429  if (Subtarget->isThumb1Only())
9430    return false;
9431
9432  EVT VT;
9433  SDValue Ptr;
9434  bool isSEXTLoad = false;
9435  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9436    Ptr = LD->getBasePtr();
9437    VT  = LD->getMemoryVT();
9438    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
9439  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
9440    Ptr = ST->getBasePtr();
9441    VT  = ST->getMemoryVT();
9442  } else
9443    return false;
9444
9445  bool isInc;
9446  bool isLegal = false;
9447  if (Subtarget->isThumb2())
9448    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
9449                                       Offset, isInc, DAG);
9450  else
9451    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
9452                                        Offset, isInc, DAG);
9453  if (!isLegal)
9454    return false;
9455
9456  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
9457  return true;
9458}
9459
9460/// getPostIndexedAddressParts - Returns true, and sets the base pointer,
9461/// offset, and addressing mode by reference, if this node can be combined
9462/// with a load / store to form a post-indexed load / store.
9463bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
9464                                                   SDValue &Base,
9465                                                   SDValue &Offset,
9466                                                   ISD::MemIndexedMode &AM,
9467                                                   SelectionDAG &DAG) const {
9468  if (Subtarget->isThumb1Only())
9469    return false;
9470
9471  EVT VT;
9472  SDValue Ptr;
9473  bool isSEXTLoad = false;
9474  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9475    VT  = LD->getMemoryVT();
9476    Ptr = LD->getBasePtr();
9477    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
9478  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
9479    VT  = ST->getMemoryVT();
9480    Ptr = ST->getBasePtr();
9481  } else
9482    return false;
9483
9484  bool isInc;
9485  bool isLegal = false;
9486  if (Subtarget->isThumb2())
9487    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
9488                                       isInc, DAG);
9489  else
9490    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
9491                                        isInc, DAG);
9492  if (!isLegal)
9493    return false;
9494
9495  if (Ptr != Base) {
9496    // Swap base ptr and offset to catch more post-index load / store when
9497    // it's legal. In Thumb2 mode, offset must be an immediate.
9498    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
9499        !Subtarget->isThumb2())
9500      std::swap(Base, Offset);
9501
9502    // Post-indexed load / store update the base pointer.
9503    if (Ptr != Base)
9504      return false;
9505  }
9506
9507  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
9508  return true;
9509}
9510
9511void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
9512                                                       APInt &KnownZero,
9513                                                       APInt &KnownOne,
9514                                                       const SelectionDAG &DAG,
9515                                                       unsigned Depth) const {
9516  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
9517  switch (Op.getOpcode()) {
9518  default: break;
9519  case ARMISD::CMOV: {
9520    // Bits are known zero/one if known on the LHS and RHS.
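    // For example, if one operand is (and X, 0xFF) and the other is the
    // constant 0x12, the top 24 bits of the CMOV result are known zero
    // whichever way the condition goes.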
9521    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
9522    if (KnownZero == 0 && KnownOne == 0) return;
9523
9524    APInt KnownZeroRHS, KnownOneRHS;
9525    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
9526    KnownZero &= KnownZeroRHS;
9527    KnownOne  &= KnownOneRHS;
9528    return;
9529  }
9530  }
9531}
9532
9533//===----------------------------------------------------------------------===//
9534//                           ARM Inline Assembly Support
9535//===----------------------------------------------------------------------===//
9536
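/// ExpandInlineAsm - Recognize trivial inline assembly that can be replaced
/// with target-independent IR. For example (illustrative), on ARMv6+
///   asm("rev $0, $1" : "=l"(y) : "l"(x));
/// is rewritten into a call to the llvm.bswap.i32 intrinsic, which the backend
/// can then select or fold like any other node.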
9537bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
9538  // Looking for "rev" which is V6+.
9539  if (!Subtarget->hasV6Ops())
9540    return false;
9541
9542  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
9543  std::string AsmStr = IA->getAsmString();
9544  SmallVector<StringRef, 4> AsmPieces;
9545  SplitString(AsmStr, AsmPieces, ";\n");
9546
9547  switch (AsmPieces.size()) {
9548  default: return false;
9549  case 1:
9550    AsmStr = AsmPieces[0];
9551    AsmPieces.clear();
9552    SplitString(AsmStr, AsmPieces, " \t,");
9553
9554    // rev $0, $1
9555    if (AsmPieces.size() == 3 &&
9556        AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
9557        IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
9558      IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
9559      if (Ty && Ty->getBitWidth() == 32)
9560        return IntrinsicLowering::LowerToByteSwap(CI);
9561    }
9562    break;
9563  }
9564
9565  return false;
9566}
9567
9568/// getConstraintType - Given a constraint letter, return the type of
9569/// constraint it is for this target.
9570ARMTargetLowering::ConstraintType
9571ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
9572  if (Constraint.size() == 1) {
9573    switch (Constraint[0]) {
9574    default:  break;
9575    case 'l': return C_RegisterClass;
9576    case 'w': return C_RegisterClass;
9577    case 'h': return C_RegisterClass;
9578    case 'x': return C_RegisterClass;
9579    case 't': return C_RegisterClass;
9580    case 'j': return C_Other; // Constant for movw.
9581      // An address with a single base register. Due to the way we
9582      // currently handle addresses it is the same as an 'r' memory constraint.
9583    case 'Q': return C_Memory;
9584    }
9585  } else if (Constraint.size() == 2) {
9586    switch (Constraint[0]) {
9587    default: break;
9588    // All 'U+' constraints are addresses.
9589    case 'U': return C_Memory;
9590    }
9591  }
9592  return TargetLowering::getConstraintType(Constraint);
9593}
9594
9595/// Examine constraint type and operand type and determine a weight value.
9596/// This object must already have been set up with the operand type
9597/// and the current alternative constraint selected.
9598TargetLowering::ConstraintWeight
9599ARMTargetLowering::getSingleConstraintMatchWeight(
9600    AsmOperandInfo &info, const char *constraint) const {
9601  ConstraintWeight weight = CW_Invalid;
9602  Value *CallOperandVal = info.CallOperandVal;
9603  // If we don't have a value, we can't do a match,
9604  // but allow it at the lowest weight.
9605  if (CallOperandVal == NULL)
9606    return CW_Default;
9607  Type *type = CallOperandVal->getType();
9608  // Look at the constraint type.
9609  switch (*constraint) {
9610  default:
9611    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
9612    break;
9613  case 'l':
9614    if (type->isIntegerTy()) {
9615      if (Subtarget->isThumb())
9616        weight = CW_SpecificReg;
9617      else
9618        weight = CW_Register;
9619    }
9620    break;
9621  case 'w':
9622    if (type->isFloatingPointTy())
9623      weight = CW_Register;
9624    break;
9625  }
9626  return weight;
9627}
9628
9629typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
9630RCPair
9631ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
9632                                                EVT VT) const {
9633  if (Constraint.size() == 1) {
9634    // GCC ARM Constraint Letters
9635    switch (Constraint[0]) {
9636    case 'l': // Low regs or general regs.
9637      if (Subtarget->isThumb())
9638        return RCPair(0U, &ARM::tGPRRegClass);
9639      return RCPair(0U, &ARM::GPRRegClass);
9640    case 'h': // High regs or no regs.
9641      if (Subtarget->isThumb())
9642        return RCPair(0U, &ARM::hGPRRegClass);
9643      break;
9644    case 'r':
9645      return RCPair(0U, &ARM::GPRRegClass);
9646    case 'w':
9647      if (VT == MVT::f32)
9648        return RCPair(0U, &ARM::SPRRegClass);
9649      if (VT.getSizeInBits() == 64)
9650        return RCPair(0U, &ARM::DPRRegClass);
9651      if (VT.getSizeInBits() == 128)
9652        return RCPair(0U, &ARM::QPRRegClass);
9653      break;
9654    case 'x':
9655      if (VT == MVT::f32)
9656        return RCPair(0U, &ARM::SPR_8RegClass);
9657      if (VT.getSizeInBits() == 64)
9658        return RCPair(0U, &ARM::DPR_8RegClass);
9659      if (VT.getSizeInBits() == 128)
9660        return RCPair(0U, &ARM::QPR_8RegClass);
9661      break;
9662    case 't':
9663      if (VT == MVT::f32)
9664        return RCPair(0U, &ARM::SPRRegClass);
9665      break;
9666    }
9667  }
9668  if (StringRef("{cc}").equals_lower(Constraint))
9669    return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
9670
9671  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
9672}
9673
9674/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
9675/// vector.  If it is invalid, don't add anything to Ops.
9676void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
9677                                                     std::string &Constraint,
9678                                                     std::vector<SDValue>&Ops,
9679                                                     SelectionDAG &DAG) const {
9680  SDValue Result(0, 0);
9681
9682  // Currently only support length 1 constraints.
9683  if (Constraint.length() != 1) return;
9684
9685  char ConstraintLetter = Constraint[0];
9686  switch (ConstraintLetter) {
9687  default: break;
9688  case 'j':
9689  case 'I': case 'J': case 'K': case 'L':
9690  case 'M': case 'N': case 'O':
9691    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
9692    if (!C)
9693      return;
9694
9695    int64_t CVal64 = C->getSExtValue();
9696    int CVal = (int) CVal64;
9697    // None of these constraints allow values larger than 32 bits.  Check
9698    // that the value fits in an int.
9699    if (CVal != CVal64)
9700      return;
9701
9702    switch (ConstraintLetter) {
9703      case 'j':
9704        // Constant suitable for movw, must be between 0 and
9705        // 65535.
9706        if (Subtarget->hasV6T2Ops())
9707          if (CVal >= 0 && CVal <= 65535)
9708            break;
9709        return;
9710      case 'I':
9711        if (Subtarget->isThumb1Only()) {
9712          // This must be a constant between 0 and 255, for ADD
9713          // immediates.
9714          if (CVal >= 0 && CVal <= 255)
9715            break;
9716        } else if (Subtarget->isThumb2()) {
9717          // A constant that can be used as an immediate value in a
9718          // data-processing instruction.
9719          if (ARM_AM::getT2SOImmVal(CVal) != -1)
9720            break;
9721        } else {
9722          // A constant that can be used as an immediate value in a
9723          // data-processing instruction.
9724          if (ARM_AM::getSOImmVal(CVal) != -1)
9725            break;
9726        }
9727        return;
9728
9729      case 'J':
9730        if (Subtarget->isThumb()) {  // FIXME thumb2
9731          // This must be a constant between -255 and -1, for negated ADD
9732          // immediates. This can be used in GCC with an "n" modifier that
9733          // prints the negated value, for use with SUB instructions. It is
9734          // not useful otherwise but is implemented for compatibility.
9735          if (CVal >= -255 && CVal <= -1)
9736            break;
9737        } else {
9738          // This must be a constant between -4095 and 4095. It is not clear
9739          // what this constraint is intended for. Implemented for
9740          // compatibility with GCC.
9741          if (CVal >= -4095 && CVal <= 4095)
9742            break;
9743        }
9744        return;
9745
9746      case 'K':
9747        if (Subtarget->isThumb1Only()) {
9748          // A 32-bit value where only one byte has a nonzero value. Exclude
9749          // zero to match GCC. This constraint is used by GCC internally for
9750          // constants that can be loaded with a move/shift combination.
9751          // It is not useful otherwise but is implemented for compatibility.
9752          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
9753            break;
9754        } else if (Subtarget->isThumb2()) {
9755          // A constant whose bitwise inverse can be used as an immediate
9756          // value in a data-processing instruction. This can be used in GCC
9757          // with a "B" modifier that prints the inverted value, for use with
9758          // BIC and MVN instructions. It is not useful otherwise but is
9759          // implemented for compatibility.
9760          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
9761            break;
9762        } else {
9763          // A constant whose bitwise inverse can be used as an immediate
9764          // value in a data-processing instruction. This can be used in GCC
9765          // with a "B" modifier that prints the inverted value, for use with
9766          // BIC and MVN instructions. It is not useful otherwise but is
9767          // implemented for compatibility.
9768          if (ARM_AM::getSOImmVal(~CVal) != -1)
9769            break;
9770        }
9771        return;
9772
9773      case 'L':
9774        if (Subtarget->isThumb1Only()) {
9775          // This must be a constant between -7 and 7,
9776          // for 3-operand ADD/SUB immediate instructions.
9777          if (CVal >= -7 && CVal <= 7)
9778            break;
9779        } else if (Subtarget->isThumb2()) {
9780          // A constant whose negation can be used as an immediate value in a
9781          // data-processing instruction. This can be used in GCC with an "n"
9782          // modifier that prints the negated value, for use with SUB
9783          // instructions. It is not useful otherwise but is implemented for
9784          // compatibility.
9785          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
9786            break;
9787        } else {
9788          // A constant whose negation can be used as an immediate value in a
9789          // data-processing instruction. This can be used in GCC with an "n"
9790          // modifier that prints the negated value, for use with SUB
9791          // instructions. It is not useful otherwise but is implemented for
9792          // compatibility.
9793          if (ARM_AM::getSOImmVal(-CVal) != -1)
9794            break;
9795        }
9796        return;
9797
9798      case 'M':
9799        if (Subtarget->isThumb()) { // FIXME thumb2
9800          // This must be a multiple of 4 between 0 and 1020, for
9801          // ADD sp + immediate.
9802          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
9803            break;
9804        } else {
9805          // A power of two or a constant between 0 and 32.  This is used in
9806          // GCC for the shift amount on shifted register operands, but it is
9807          // useful in general for any shift amounts.
9808          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
9809            break;
9810        }
9811        return;
9812
9813      case 'N':
9814        if (Subtarget->isThumb()) {  // FIXME thumb2
9815          // This must be a constant between 0 and 31, for shift amounts.
9816          if (CVal >= 0 && CVal <= 31)
9817            break;
9818        }
9819        return;
9820
9821      case 'O':
9822        if (Subtarget->isThumb()) {  // FIXME thumb2
9823          // This must be a multiple of 4 between -508 and 508, for
9824          // ADD/SUB sp = sp + immediate.
9825          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
9826            break;
9827        }
9828        return;
9829    }
9830    Result = DAG.getTargetConstant(CVal, Op.getValueType());
9831    break;
9832  }
9833
9834  if (Result.getNode()) {
9835    Ops.push_back(Result);
9836    return;
9837  }
9838  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
9839}
9840
9841bool
9842ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
9843  // The ARM target isn't yet aware of offsets.
9844  return false;
9845}
9846
9847bool ARM::isBitFieldInvertedMask(unsigned v) {
9848  if (v == 0xffffffff)
9849    return false;
9850  // There can be 1's on either or both "outsides"; all the "inside"
9851  // bits must be 0's.
9852  unsigned int lsb = 0, msb = 31;
9853  while (v & (1 << msb)) --msb;
9854  while (v & (1 << lsb)) ++lsb;
9855  for (unsigned int i = lsb; i <= msb; ++i) {
9856    if (v & (1 << i))
9857      return false;
9858  }
9859  return true;
9860}
9861
9862/// isFPImmLegal - Returns true if the target can instruction select the
9863/// specified FP immediate natively. If false, the legalizer will
9864/// materialize the FP immediate as a load from a constant pool.
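/// For example, 1.0 and -0.5 are encodable in the VFP3 8-bit immediate format
/// (vmov.f32 / vmov.f64), whereas 0.1 is not and will be loaded from the
/// constant pool.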
9865bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
9866  if (!Subtarget->hasVFP3())
9867    return false;
9868  if (VT == MVT::f32)
9869    return ARM_AM::getFP32Imm(Imm) != -1;
9870  if (VT == MVT::f64)
9871    return ARM_AM::getFP64Imm(Imm) != -1;
9872  return false;
9873}
9874
9875/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
9876/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
9877/// specified in the intrinsic calls.
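/// For example (illustrative), a call to llvm.arm.neon.vld4.v4i32 loads four
/// <4 x i32> values (64 bytes in total), so memVT is set to v8i64 to cover the
/// whole footprint for alias analysis.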
9878bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
9879                                           const CallInst &I,
9880                                           unsigned Intrinsic) const {
9881  switch (Intrinsic) {
9882  case Intrinsic::arm_neon_vld1:
9883  case Intrinsic::arm_neon_vld2:
9884  case Intrinsic::arm_neon_vld3:
9885  case Intrinsic::arm_neon_vld4:
9886  case Intrinsic::arm_neon_vld2lane:
9887  case Intrinsic::arm_neon_vld3lane:
9888  case Intrinsic::arm_neon_vld4lane: {
9889    Info.opc = ISD::INTRINSIC_W_CHAIN;
9890    // Conservatively set memVT to the entire set of vectors loaded.
9891    uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
9892    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
9893    Info.ptrVal = I.getArgOperand(0);
9894    Info.offset = 0;
9895    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
9896    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
9897    Info.vol = false; // volatile loads with NEON intrinsics not supported
9898    Info.readMem = true;
9899    Info.writeMem = false;
9900    return true;
9901  }
9902  case Intrinsic::arm_neon_vst1:
9903  case Intrinsic::arm_neon_vst2:
9904  case Intrinsic::arm_neon_vst3:
9905  case Intrinsic::arm_neon_vst4:
9906  case Intrinsic::arm_neon_vst2lane:
9907  case Intrinsic::arm_neon_vst3lane:
9908  case Intrinsic::arm_neon_vst4lane: {
9909    Info.opc = ISD::INTRINSIC_VOID;
9910    // Conservatively set memVT to the entire set of vectors stored.
9911    unsigned NumElts = 0;
9912    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
9913      Type *ArgTy = I.getArgOperand(ArgI)->getType();
9914      if (!ArgTy->isVectorTy())
9915        break;
9916      NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
9917    }
9918    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
9919    Info.ptrVal = I.getArgOperand(0);
9920    Info.offset = 0;
9921    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
9922    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
9923    Info.vol = false; // volatile stores with NEON intrinsics not supported
9924    Info.readMem = false;
9925    Info.writeMem = true;
9926    return true;
9927  }
9928  case Intrinsic::arm_strexd: {
9929    Info.opc = ISD::INTRINSIC_W_CHAIN;
9930    Info.memVT = MVT::i64;
9931    Info.ptrVal = I.getArgOperand(2);
9932    Info.offset = 0;
9933    Info.align = 8;
9934    Info.vol = true;
9935    Info.readMem = false;
9936    Info.writeMem = true;
9937    return true;
9938  }
9939  case Intrinsic::arm_ldrexd: {
9940    Info.opc = ISD::INTRINSIC_W_CHAIN;
9941    Info.memVT = MVT::i64;
9942    Info.ptrVal = I.getArgOperand(0);
9943    Info.offset = 0;
9944    Info.align = 8;
9945    Info.vol = true;
9946    Info.readMem = true;
9947    Info.writeMem = false;
9948    return true;
9949  }
9950  default:
9951    break;
9952  }
9953
9954  return false;
9955}
9956