ARMISelLowering.cpp revision 3e0dc0606aed30b1fa6e1abcecf2cbf5e9ac1af9
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
      : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
static const unsigned GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
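      // (Note: as with the single-precision helpers above, each comparison
      // routine returns an integer that is then tested against zero using
      // the condition registered with setCmpLibcallCC. For example,
      // __eqdf2vfp returns nonzero iff its operands compare equal, so OEQ
      // holds when the libcall result is != 0; O and UO share __unorddf2vfp
      // and differ only in comparing its result == 0 versus != 0.)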
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
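  // (Note: a null libcall name marks the routine as unavailable, so the
  // legalizer must expand 128-bit shifts inline instead of emitting calls to
  // the default __ashlti3/__lshrti3/__ashrti3 helpers, which 32-bit ARM
  // runtimes do not provide.)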
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64,  ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
"__aeabi_fcmpun"); 330 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 331 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 332 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 333 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 334 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 335 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 336 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 337 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 338 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 339 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 340 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 341 342 // Floating-point to integer conversions. 343 // RTABI chapter 4.1.2, Table 6 344 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 345 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 346 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 347 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 348 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 349 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 350 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 351 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 352 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 353 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 354 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 355 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 356 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 357 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 358 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 359 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 360 361 // Conversions between floating types. 362 // RTABI chapter 4.1.2, Table 7 363 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 364 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 365 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 366 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 367 368 // Integer to floating-point conversions. 
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallName(RTLIB::SHL_I64,  "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64,  "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64,  "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
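  // (Note: __divmodsi4 and __udivmodsi4 produce the quotient and remainder in
  // a single call; in compiler-rt the remainder is returned through a pointer
  // argument. Registering them lets an SDIVREM/UDIVREM node lower to a single
  // libcall rather than separate division and remainder calls.)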
  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it. The same
    // applies to v4f32, except that vadd, vsub, and vmul are natively
    // supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create a unit test. In other words, find a case where "copysign"
    // appears in the DAG with vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
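  // (Note: 64-bit integer arithmetic is largely synthesized from 32-bit
  // pieces here: MUL i64 expands via the 32-bit UMUL_LOHI/SMUL_LOHI nodes,
  // and 64-bit shifts are custom lowered or routed through the i32
  // SHL/SRA/SRL_PARTS nodes set up below.)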
  setOperationAction(ISD::MUL,   MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() ||
      (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM,    MVT::i32, Expand);
  setOperationAction(ISD::UREM,    MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,      MVT::Other, Custom);
  setOperationAction(ISD::VAARG,        MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,       MVT::Other, Expand);
  setOperationAction(ISD::VAEND,        MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,  MVT::i32,   Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32,  Expand);
  setExceptionPointerRegister(ARM::R0);
  setExceptionSelectorRegister(ARM::R1);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  // FIXME: This should be checking for v6k, not just v6.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
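    // (Note: the custom lowering selects between a real barrier instruction
    // and the legacy CP15 encoding "mcr p15, 0, <Rd>, c7, c10, 5" for
    // pre-dmb cores; see the ARMISD::MEMBARRIER and ARMISD::MEMBARRIER_MCR
    // nodes named in getTargetNodeName below.)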
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    setInsertFencesForAtomic(true);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,  MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,  MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,  MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,  MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
    setTargetDAGCombine(ISD::OR);
  if (Subtarget->hasNEON())
    setTargetDAGCombine(ISD::AND);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
  maxStoresPerMemset = 16;
  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as
// the nearest super-register that has a non-null superset. For example,
// DPR_VFP2 is a super-register of SPR, and DPR is a superset of DPR_VFP2.
// Consequently, SPR's representative would be DPR_VFP2. This should work well
// if register pressure tracking were modified such that a register use would
// increment the pressure of the register class's representative and all of
// its super-classes' representatives transitively. We have not implemented
// this because of the difficulty prior to coalescing of modeling operand
// register classes due to the common occurrence of cross-class copies and
// subregister insertions and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as the representative register class for all floating-point and
  // vector types. Since there are 32 SPR registers and 32 DPR registers, the
  // cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be
    // constrained to the VFP2 class (D0-D15). We currently model this
    // constraint prior to coalescing by double-counting the SP regs. See the
    // FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::ADDC:          return "ARMISD::ADDC";
  case ARMISD::ADDE:          return "ARMISD::ADDE";
  case ARMISD::SUBC:          return "ARMISD::SUBC";
  case ARMISD::SUBE:          return "ARMISD::SUBE";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
return "ARMISD::VSHLLu"; 920 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 921 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 922 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 923 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 924 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 925 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 926 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 927 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 928 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 929 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 930 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 931 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 932 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 933 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 934 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 935 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 936 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 937 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 938 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; 939 case ARMISD::VDUP: return "ARMISD::VDUP"; 940 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 941 case ARMISD::VEXT: return "ARMISD::VEXT"; 942 case ARMISD::VREV64: return "ARMISD::VREV64"; 943 case ARMISD::VREV32: return "ARMISD::VREV32"; 944 case ARMISD::VREV16: return "ARMISD::VREV16"; 945 case ARMISD::VZIP: return "ARMISD::VZIP"; 946 case ARMISD::VUZP: return "ARMISD::VUZP"; 947 case ARMISD::VTRN: return "ARMISD::VTRN"; 948 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 949 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 950 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 951 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 952 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 953 case ARMISD::FMAX: return "ARMISD::FMAX"; 954 case ARMISD::FMIN: return "ARMISD::FMIN"; 955 case ARMISD::BFI: return "ARMISD::BFI"; 956 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 957 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 958 case ARMISD::VBSL: return "ARMISD::VBSL"; 959 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 960 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 961 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 962 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 963 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 964 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 965 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 966 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 967 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 968 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 969 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 970 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 971 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 972 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 973 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 974 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 975 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 976 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 977 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 978 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 979 } 980} 981 982EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { 983 if (!VT.isVector()) return getPointerTy(); 984 return VT.changeVectorElementTypeToInteger(); 985} 986 987/// getRegClassFor - Return the register class that should be used for the 988/// specified value type. 989TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { 990 // Map v4i64 to QQ registers but do not make the type legal. 
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary is not
  // available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
//                      Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
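/// Predicates with no single ARM condition are encoded as a pair: CondCode2
/// stays ARMCC::AL when unused; otherwise the client emits the operation
/// twice, once per condition (e.g. SETONE becomes MI followed by GT, and
/// SETUEQ becomes EQ followed by VS).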
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use the VFP variant of the calling
      // convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Disable tail calls if they're not supported.
  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      unsigned offset = 0;

      // True if this byval aggregate will be split between registers
      // and memory.
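      // (Note: when the calling convention recorded a first byval register,
      // the leading words of the aggregate are loaded into registers up to
      // R3 below, and only the remainder is memcpy'd to the outgoing stack
      // area; 'offset' counts the words already consumed by registers.)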
      // True if this byval aggregate will be split between registers
      // and memory.
      if (CCInfo.isFirstByValRegValid()) {
        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        unsigned int i, j;
        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }
        offset = ARM::R4 - CCInfo.getFirstByValReg();
        CCInfo.clearFirstByValReg();
      }

      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                StkPtrOff);
      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                         MVT::i32);
      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                                          Flags.getByValAlign(),
                                          /*isVolatile=*/false,
                                          /*AlwaysInline=*/false,
                                          MachinePointerInfo(0),
                                          MachinePointerInfo(0)));

    } else if (!IsSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }
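  // Sketch of the node sequence being assembled here (illustrative, assuming
  // a simple non-tail call):
  //   callseq_start -> glued CopyToReg(r0..r3) -> ARMISD::CALL
  //     -> callseq_end -> CopyFromReg(result registers)
  // The glue threaded through InFlag keeps the scheduler from separating the
  // argument copies from the call itself.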
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert(getTargetMachine().getRelocationModel() == Reloc::Static &&
           "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);

      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                      ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
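    // Rough intuition (illustrative summary, not original commentary): a
    // definition in this module known to be ARM code can be reached with a
    // predicable BL and never needs an interworking veneer, while external or
    // stubbed callees may turn out to be Thumb code or sit behind a
    // lazy-binding stub, so they must use BX/BLX-compatible sequences.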
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      // On ELF targets for PIC code, direct calls should go through the PLT
      unsigned OpFlags = 0;
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                      ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      unsigned OpFlags = 0;
      // On ELF targets for PIC code, direct calls should go through the PLT
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
    }
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
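  // Note (an assumption about the surrounding backend, not stated here):
  // ARMISD::TC_RETURN is matched to a tail-call pseudo and later expanded
  // into a plain branch in the epilogue, so the callee returns directly to
  // our caller without growing the call stack.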
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack.  Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
  assert((State->getCallOrPrologue() == Prologue ||
          State->getCallOrPrologue() == Call) &&
         "unhandled ParmContext");
  if ((!State->isFirstByValRegValid()) &&
      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
    State->setFirstByValReg(reg);
    // At a call site, a byval parameter that is split between
    // registers and memory needs its size truncated here.  In a
    // function prologue, such byval parameters are reassembled in
    // memory, and are not truncated.
    if (State->getCallOrPrologue() == Call) {
      unsigned excess = 4 * (ARM::R4 - reg);
      assert(size >= excess && "expected larger existing stack allocation");
      size -= excess;
    }
  }
  // Confiscate any remaining parameter registers to preclude their
  // assignment to subsequent parameters.
  while (State->AllocateReg(GPRArgRegs, 4))
    ;
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const ARMInstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!TargetRegisterInfo::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
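// Illustrative case (an assumption, not from the original source): if the
// caller received an i32 in a fixed stack object at offset 4 and the tail
// call passes that same value at outgoing offset 4 with matching size,
// MatchingStackOffset returns true and no store is needed; the argument is
// already where the callee expects it.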
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                     CallingConv::ID CalleeCC,
                                     bool isVarArg,
                                     bool isCalleeStructRet,
                                     bool isCallerStructRet,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     SelectionDAG& DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall optimize vararg calls unless the call site is not passing
  // any arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
  // support in the assembler and linker to be used. This would need to be
  // fixed to fully support tail calls in Thumb1.
  //
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR. This means if we need to reload LR, it takes an extra instruction,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used. Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.

  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
  // but we need to make sure there are enough registers; the only valid
  // registers are the 4 used for parameters. We don't currently do this
  // case.
  if (Subtarget->isThumb1Only())
    return false;

  // If the calling conventions do not match, then we'd better make sure the
  // results are returned in the same way as what the caller expects.
  if (!CCMatch) {
    SmallVector<CCValAssign, 16> RVLocs1;
    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));

    SmallVector<CCValAssign, 16> RVLocs2;
    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));

    if (RVLocs1.size() != RVLocs2.size())
      return false;
    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
        return false;
      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
        return false;
      if (RVLocs1[i].isRegLoc()) {
        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
          return false;
      } else {
        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
          return false;
      }
    }
  }

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed.  For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
    CCInfo.AnalyzeCallOperands(Outs,
                               CCAssignFnForNode(CalleeCC, false, isVarArg));
    if (CCInfo.getNextStackOffset()) {
      MachineFunction &MF = DAG.getMachineFunction();

      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo *MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const ARMInstrInfo *TII =
        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
        CCValAssign &VA = ArgLocs[i];
        EVT RegVT = VA.getLocVT();
        SDValue Arg = OutVals[realArgIdx];
        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (VA.needsCustom()) {
          // f64 and vector types are split into multiple registers or
          // register/stack-slot combinations.  The types will not match
          // the registers; give up on memory f64 refs until we figure
          // out what to do about this.
          if (!VA.isRegLoc())
            return false;
          if (!ArgLocs[++i].isRegLoc())
            return false;
          if (RegVT == MVT::v2f64) {
            if (!ArgLocs[++i].isRegLoc())
              return false;
            if (!ArgLocs[++i].isRegLoc())
              return false;
          }
        } else if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII))
            return false;
        }
      }
    }
  }

  return true;
}

SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }
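    // Illustrative note (an assumption about the soft-float AAPCS cases
    // handled below): an f64 return is legalized to two i32 halves via
    // ARMISD::VMOVRRD and copied into r0/r1; a v2f64 return does this twice,
    // filling r0-r3.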
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are stuck together with glue,
    // so that nothing can be scheduled in between them.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else  // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}

bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  unsigned NumCopies = 0;
  SDNode* Copies[2];
  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg) {
    Copies[NumCopies++] = Use;
  } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
    // f64 returned in a pair of GPRs.
    for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() != ISD::CopyToReg)
        return false;
      Copies[UI.getUse().getResNo()] = *UI;
      ++NumCopies;
    }
  } else if (Use->getOpcode() == ISD::BITCAST) {
    // f32 returned in a single GPR.
    if (!Use->hasNUsesOfValue(1, 0))
      return false;
    Use = *Use->use_begin();
    if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
      return false;
    Copies[NumCopies++] = Use;
  } else {
    return false;
  }

  if (NumCopies != 1 && NumCopies != 2)
    return false;

  bool HasRet = false;
  for (unsigned i = 0; i < NumCopies; ++i) {
    SDNode *Copy = Copies[i];
    for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() == ISD::CopyToReg) {
        SDNode *Use = *UI;
        if (Use == Copies[0] || Use == Copies[1])
          continue;
        return false;
      }
      if (UI->getOpcode() != ARMISD::RET_FLAG)
        return false;
      HasRet = true;
    }
  }

  return HasRet;
}

bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  if (!EnableARMTailCalls)
    return false;

  if (!CI->isTailCall())
    return false;

  return !Subtarget->isThumb1Only();
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

unsigned ARMTargetLowering::getJumpTableEncoding() const {
  return MachineJumpTableInfo::EK_Inline;
}

SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
                                      ARMCP::CPBlockAddress, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) const {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV =
    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) const {
  const GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
                                      true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);
  } else {
    // local exec model
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}

SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec" TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}

SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GV,
                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                 CPAddr,
                                 MachinePointerInfo::getConstantPool(),
                                 false, false, false, 0);
    SDValue Chain = Result.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
    if (!UseGOTOFF)
      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                           MachinePointerInfo::getGOT(),
                           false, false, false, 0);
    return Result;
  }

  // If we have T2 ops, we can materialize the address directly via movt/movw
  // pair. This is always cheaper.
  if (Subtarget->useMovt()) {
    ++NumMovwMovt;
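    // Illustrative asm (an assumption about what instruction selection
    // ultimately emits for this wrapped node):
    //   movw r0, :lower16:sym
    //   movt r0, :upper16:sym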
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
  } else {
    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                       MachinePointerInfo::getConstantPool(),
                       false, false, false, 0);
  }
}

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // FIXME: Enable this for static codegen when tool issues are fixed.
  if (Subtarget->useMovt() && RelocM != Reloc::Static) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    if (RelocM == Reloc::Static)
      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));

    unsigned Wrapper = (RelocM == Reloc::PIC_)
      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                           MachinePointerInfo::getGOT(),
                           false, false, false, 0);
    return Result;
  }

  unsigned ARMPCLabelIndex = 0;
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  } else {
    ARMPCLabelIndex = AFI->createPICLabelUId();
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
                                      PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
                         false, false, false, 0);

  return Result;
}

SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV =
    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
                                  ARMPCLabelIndex, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  SDValue Val = DAG.getConstant(0, MVT::i32);
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1), Val);
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}

SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                           const ARMSubtarget *Subtarget) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::arm_thread_pointer: {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
    SDValue CPAddr;
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
                                      ARMCP::CPLSDA, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                  MachinePointerInfo::getConstantPool(),
                  false, false, false, 0);

    if (RelocM == Reloc::PIC_) {
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    }
    return Result;
  }
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
      ? ARMISD::VMULLs : ARMISD::VMULLu;
    return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }
  }
}

static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *Subtarget) {
  DebugLoc dl = Op.getDebugLoc();
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 CPUs can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, MVT::i32));
  }

  SDValue Op5 = Op.getOperand(5);
  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);

  ARM_MB::MemBOpt DMBOpt;
  if (isDeviceBarrier)
    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
  else
    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(DMBOpt, MVT::i32));
}

static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const ARMSubtarget *Subtarget) {
  // FIXME: handle "fence singlethread" more efficiently.
  DebugLoc dl = Op.getDebugLoc();
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 CPUs can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, MVT::i32));
  }

  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(ARM_MB::ISH, MVT::i32));
}

static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                             const ARMSubtarget *Subtarget) {
  // ARM pre-v5TE and Thumb1 do not have preload instructions.
  if (!(Subtarget->isThumb2() ||
        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
    // Just preserve the chain.
    return Op.getOperand(0);

  DebugLoc dl = Op.getDebugLoc();
  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
  if (!isRead &&
      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
    // ARMv7 with MP extension has PLDW.
    return Op.getOperand(0);

  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (Subtarget->isThumb()) {
    // Invert the bits.
    isRead = ~isRead & 1;
    isData = ~isData & 1;
  }

  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
                     DAG.getConstant(isData, MVT::i32));
}
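// Illustrative behavior sketch for the prefetch lowering above (assumptions
// about the final code, not original commentary): a write prefetch such as
// __builtin_prefetch(p, /*rw=*/1) can lower to PLDW only on v7 cores with
// the MP extension and is otherwise dropped; on pre-v5TE or Thumb1 cores
// every prefetch is dropped, leaving only its chain operand.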
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  DebugLoc dl = Op.getDebugLoc();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                      MachinePointerInfo(SV), false, false, 0);
}

SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                        SDValue &Root, SelectionDAG &DAG,
                                        DebugLoc dl) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  TargetRegisterClass *RC;
  if (AFI->isThumb1OnlyFunction())
    RC = ARM::tGPRRegisterClass;
  else
    RC = ARM::GPRRegisterClass;

  // Transform the arguments stored in physical registers into virtual ones.
  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);

  SDValue ArgValue2;
  if (NextVA.isMemLoc()) {
    MachineFrameInfo *MFI = MF.getFrameInfo();
    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);

    // Create load node to retrieve arguments from the stack.
    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
                            MachinePointerInfo::getFixedStack(FI),
                            false, false, false, 0);
  } else {
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
  }

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}

void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
                                  unsigned &VARegSize, unsigned &VARegSaveSize)
  const {
  unsigned NumGPRs;
  if (CCInfo.isFirstByValRegValid())
    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
  else {
    unsigned int firstUnalloced;
    firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
                                                sizeof(GPRArgRegs) /
                                                sizeof(GPRArgRegs[0]));
    NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
  }

  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
  VARegSize = NumGPRs * 4;
  VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
}
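// Worked example (illustrative, assuming the usual 8-byte AAPCS stack
// alignment): with two fixed i32 arguments in r0/r1, getFirstUnallocated
// returns 2, so NumGPRs == 2 and VARegSize == 8; VARegSaveSize rounds up to
// 8 as well, the size of the r2/r3 save area spilled below.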
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval).  Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
void
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                                        DebugLoc dl, SDValue &Chain,
                                        unsigned ArgOffset) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned firstRegToSaveIndex;
  if (CCInfo.isFirstByValRegValid())
    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
  else {
    firstRegToSaveIndex = CCInfo.getFirstUnallocated
      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
  }

  unsigned VARegSize, VARegSaveSize;
  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
  if (VARegSaveSize) {
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    AFI->setVarArgsRegSaveSize(VARegSaveSize);
    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
                                                     ArgOffset + VARegSaveSize
                                                       - VARegSize,
                                                     false));
    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                    getPointerTy());

    SmallVector<SDValue, 4> MemOps;
    for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
      TargetRegisterClass *RC;
      if (AFI->isThumb1OnlyFunction())
        RC = ARM::tGPRRegisterClass;
      else
        RC = ARM::GPRRegisterClass;

      unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
      SDValue Store =
        DAG.getStore(Val.getValue(1), dl, Val, FIN,
                     MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
                     false, false, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                        DAG.getConstant(4, getPointerTy()));
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  } else
    // This will point to the next argument passed via stack.
    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
}
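// Illustrative result (an assumption about the final code): for
//   int sum(int a, int b, ...)
// the prologue stores r2 and r3 into the 8-byte save area computed above,
// laid out adjacent to any caller-pushed arguments, so va_arg can walk all
// variadic values with a single pointer.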
SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
  CCInfo.AnalyzeFormalArguments(Ins,
                                CCAssignFnForNode(CallConv, /* Return*/ false,
                                                  isVarArg));

  SmallVector<SDValue, 16> ArgValues;
  int lastInsIndex = -1;

  SDValue ArgValue;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();

      if (VA.needsCustom()) {
        // f64 and vector types are split up into multiple registers or
        // combinations of registers and stack slots.
        if (VA.getLocVT() == MVT::v2f64) {
          SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
                                                   Chain, DAG, dl);
          VA = ArgLocs[++i]; // skip ahead to next loc
          SDValue ArgValue2;
          if (VA.isMemLoc()) {
            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
            SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
            ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                    MachinePointerInfo::getFixedStack(FI),
                                    false, false, false, 0);
          } else {
            ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
                                             Chain, DAG, dl);
          }
          ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
          ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
                                 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
        } else
          ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

      } else {
        TargetRegisterClass *RC;

        if (RegVT == MVT::f32)
          RC = ARM::SPRRegisterClass;
        else if (RegVT == MVT::f64)
          RC = ARM::DPRRegisterClass;
        else if (RegVT == MVT::v2f64)
          RC = ARM::QPRRegisterClass;
        else if (RegVT == MVT::i32)
          RC = (AFI->isThumb1OnlyFunction() ?
                ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
        else
          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

        // Transform the arguments in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
      }

      // If this is an 8 or 16-bit value, it is really passed promoted
      // to 32 bits.  Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);

    } else { // VA.isRegLoc()

      // sanity check
      assert(VA.isMemLoc());
      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");

      int index = ArgLocs[i].getValNo();

      // Some Ins[] entries become multiple ArgLoc[] entries.
      // Process them only once.
      if (index != lastInsIndex) {
        ISD::ArgFlagsTy Flags = Ins[index].Flags;
        // FIXME: For now, all byval parameter objects are marked mutable.
        // This can be changed with more analysis.
        // In case of tail call optimization, mark all arguments mutable,
        // since they could be overwritten by the lowering of arguments in
        // case of a tail call.
        if (Flags.isByVal()) {
          unsigned VARegSize, VARegSaveSize;
          computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
          VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
          unsigned Bytes = Flags.getByValSize() - VARegSize;
          if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
          int FI = MFI->CreateFixedObject(Bytes,
                                          VA.getLocMemOffset(), false);
          InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
        } else {
          int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                          VA.getLocMemOffset(), true);

          // Create load nodes to retrieve arguments from the stack.
          SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
          InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                       MachinePointerInfo::getFixedStack(FI),
                                       false, false, false, 0));
        }
        lastInsIndex = index;
      }
    }
  }

  // varargs
  if (isVarArg)
    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());

  return Chain;
}

/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
          return CFP->getValueAPF().isPosZero();
    }
  }
  return false;
}

/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &ARMcc, SelectionDAG &DAG,
                             DebugLoc dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
    if (!isLegalICmpImmediate(C)) {
      // Constant does not fit, try adjusting it by one?
      switch (CC) {
      default: break;
      case ISD::SETLT:
      case ISD::SETGE:
        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if (C != 0 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      }
    }
  }

  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
  ARMISD::NodeType CompareType;
  switch (CondCode) {
  default:
    CompareType = ARMISD::CMP;
    break;
  case ARMCC::EQ:
  case ARMCC::NE:
    // Uses only Z Flag
    CompareType = ARMISD::CMPZ;
    break;
  }
  ARMcc = DAG.getConstant(CondCode, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
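// Worked example for the immediate adjustment above (illustrative): 0x101 is
// not a valid ARM modified immediate, so (x < 0x101) cannot use a direct
// "cmp x, #0x101"; rewriting it as (x <= 0x100) works because 0x100 is
// encodable as an 8-bit value rotated into place.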
2734SDValue 2735ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2736 DebugLoc dl) const { 2737 SDValue Cmp; 2738 if (!isFloatingPointZero(RHS)) 2739 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 2740 else 2741 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 2742 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 2743} 2744 2745/// duplicateCmp - Glue values can have only one use, so this function 2746/// duplicates a comparison node. 2747SDValue 2748ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 2749 unsigned Opc = Cmp.getOpcode(); 2750 DebugLoc DL = Cmp.getDebugLoc(); 2751 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 2752 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2753 2754 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 2755 Cmp = Cmp.getOperand(0); 2756 Opc = Cmp.getOpcode(); 2757 if (Opc == ARMISD::CMPFP) 2758 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2759 else { 2760 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 2761 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 2762 } 2763 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 2764} 2765 2766SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2767 SDValue Cond = Op.getOperand(0); 2768 SDValue SelectTrue = Op.getOperand(1); 2769 SDValue SelectFalse = Op.getOperand(2); 2770 DebugLoc dl = Op.getDebugLoc(); 2771 2772 // Convert: 2773 // 2774 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2775 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2776 // 2777 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2778 const ConstantSDNode *CMOVTrue = 2779 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2780 const ConstantSDNode *CMOVFalse = 2781 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2782 2783 if (CMOVTrue && CMOVFalse) { 2784 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2785 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2786 2787 SDValue True; 2788 SDValue False; 2789 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2790 True = SelectTrue; 2791 False = SelectFalse; 2792 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2793 True = SelectFalse; 2794 False = SelectTrue; 2795 } 2796 2797 if (True.getNode() && False.getNode()) { 2798 EVT VT = Op.getValueType(); 2799 SDValue ARMcc = Cond.getOperand(2); 2800 SDValue CCR = Cond.getOperand(3); 2801 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 2802 assert(True.getValueType() == VT); 2803 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2804 } 2805 } 2806 } 2807 2808 return DAG.getSelectCC(dl, Cond, 2809 DAG.getConstant(0, Cond.getValueType()), 2810 SelectTrue, SelectFalse, ISD::SETNE); 2811} 2812 2813SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2814 EVT VT = Op.getValueType(); 2815 SDValue LHS = Op.getOperand(0); 2816 SDValue RHS = Op.getOperand(1); 2817 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2818 SDValue TrueVal = Op.getOperand(2); 2819 SDValue FalseVal = Op.getOperand(3); 2820 DebugLoc dl = Op.getDebugLoc(); 2821 2822 if (LHS.getValueType() == MVT::i32) { 2823 SDValue ARMcc; 2824 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2825 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2826 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2827 } 2828 2829 ARMCC::CondCodes CondCode, CondCode2; 2830 FPCCToARMCC(CC, 
CondCode, CondCode2); 2831 2832 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2833 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2834 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2835 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2836 ARMcc, CCR, Cmp); 2837 if (CondCode2 != ARMCC::AL) { 2838 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2839 // FIXME: Needs another CMP because flag can have but one use. 2840 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2841 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2842 Result, TrueVal, ARMcc2, CCR, Cmp2); 2843 } 2844 return Result; 2845} 2846 2847/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2848/// to morph to an integer compare sequence. 2849static bool canChangeToInt(SDValue Op, bool &SeenZero, 2850 const ARMSubtarget *Subtarget) { 2851 SDNode *N = Op.getNode(); 2852 if (!N->hasOneUse()) 2853 // Otherwise it requires moving the value from fp to integer registers. 2854 return false; 2855 if (!N->getNumValues()) 2856 return false; 2857 EVT VT = Op.getValueType(); 2858 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2859 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 2860 // vmrs are very slow, e.g. cortex-a8. 2861 return false; 2862 2863 if (isFloatingPointZero(Op)) { 2864 SeenZero = true; 2865 return true; 2866 } 2867 return ISD::isNormalLoad(N); 2868} 2869 2870static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2871 if (isFloatingPointZero(Op)) 2872 return DAG.getConstant(0, MVT::i32); 2873 2874 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2875 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2876 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 2877 Ld->isVolatile(), Ld->isNonTemporal(), 2878 Ld->isInvariant(), Ld->getAlignment()); 2879 2880 llvm_unreachable("Unknown VFP cmp argument!"); 2881} 2882 2883static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2884 SDValue &RetVal1, SDValue &RetVal2) { 2885 if (isFloatingPointZero(Op)) { 2886 RetVal1 = DAG.getConstant(0, MVT::i32); 2887 RetVal2 = DAG.getConstant(0, MVT::i32); 2888 return; 2889 } 2890 2891 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2892 SDValue Ptr = Ld->getBasePtr(); 2893 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2894 Ld->getChain(), Ptr, 2895 Ld->getPointerInfo(), 2896 Ld->isVolatile(), Ld->isNonTemporal(), 2897 Ld->isInvariant(), Ld->getAlignment()); 2898 2899 EVT PtrType = Ptr.getValueType(); 2900 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2901 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2902 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2903 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2904 Ld->getChain(), NewPtr, 2905 Ld->getPointerInfo().getWithOffset(4), 2906 Ld->isVolatile(), Ld->isNonTemporal(), 2907 Ld->isInvariant(), NewAlign); 2908 return; 2909 } 2910 2911 llvm_unreachable("Unknown VFP cmp argument!"); 2912} 2913 2914/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2915/// f32 and even f64 comparisons to integer ones. 
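/// For example, when neither operand can be NaN, an f32 equality test of two
/// loaded values can simply compare the raw bits with an integer CMP; an f64
/// test likewise becomes a pairwise comparison of the two i32 halves (the
/// BCC_i64 node built below).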
2916 SDValue
2917 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2918 SDValue Chain = Op.getOperand(0);
2919 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2920 SDValue LHS = Op.getOperand(2);
2921 SDValue RHS = Op.getOperand(3);
2922 SDValue Dest = Op.getOperand(4);
2923 DebugLoc dl = Op.getDebugLoc();
2924
2925 bool SeenZero = false;
2926 if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2927 canChangeToInt(RHS, SeenZero, Subtarget) &&
2928 // If one of the operands is zero, it's safe to ignore the NaN case since
2929 // we only care about equality comparisons.
2930 (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
2931 // If unsafe fp math optimization is enabled and there are no other uses of
2932 // the CMP operands, and the condition code is EQ or NE, we can optimize it
2933 // to an integer comparison.
2934 if (CC == ISD::SETOEQ)
2935 CC = ISD::SETEQ;
2936 else if (CC == ISD::SETUNE)
2937 CC = ISD::SETNE;
2938
2939 SDValue ARMcc;
2940 if (LHS.getValueType() == MVT::f32) {
2941 LHS = bitcastf32Toi32(LHS, DAG);
2942 RHS = bitcastf32Toi32(RHS, DAG);
2943 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2944 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2945 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2946 Chain, Dest, ARMcc, CCR, Cmp);
2947 }
2948
2949 SDValue LHS1, LHS2;
2950 SDValue RHS1, RHS2;
2951 expandf64Toi32(LHS, DAG, LHS1, LHS2);
2952 expandf64Toi32(RHS, DAG, RHS1, RHS2);
2953 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2954 ARMcc = DAG.getConstant(CondCode, MVT::i32);
2955 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
2956 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
2957 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
2958 }
2959
2960 return SDValue();
2961 }
2962
2963 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2964 SDValue Chain = Op.getOperand(0);
2965 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2966 SDValue LHS = Op.getOperand(2);
2967 SDValue RHS = Op.getOperand(3);
2968 SDValue Dest = Op.getOperand(4);
2969 DebugLoc dl = Op.getDebugLoc();
2970
2971 if (LHS.getValueType() == MVT::i32) {
2972 SDValue ARMcc;
2973 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2974 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2975 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2976 Chain, Dest, ARMcc, CCR, Cmp);
2977 }
2978
2979 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2980
2981 if (getTargetMachine().Options.UnsafeFPMath &&
2982 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2983 CC == ISD::SETNE || CC == ISD::SETUNE)) {
2984 SDValue Result = OptimizeVFPBrcond(Op, DAG);
2985 if (Result.getNode())
2986 return Result;
2987 }
2988
2989 ARMCC::CondCodes CondCode, CondCode2;
2990 FPCCToARMCC(CC, CondCode, CondCode2);
2991
2992 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2993 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2994 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2995 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
2996 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
2997 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2998 if (CondCode2 != ARMCC::AL) {
2999 ARMcc = DAG.getConstant(CondCode2, MVT::i32);
3000 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
3001 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
3002 }
3003 return Res;
3004 }
3005
3006 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG
&DAG) const { 3007 SDValue Chain = Op.getOperand(0); 3008 SDValue Table = Op.getOperand(1); 3009 SDValue Index = Op.getOperand(2); 3010 DebugLoc dl = Op.getDebugLoc(); 3011 3012 EVT PTy = getPointerTy(); 3013 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3014 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 3015 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 3016 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3017 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 3018 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 3019 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3020 if (Subtarget->isThumb2()) { 3021 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3022 // which does another jump to the destination. This also makes it easier 3023 // to translate it to TBB / TBH later. 3024 // FIXME: This might not work if the function is extremely large. 3025 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3026 Addr, Op.getOperand(2), JTI, UId); 3027 } 3028 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3029 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3030 MachinePointerInfo::getJumpTable(), 3031 false, false, false, 0); 3032 Chain = Addr.getValue(1); 3033 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3034 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3035 } else { 3036 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3037 MachinePointerInfo::getJumpTable(), 3038 false, false, false, 0); 3039 Chain = Addr.getValue(1); 3040 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3041 } 3042} 3043 3044static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3045 EVT VT = Op.getValueType(); 3046 assert(VT.getVectorElementType() == MVT::i32 && "Unexpected custom lowering"); 3047 3048 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3049 return Op; 3050 return DAG.UnrollVectorOp(Op.getNode()); 3051} 3052 3053static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3054 EVT VT = Op.getValueType(); 3055 if (VT.isVector()) 3056 return LowerVectorFP_TO_INT(Op, DAG); 3057 3058 DebugLoc dl = Op.getDebugLoc(); 3059 unsigned Opc; 3060 3061 switch (Op.getOpcode()) { 3062 default: 3063 assert(0 && "Invalid opcode!"); 3064 case ISD::FP_TO_SINT: 3065 Opc = ARMISD::FTOSI; 3066 break; 3067 case ISD::FP_TO_UINT: 3068 Opc = ARMISD::FTOUI; 3069 break; 3070 } 3071 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3072 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3073} 3074 3075static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3076 EVT VT = Op.getValueType(); 3077 DebugLoc dl = Op.getDebugLoc(); 3078 3079 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3080 if (VT.getVectorElementType() == MVT::f32) 3081 return Op; 3082 return DAG.UnrollVectorOp(Op.getNode()); 3083 } 3084 3085 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3086 "Invalid type for custom lowering!"); 3087 if (VT != MVT::v4f32) 3088 return DAG.UnrollVectorOp(Op.getNode()); 3089 3090 unsigned CastOpc; 3091 unsigned Opc; 3092 switch (Op.getOpcode()) { 3093 default: 3094 assert(0 && "Invalid opcode!"); 3095 case ISD::SINT_TO_FP: 3096 CastOpc = ISD::SIGN_EXTEND; 3097 Opc = ISD::SINT_TO_FP; 3098 break; 3099 case ISD::UINT_TO_FP: 3100 CastOpc = ISD::ZERO_EXTEND; 3101 Opc = ISD::UINT_TO_FP; 3102 break; 3103 } 3104 3105 Op = DAG.getNode(CastOpc, dl, 
MVT::v4i32, Op.getOperand(0)); 3106 return DAG.getNode(Opc, dl, VT, Op); 3107} 3108 3109static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3110 EVT VT = Op.getValueType(); 3111 if (VT.isVector()) 3112 return LowerVectorINT_TO_FP(Op, DAG); 3113 3114 DebugLoc dl = Op.getDebugLoc(); 3115 unsigned Opc; 3116 3117 switch (Op.getOpcode()) { 3118 default: 3119 assert(0 && "Invalid opcode!"); 3120 case ISD::SINT_TO_FP: 3121 Opc = ARMISD::SITOF; 3122 break; 3123 case ISD::UINT_TO_FP: 3124 Opc = ARMISD::UITOF; 3125 break; 3126 } 3127 3128 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3129 return DAG.getNode(Opc, dl, VT, Op); 3130} 3131 3132SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3133 // Implement fcopysign with a fabs and a conditional fneg. 3134 SDValue Tmp0 = Op.getOperand(0); 3135 SDValue Tmp1 = Op.getOperand(1); 3136 DebugLoc dl = Op.getDebugLoc(); 3137 EVT VT = Op.getValueType(); 3138 EVT SrcVT = Tmp1.getValueType(); 3139 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3140 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3141 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3142 3143 if (UseNEON) { 3144 // Use VBSL to copy the sign bit. 3145 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3146 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3147 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3148 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3149 if (VT == MVT::f64) 3150 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3151 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3152 DAG.getConstant(32, MVT::i32)); 3153 else /*if (VT == MVT::f32)*/ 3154 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3155 if (SrcVT == MVT::f32) { 3156 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3157 if (VT == MVT::f64) 3158 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3159 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3160 DAG.getConstant(32, MVT::i32)); 3161 } else if (VT == MVT::f32) 3162 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3163 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3164 DAG.getConstant(32, MVT::i32)); 3165 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3166 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3167 3168 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3169 MVT::i32); 3170 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3171 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3172 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3173 3174 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3175 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3176 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3177 if (VT == MVT::f32) { 3178 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3179 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3180 DAG.getConstant(0, MVT::i32)); 3181 } else { 3182 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3183 } 3184 3185 return Res; 3186 } 3187 3188 // Bitcast operand 1 to i32. 3189 if (SrcVT == MVT::f64) 3190 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3191 &Tmp1, 1).getValue(1); 3192 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3193 3194 // Or in the signbit with integer operations. 
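// For f32 the result is (Tmp0 & 0x7fffffff) | (Tmp1 & 0x80000000): the
// magnitude bits of operand 0 combined with the sign bit of operand 1.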
3195 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3196 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3197 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3198 if (VT == MVT::f32) { 3199 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3200 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3201 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3202 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3203 } 3204 3205 // f64: Or the high part with signbit and then combine two parts. 3206 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3207 &Tmp0, 1); 3208 SDValue Lo = Tmp0.getValue(0); 3209 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3210 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3211 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3212} 3213 3214SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3215 MachineFunction &MF = DAG.getMachineFunction(); 3216 MachineFrameInfo *MFI = MF.getFrameInfo(); 3217 MFI->setReturnAddressIsTaken(true); 3218 3219 EVT VT = Op.getValueType(); 3220 DebugLoc dl = Op.getDebugLoc(); 3221 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3222 if (Depth) { 3223 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3224 SDValue Offset = DAG.getConstant(4, MVT::i32); 3225 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3226 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3227 MachinePointerInfo(), false, false, false, 0); 3228 } 3229 3230 // Return LR, which contains the return address. Mark it an implicit live-in. 3231 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3232 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3233} 3234 3235SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3236 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3237 MFI->setFrameAddressIsTaken(true); 3238 3239 EVT VT = Op.getValueType(); 3240 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 3241 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3242 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3243 ? ARM::R7 : ARM::R11; 3244 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3245 while (Depth--) 3246 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3247 MachinePointerInfo(), 3248 false, false, false, 0); 3249 return FrameAddr; 3250} 3251 3252/// ExpandBITCAST - If the target supports VFP, this function is called to 3253/// expand a bit convert where either the source or destination type is i64 to 3254/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3255/// operand type is illegal (e.g., v2f32 for a target that doesn't support 3256/// vectors), since the legalizer won't know what to do with that. 3257static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3258 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3259 DebugLoc dl = N->getDebugLoc(); 3260 SDValue Op = N->getOperand(0); 3261 3262 // This function is only supposed to be called for i64 types, either as the 3263 // source or destination of the bit convert. 3264 EVT SrcVT = Op.getValueType(); 3265 EVT DstVT = N->getValueType(0); 3266 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3267 "ExpandBITCAST called for non-i64 type"); 3268 3269 // Turn i64->f64 into VMOVDRR. 
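// e.g. (f64 (bitcast (i64 x))) becomes VMOVDRR(lo32(x), hi32(x)), a single
// vmov moving both halves into one D register.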
3270 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
3271 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3272 DAG.getConstant(0, MVT::i32));
3273 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
3274 DAG.getConstant(1, MVT::i32));
3275 return DAG.getNode(ISD::BITCAST, dl, DstVT,
3276 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
3277 }
3278
3279 // Turn f64->i64 into VMOVRRD.
3280 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
3281 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3282 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
3283 // Merge the pieces into a single i64 value.
3284 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
3285 }
3286
3287 return SDValue();
3288 }
3289
3290 /// getZeroVector - Returns a vector of specified type with all zero elements.
3291 /// Zero vectors are used to represent vector negation and in those cases
3292 /// will be implemented with the NEON VNEG instruction. However, VNEG does
3293 /// not support i64 elements, so sometimes the zero vectors will need to be
3294 /// explicitly constructed. Regardless, use a canonical VMOV to create the
3295 /// zero vector.
3296 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
3297 assert(VT.isVector() && "Expected a vector type");
3298 // The canonical modified immediate encoding of a zero vector is just 0.
3299 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
3300 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
3301 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
3302 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3303 }
3304
3305 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
3306 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
3307 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
3308 SelectionDAG &DAG) const {
3309 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3310 EVT VT = Op.getValueType();
3311 unsigned VTBits = VT.getSizeInBits();
3312 DebugLoc dl = Op.getDebugLoc();
3313 SDValue ShOpLo = Op.getOperand(0);
3314 SDValue ShOpHi = Op.getOperand(1);
3315 SDValue ShAmt = Op.getOperand(2);
3316 SDValue ARMcc;
3317 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
3318
3319 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
3320
3321 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3322 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3323 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
3324 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3325 DAG.getConstant(VTBits, MVT::i32));
3326 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
3327 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3328 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
3329
3330 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3331 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3332 ARMcc, DAG, dl);
3333 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
3334 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
3335 CCR, Cmp);
3336
3337 SDValue Ops[2] = { Lo, Hi };
3338 return DAG.getMergeValues(Ops, 2, dl);
3339 }
3340
3341 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
3342 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
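/// The low result is simply ShOpLo << ShAmt. The high result is
/// (ShOpHi << ShAmt) | (ShOpLo >> (VTBits - ShAmt)) when ShAmt < VTBits, or
/// ShOpLo << (ShAmt - VTBits) otherwise; a CMOV on the sign of ExtraShAmt
/// picks between the two below.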
3343 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
3344 SelectionDAG &DAG) const {
3345 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3346 EVT VT = Op.getValueType();
3347 unsigned VTBits = VT.getSizeInBits();
3348 DebugLoc dl = Op.getDebugLoc();
3349 SDValue ShOpLo = Op.getOperand(0);
3350 SDValue ShOpHi = Op.getOperand(1);
3351 SDValue ShAmt = Op.getOperand(2);
3352 SDValue ARMcc;
3353
3354 assert(Op.getOpcode() == ISD::SHL_PARTS);
3355 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3356 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3357 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
3358 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3359 DAG.getConstant(VTBits, MVT::i32));
3360 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
3361 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
3362
3363 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3364 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3365 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3366 ARMcc, DAG, dl);
3367 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
3368 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
3369 CCR, Cmp);
3370
3371 SDValue Ops[2] = { Lo, Hi };
3372 return DAG.getMergeValues(Ops, 2, dl);
3373 }
3374
3375 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3376 SelectionDAG &DAG) const {
3377 // The rounding mode is in bits 23:22 of the FPSCR.
3378 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3379 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
3380 // so that the shift and the AND can be folded into a bitfield extract.
3381 DebugLoc dl = Op.getDebugLoc();
3382 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
3383 DAG.getConstant(Intrinsic::arm_get_fpscr,
3384 MVT::i32));
3385 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
3386 DAG.getConstant(1U << 22, MVT::i32));
3387 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3388 DAG.getConstant(22, MVT::i32));
3389 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3390 DAG.getConstant(3, MVT::i32));
3391 }
3392
3393 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
3394 const ARMSubtarget *ST) {
3395 EVT VT = N->getValueType(0);
3396 DebugLoc dl = N->getDebugLoc();
3397
3398 if (!ST->hasV6T2Ops())
3399 return SDValue();
3400
3401 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
3402 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
3403 }
3404
3405 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
3406 const ARMSubtarget *ST) {
3407 EVT VT = N->getValueType(0);
3408 DebugLoc dl = N->getDebugLoc();
3409
3410 if (!VT.isVector())
3411 return SDValue();
3412
3413 // Lower vector shifts on NEON to use VSHL.
3414 assert(ST->hasNEON() && "unexpected vector shift");
3415
3416 // Left shifts translate directly to the vshiftu intrinsic.
3417 if (N->getOpcode() == ISD::SHL)
3418 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3419 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
3420 N->getOperand(0), N->getOperand(1));
3421
3422 assert((N->getOpcode() == ISD::SRA ||
3423 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
3424
3425 // NEON uses the same intrinsics for both left and right shifts. For
3426 // right shifts, the shift amounts are negative, so negate the vector of
3427 // shift amounts.
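// e.g. an SRL of v4i32 by <2, 2, 2, 2> becomes vshiftu(x, <-2, -2, -2, -2>),
// i.e. a VSHL whose per-lane shift count is negative.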
3428 EVT ShiftVT = N->getOperand(1).getValueType(); 3429 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 3430 getZeroVector(ShiftVT, DAG, dl), 3431 N->getOperand(1)); 3432 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 3433 Intrinsic::arm_neon_vshifts : 3434 Intrinsic::arm_neon_vshiftu); 3435 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3436 DAG.getConstant(vshiftInt, MVT::i32), 3437 N->getOperand(0), NegatedCount); 3438} 3439 3440static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 3441 const ARMSubtarget *ST) { 3442 EVT VT = N->getValueType(0); 3443 DebugLoc dl = N->getDebugLoc(); 3444 3445 // We can get here for a node like i32 = ISD::SHL i32, i64 3446 if (VT != MVT::i64) 3447 return SDValue(); 3448 3449 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 3450 "Unknown shift to lower!"); 3451 3452 // We only lower SRA, SRL of 1 here, all others use generic lowering. 3453 if (!isa<ConstantSDNode>(N->getOperand(1)) || 3454 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 3455 return SDValue(); 3456 3457 // If we are in thumb mode, we don't have RRX. 3458 if (ST->isThumb1Only()) return SDValue(); 3459 3460 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 3461 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3462 DAG.getConstant(0, MVT::i32)); 3463 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3464 DAG.getConstant(1, MVT::i32)); 3465 3466 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 3467 // captures the result into a carry flag. 3468 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 3469 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 3470 3471 // The low part is an ARMISD::RRX operand, which shifts the carry in. 3472 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 3473 3474 // Merge the pieces into a single i64 value. 3475 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 3476} 3477 3478static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 3479 SDValue TmpOp0, TmpOp1; 3480 bool Invert = false; 3481 bool Swap = false; 3482 unsigned Opc = 0; 3483 3484 SDValue Op0 = Op.getOperand(0); 3485 SDValue Op1 = Op.getOperand(1); 3486 SDValue CC = Op.getOperand(2); 3487 EVT VT = Op.getValueType(); 3488 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3489 DebugLoc dl = Op.getDebugLoc(); 3490 3491 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 3492 switch (SetCCOpcode) { 3493 default: llvm_unreachable("Illegal FP comparison"); break; 3494 case ISD::SETUNE: 3495 case ISD::SETNE: Invert = true; // Fallthrough 3496 case ISD::SETOEQ: 3497 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3498 case ISD::SETOLT: 3499 case ISD::SETLT: Swap = true; // Fallthrough 3500 case ISD::SETOGT: 3501 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3502 case ISD::SETOLE: 3503 case ISD::SETLE: Swap = true; // Fallthrough 3504 case ISD::SETOGE: 3505 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3506 case ISD::SETUGE: Swap = true; // Fallthrough 3507 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 3508 case ISD::SETUGT: Swap = true; // Fallthrough 3509 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 3510 case ISD::SETUEQ: Invert = true; // Fallthrough 3511 case ISD::SETONE: 3512 // Expand this to (OLT | OGT). 
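// A SETONE is true iff the operands are ordered and unequal, so it is built
// from two VCGTs, with the operands swapped in one of them.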
3513 TmpOp0 = Op0; 3514 TmpOp1 = Op1; 3515 Opc = ISD::OR; 3516 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3517 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 3518 break; 3519 case ISD::SETUO: Invert = true; // Fallthrough 3520 case ISD::SETO: 3521 // Expand this to (OLT | OGE). 3522 TmpOp0 = Op0; 3523 TmpOp1 = Op1; 3524 Opc = ISD::OR; 3525 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3526 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 3527 break; 3528 } 3529 } else { 3530 // Integer comparisons. 3531 switch (SetCCOpcode) { 3532 default: llvm_unreachable("Illegal integer comparison"); break; 3533 case ISD::SETNE: Invert = true; 3534 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3535 case ISD::SETLT: Swap = true; 3536 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3537 case ISD::SETLE: Swap = true; 3538 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3539 case ISD::SETULT: Swap = true; 3540 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 3541 case ISD::SETULE: Swap = true; 3542 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 3543 } 3544 3545 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 3546 if (Opc == ARMISD::VCEQ) { 3547 3548 SDValue AndOp; 3549 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3550 AndOp = Op0; 3551 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 3552 AndOp = Op1; 3553 3554 // Ignore bitconvert. 3555 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 3556 AndOp = AndOp.getOperand(0); 3557 3558 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 3559 Opc = ARMISD::VTST; 3560 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 3561 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 3562 Invert = !Invert; 3563 } 3564 } 3565 } 3566 3567 if (Swap) 3568 std::swap(Op0, Op1); 3569 3570 // If one of the operands is a constant vector zero, attempt to fold the 3571 // comparison to a specialized compare-against-zero form. 3572 SDValue SingleOp; 3573 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3574 SingleOp = Op0; 3575 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 3576 if (Opc == ARMISD::VCGE) 3577 Opc = ARMISD::VCLEZ; 3578 else if (Opc == ARMISD::VCGT) 3579 Opc = ARMISD::VCLTZ; 3580 SingleOp = Op1; 3581 } 3582 3583 SDValue Result; 3584 if (SingleOp.getNode()) { 3585 switch (Opc) { 3586 case ARMISD::VCEQ: 3587 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 3588 case ARMISD::VCGE: 3589 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 3590 case ARMISD::VCLEZ: 3591 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 3592 case ARMISD::VCGT: 3593 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 3594 case ARMISD::VCLTZ: 3595 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 3596 default: 3597 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3598 } 3599 } else { 3600 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3601 } 3602 3603 if (Invert) 3604 Result = DAG.getNOT(dl, Result, VT); 3605 3606 return Result; 3607} 3608 3609/// isNEONModifiedImm - Check if the specified splat value corresponds to a 3610/// valid vector constant for a NEON instruction with a "modified immediate" 3611/// operand (e.g., VMOV). If so, return the encoded value. 
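/// For example, a v4i32 splat of 0x00ff0000 is representable: it is encoded
/// with Cmode=010x and Imm=0xff (the nonzero third byte of each element).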
3612 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3613 unsigned SplatBitSize, SelectionDAG &DAG,
3614 EVT &VT, bool is128Bits, NEONModImmType type) {
3615 unsigned OpCmode, Imm;
3616
3617 // SplatBitSize is set to the smallest size that splats the vector, so a
3618 // zero vector will always have SplatBitSize == 8. However, NEON modified
3619 // immediate instructions other than VMOV do not support the 8-bit encoding
3620 // of a zero vector, and the default encoding of zero is supposed to be the
3621 // 32-bit version.
3622 if (SplatBits == 0)
3623 SplatBitSize = 32;
3624
3625 switch (SplatBitSize) {
3626 case 8:
3627 if (type != VMOVModImm)
3628 return SDValue();
3629 // Any 1-byte value is OK. Op=0, Cmode=1110.
3630 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3631 OpCmode = 0xe;
3632 Imm = SplatBits;
3633 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3634 break;
3635
3636 case 16:
3637 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3638 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3639 if ((SplatBits & ~0xff) == 0) {
3640 // Value = 0x00nn: Op=x, Cmode=100x.
3641 OpCmode = 0x8;
3642 Imm = SplatBits;
3643 break;
3644 }
3645 if ((SplatBits & ~0xff00) == 0) {
3646 // Value = 0xnn00: Op=x, Cmode=101x.
3647 OpCmode = 0xa;
3648 Imm = SplatBits >> 8;
3649 break;
3650 }
3651 return SDValue();
3652
3653 case 32:
3654 // NEON's 32-bit VMOV supports splat values where:
3655 // * only one byte is nonzero, or
3656 // * the least significant byte is 0xff and the second byte is nonzero, or
3657 // * the least significant 2 bytes are 0xff and the third is nonzero.
3658 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3659 if ((SplatBits & ~0xff) == 0) {
3660 // Value = 0x000000nn: Op=x, Cmode=000x.
3661 OpCmode = 0;
3662 Imm = SplatBits;
3663 break;
3664 }
3665 if ((SplatBits & ~0xff00) == 0) {
3666 // Value = 0x0000nn00: Op=x, Cmode=001x.
3667 OpCmode = 0x2;
3668 Imm = SplatBits >> 8;
3669 break;
3670 }
3671 if ((SplatBits & ~0xff0000) == 0) {
3672 // Value = 0x00nn0000: Op=x, Cmode=010x.
3673 OpCmode = 0x4;
3674 Imm = SplatBits >> 16;
3675 break;
3676 }
3677 if ((SplatBits & ~0xff000000) == 0) {
3678 // Value = 0xnn000000: Op=x, Cmode=011x.
3679 OpCmode = 0x6;
3680 Imm = SplatBits >> 24;
3681 break;
3682 }
3683
3684 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
3685 if (type == OtherModImm) return SDValue();
3686
3687 if ((SplatBits & ~0xffff) == 0 &&
3688 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3689 // Value = 0x0000nnff: Op=x, Cmode=1100.
3690 OpCmode = 0xc;
3691 Imm = SplatBits >> 8;
3692 SplatBits |= 0xff;
3693 break;
3694 }
3695
3696 if ((SplatBits & ~0xffffff) == 0 &&
3697 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3698 // Value = 0x00nnffff: Op=x, Cmode=1101.
3699 OpCmode = 0xd;
3700 Imm = SplatBits >> 16;
3701 SplatBits |= 0xffff;
3702 break;
3703 }
3704
3705 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3706 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3707 // VMOV.I32. A (very) minor optimization would be to replicate the value
3708 // and fall through here to test for a valid 64-bit splat. But, then the
3709 // caller would also need to check and handle the change in size.
3710 return SDValue();
3711
3712 case 64: {
3713 if (type != VMOVModImm)
3714 return SDValue();
3715 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
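// Each of the eight immediate bits selects one byte, least-significant byte
// first; e.g. the splat 0x00ff00ff00ff00ff is encoded as Imm = 0x55.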
3716 uint64_t BitMask = 0xff; 3717 uint64_t Val = 0; 3718 unsigned ImmMask = 1; 3719 Imm = 0; 3720 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3721 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3722 Val |= BitMask; 3723 Imm |= ImmMask; 3724 } else if ((SplatBits & BitMask) != 0) { 3725 return SDValue(); 3726 } 3727 BitMask <<= 8; 3728 ImmMask <<= 1; 3729 } 3730 // Op=1, Cmode=1110. 3731 OpCmode = 0x1e; 3732 SplatBits = Val; 3733 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3734 break; 3735 } 3736 3737 default: 3738 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3739 return SDValue(); 3740 } 3741 3742 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3743 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3744} 3745 3746static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3747 bool &ReverseVEXT, unsigned &Imm) { 3748 unsigned NumElts = VT.getVectorNumElements(); 3749 ReverseVEXT = false; 3750 3751 // Assume that the first shuffle index is not UNDEF. Fail if it is. 3752 if (M[0] < 0) 3753 return false; 3754 3755 Imm = M[0]; 3756 3757 // If this is a VEXT shuffle, the immediate value is the index of the first 3758 // element. The other shuffle indices must be the successive elements after 3759 // the first one. 3760 unsigned ExpectedElt = Imm; 3761 for (unsigned i = 1; i < NumElts; ++i) { 3762 // Increment the expected index. If it wraps around, it may still be 3763 // a VEXT but the source vectors must be swapped. 3764 ExpectedElt += 1; 3765 if (ExpectedElt == NumElts * 2) { 3766 ExpectedElt = 0; 3767 ReverseVEXT = true; 3768 } 3769 3770 if (M[i] < 0) continue; // ignore UNDEF indices 3771 if (ExpectedElt != static_cast<unsigned>(M[i])) 3772 return false; 3773 } 3774 3775 // Adjust the index value if the source operands will be swapped. 3776 if (ReverseVEXT) 3777 Imm -= NumElts; 3778 3779 return true; 3780} 3781 3782/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3783/// instruction with the specified blocksize. (The order of the elements 3784/// within each block of the vector is reversed.) 3785static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 3786 unsigned BlockSize) { 3787 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3788 "Only possible block sizes for VREV are: 16, 32, 64"); 3789 3790 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3791 if (EltSz == 64) 3792 return false; 3793 3794 unsigned NumElts = VT.getVectorNumElements(); 3795 unsigned BlockElts = M[0] + 1; 3796 // If the first shuffle index is UNDEF, be optimistic. 3797 if (M[0] < 0) 3798 BlockElts = BlockSize / EltSz; 3799 3800 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3801 return false; 3802 3803 for (unsigned i = 0; i < NumElts; ++i) { 3804 if (M[i] < 0) continue; // ignore UNDEF indices 3805 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3806 return false; 3807 } 3808 3809 return true; 3810} 3811 3812static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { 3813 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 3814 // range, then 0 is placed into the resulting vector. So pretty much any mask 3815 // of 8 elements can work here. 
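// When the second source is undef, mask indices referring to it fall outside
// the one-register table of a VTBL1 and produce 0, which is a valid value for
// those undef lanes.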
3816 return VT == MVT::v8i8 && M.size() == 8; 3817} 3818 3819static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 3820 unsigned &WhichResult) { 3821 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3822 if (EltSz == 64) 3823 return false; 3824 3825 unsigned NumElts = VT.getVectorNumElements(); 3826 WhichResult = (M[0] == 0 ? 0 : 1); 3827 for (unsigned i = 0; i < NumElts; i += 2) { 3828 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3829 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 3830 return false; 3831 } 3832 return true; 3833} 3834 3835/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3836/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3837/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3838static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3839 unsigned &WhichResult) { 3840 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3841 if (EltSz == 64) 3842 return false; 3843 3844 unsigned NumElts = VT.getVectorNumElements(); 3845 WhichResult = (M[0] == 0 ? 0 : 1); 3846 for (unsigned i = 0; i < NumElts; i += 2) { 3847 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3848 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 3849 return false; 3850 } 3851 return true; 3852} 3853 3854static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 3855 unsigned &WhichResult) { 3856 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3857 if (EltSz == 64) 3858 return false; 3859 3860 unsigned NumElts = VT.getVectorNumElements(); 3861 WhichResult = (M[0] == 0 ? 0 : 1); 3862 for (unsigned i = 0; i != NumElts; ++i) { 3863 if (M[i] < 0) continue; // ignore UNDEF indices 3864 if ((unsigned) M[i] != 2 * i + WhichResult) 3865 return false; 3866 } 3867 3868 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3869 if (VT.is64BitVector() && EltSz == 32) 3870 return false; 3871 3872 return true; 3873} 3874 3875/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3876/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3877/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3878static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3879 unsigned &WhichResult) { 3880 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3881 if (EltSz == 64) 3882 return false; 3883 3884 unsigned Half = VT.getVectorNumElements() / 2; 3885 WhichResult = (M[0] == 0 ? 0 : 1); 3886 for (unsigned j = 0; j != 2; ++j) { 3887 unsigned Idx = WhichResult; 3888 for (unsigned i = 0; i != Half; ++i) { 3889 int MIdx = M[i + j * Half]; 3890 if (MIdx >= 0 && (unsigned) MIdx != Idx) 3891 return false; 3892 Idx += 2; 3893 } 3894 } 3895 3896 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3897 if (VT.is64BitVector() && EltSz == 32) 3898 return false; 3899 3900 return true; 3901} 3902 3903static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3904 unsigned &WhichResult) { 3905 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3906 if (EltSz == 64) 3907 return false; 3908 3909 unsigned NumElts = VT.getVectorNumElements(); 3910 WhichResult = (M[0] == 0 ? 0 : 1); 3911 unsigned Idx = WhichResult * NumElts / 2; 3912 for (unsigned i = 0; i != NumElts; i += 2) { 3913 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3914 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 3915 return false; 3916 Idx += 1; 3917 } 3918 3919 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
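// For reference, the v8i8 mask selecting the low-half VZIP result is
// <0, 8, 1, 9, 2, 10, 3, 11> (WhichResult == 0).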
3920 if (VT.is64BitVector() && EltSz == 32) 3921 return false; 3922 3923 return true; 3924} 3925 3926/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3927/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3928/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 3929static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3930 unsigned &WhichResult) { 3931 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3932 if (EltSz == 64) 3933 return false; 3934 3935 unsigned NumElts = VT.getVectorNumElements(); 3936 WhichResult = (M[0] == 0 ? 0 : 1); 3937 unsigned Idx = WhichResult * NumElts / 2; 3938 for (unsigned i = 0; i != NumElts; i += 2) { 3939 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3940 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 3941 return false; 3942 Idx += 1; 3943 } 3944 3945 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3946 if (VT.is64BitVector() && EltSz == 32) 3947 return false; 3948 3949 return true; 3950} 3951 3952// If N is an integer constant that can be moved into a register in one 3953// instruction, return an SDValue of such a constant (will become a MOV 3954// instruction). Otherwise return null. 3955static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 3956 const ARMSubtarget *ST, DebugLoc dl) { 3957 uint64_t Val; 3958 if (!isa<ConstantSDNode>(N)) 3959 return SDValue(); 3960 Val = cast<ConstantSDNode>(N)->getZExtValue(); 3961 3962 if (ST->isThumb1Only()) { 3963 if (Val <= 255 || ~Val <= 255) 3964 return DAG.getConstant(Val, MVT::i32); 3965 } else { 3966 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 3967 return DAG.getConstant(Val, MVT::i32); 3968 } 3969 return SDValue(); 3970} 3971 3972// If this is a case we can't handle, return null and let the default 3973// expansion code take care of it. 3974SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3975 const ARMSubtarget *ST) const { 3976 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3977 DebugLoc dl = Op.getDebugLoc(); 3978 EVT VT = Op.getValueType(); 3979 3980 APInt SplatBits, SplatUndef; 3981 unsigned SplatBitSize; 3982 bool HasAnyUndefs; 3983 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3984 if (SplatBitSize <= 64) { 3985 // Check if an immediate VMOV works. 3986 EVT VmovVT; 3987 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3988 SplatUndef.getZExtValue(), SplatBitSize, 3989 DAG, VmovVT, VT.is128BitVector(), 3990 VMOVModImm); 3991 if (Val.getNode()) { 3992 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3993 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3994 } 3995 3996 // Try an immediate VMVN. 3997 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 3998 Val = isNEONModifiedImm(NegatedImm, 3999 SplatUndef.getZExtValue(), SplatBitSize, 4000 DAG, VmovVT, VT.is128BitVector(), 4001 VMVNModImm); 4002 if (Val.getNode()) { 4003 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 4004 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4005 } 4006 4007 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 4008 if (VT == MVT::v2f32 || VT == MVT::v4f32) { 4009 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 4010 if (ImmVal != -1) { 4011 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 4012 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 4013 } 4014 } 4015 } 4016 } 4017 4018 // Scan through the operands to see if only one value is used. 
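// e.g. (a, a, a, a) can become a single VDUP, (a, undef, undef, undef) a
// SCALAR_TO_VECTOR, and an all-constant vector a constant-pool load.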
4019 unsigned NumElts = VT.getVectorNumElements(); 4020 bool isOnlyLowElement = true; 4021 bool usesOnlyOneValue = true; 4022 bool isConstant = true; 4023 SDValue Value; 4024 for (unsigned i = 0; i < NumElts; ++i) { 4025 SDValue V = Op.getOperand(i); 4026 if (V.getOpcode() == ISD::UNDEF) 4027 continue; 4028 if (i > 0) 4029 isOnlyLowElement = false; 4030 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 4031 isConstant = false; 4032 4033 if (!Value.getNode()) 4034 Value = V; 4035 else if (V != Value) 4036 usesOnlyOneValue = false; 4037 } 4038 4039 if (!Value.getNode()) 4040 return DAG.getUNDEF(VT); 4041 4042 if (isOnlyLowElement) 4043 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 4044 4045 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4046 4047 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 4048 // i32 and try again. 4049 if (usesOnlyOneValue && EltSize <= 32) { 4050 if (!isConstant) 4051 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 4052 if (VT.getVectorElementType().isFloatingPoint()) { 4053 SmallVector<SDValue, 8> Ops; 4054 for (unsigned i = 0; i < NumElts; ++i) 4055 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 4056 Op.getOperand(i))); 4057 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 4058 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 4059 Val = LowerBUILD_VECTOR(Val, DAG, ST); 4060 if (Val.getNode()) 4061 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4062 } 4063 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 4064 if (Val.getNode()) 4065 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 4066 } 4067 4068 // If all elements are constants and the case above didn't get hit, fall back 4069 // to the default expansion, which will generate a load from the constant 4070 // pool. 4071 if (isConstant) 4072 return SDValue(); 4073 4074 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 4075 if (NumElts >= 4) { 4076 SDValue shuffle = ReconstructShuffle(Op, DAG); 4077 if (shuffle != SDValue()) 4078 return shuffle; 4079 } 4080 4081 // Vectors with 32- or 64-bit elements can be built by directly assigning 4082 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 4083 // will be legalized. 4084 if (EltSize >= 32) { 4085 // Do the expansion with floating-point types, since that is what the VFP 4086 // registers are defined to use, and since i64 is not legal. 4087 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4088 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4089 SmallVector<SDValue, 8> Ops; 4090 for (unsigned i = 0; i < NumElts; ++i) 4091 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 4092 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4093 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4094 } 4095 4096 return SDValue(); 4097} 4098 4099// Gather data to see if the operation can be modelled as a 4100// shuffle in combination with VEXTs. 
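// For example, a v4i16 built from elements 2..5 of a single v8i16 source is
// rebuilt as a VEXT of the source's low and high halves with immediate 2.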
4101 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
4102 SelectionDAG &DAG) const {
4103 DebugLoc dl = Op.getDebugLoc();
4104 EVT VT = Op.getValueType();
4105 unsigned NumElts = VT.getVectorNumElements();
4106
4107 SmallVector<SDValue, 2> SourceVecs;
4108 SmallVector<unsigned, 2> MinElts;
4109 SmallVector<unsigned, 2> MaxElts;
4110
4111 for (unsigned i = 0; i < NumElts; ++i) {
4112 SDValue V = Op.getOperand(i);
4113 if (V.getOpcode() == ISD::UNDEF)
4114 continue;
4115 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
4116 // A shuffle can only come from building a vector from various
4117 // elements of other vectors.
4118 return SDValue();
4119 } else if (V.getOperand(0).getValueType().getVectorElementType() !=
4120 VT.getVectorElementType()) {
4121 // This code doesn't know how to handle shuffles where the vector
4122 // element types do not match (this happens because type legalization
4123 // promotes the return type of EXTRACT_VECTOR_ELT).
4124 // FIXME: It might be appropriate to extend this code to handle
4125 // mismatched types.
4126 return SDValue();
4127 }
4128
4129 // Record this extraction against the appropriate vector if possible...
4130 SDValue SourceVec = V.getOperand(0);
4131 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
4132 bool FoundSource = false;
4133 for (unsigned j = 0; j < SourceVecs.size(); ++j) {
4134 if (SourceVecs[j] == SourceVec) {
4135 if (MinElts[j] > EltNo)
4136 MinElts[j] = EltNo;
4137 if (MaxElts[j] < EltNo)
4138 MaxElts[j] = EltNo;
4139 FoundSource = true;
4140 break;
4141 }
4142 }
4143
4144 // Or record a new source if not...
4145 if (!FoundSource) {
4146 SourceVecs.push_back(SourceVec);
4147 MinElts.push_back(EltNo);
4148 MaxElts.push_back(EltNo);
4149 }
4150 }
4151
4152 // Currently we only do something sane when at most two source vectors
4153 // are involved.
4154 if (SourceVecs.size() > 2)
4155 return SDValue();
4156
4157 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
4158 int VEXTOffsets[2] = {0, 0};
4159
4160 // This loop extracts the usage patterns of the source vectors
4161 // and prepares appropriate SDValues for a shuffle if possible.
4162 for (unsigned i = 0; i < SourceVecs.size(); ++i) {
4163 if (SourceVecs[i].getValueType() == VT) {
4164 // No VEXT necessary
4165 ShuffleSrcs[i] = SourceVecs[i];
4166 VEXTOffsets[i] = 0;
4167 continue;
4168 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
4169 // It probably isn't worth padding out a smaller vector just to
4170 // break it down again in a shuffle.
4171 return SDValue();
4172 }
4173
4174 // Since only 64-bit and 128-bit vectors are legal on ARM and
4175 // we've eliminated the other cases...
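// ...any remaining source must have exactly twice as many elements as the
// result (e.g. a v8i8 result drawing on a v16i8 source).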
4176 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 4177 "unexpected vector sizes in ReconstructShuffle"); 4178 4179 if (MaxElts[i] - MinElts[i] >= NumElts) { 4180 // Span too large for a VEXT to cope 4181 return SDValue(); 4182 } 4183 4184 if (MinElts[i] >= NumElts) { 4185 // The extraction can just take the second half 4186 VEXTOffsets[i] = NumElts; 4187 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4188 SourceVecs[i], 4189 DAG.getIntPtrConstant(NumElts)); 4190 } else if (MaxElts[i] < NumElts) { 4191 // The extraction can just take the first half 4192 VEXTOffsets[i] = 0; 4193 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4194 SourceVecs[i], 4195 DAG.getIntPtrConstant(0)); 4196 } else { 4197 // An actual VEXT is needed 4198 VEXTOffsets[i] = MinElts[i]; 4199 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4200 SourceVecs[i], 4201 DAG.getIntPtrConstant(0)); 4202 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4203 SourceVecs[i], 4204 DAG.getIntPtrConstant(NumElts)); 4205 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 4206 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 4207 } 4208 } 4209 4210 SmallVector<int, 8> Mask; 4211 4212 for (unsigned i = 0; i < NumElts; ++i) { 4213 SDValue Entry = Op.getOperand(i); 4214 if (Entry.getOpcode() == ISD::UNDEF) { 4215 Mask.push_back(-1); 4216 continue; 4217 } 4218 4219 SDValue ExtractVec = Entry.getOperand(0); 4220 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 4221 .getOperand(1))->getSExtValue(); 4222 if (ExtractVec == SourceVecs[0]) { 4223 Mask.push_back(ExtractElt - VEXTOffsets[0]); 4224 } else { 4225 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 4226 } 4227 } 4228 4229 // Final check before we try to produce nonsense... 4230 if (isShuffleMaskLegal(Mask, VT)) 4231 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 4232 &Mask[0]); 4233 4234 return SDValue(); 4235} 4236 4237/// isShuffleMaskLegal - Targets can use this to indicate that they only 4238/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4239/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4240/// are assumed to be legal. 4241bool 4242ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 4243 EVT VT) const { 4244 if (VT.getVectorNumElements() == 4 && 4245 (VT.is128BitVector() || VT.is64BitVector())) { 4246 unsigned PFIndexes[4]; 4247 for (unsigned i = 0; i != 4; ++i) { 4248 if (M[i] < 0) 4249 PFIndexes[i] = 8; 4250 else 4251 PFIndexes[i] = M[i]; 4252 } 4253 4254 // Compute the index in the perfect shuffle table. 
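// The four mask entries are the digits of a base-9 number (0-7 for a lane,
// 8 for undef); e.g. the mask <0, 1, 2, 3> gives 0*729 + 1*81 + 2*9 + 3 = 102.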
4255 unsigned PFTableIndex = 4256 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4257 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4258 unsigned Cost = (PFEntry >> 30); 4259 4260 if (Cost <= 4) 4261 return true; 4262 } 4263 4264 bool ReverseVEXT; 4265 unsigned Imm, WhichResult; 4266 4267 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4268 return (EltSize >= 32 || 4269 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 4270 isVREVMask(M, VT, 64) || 4271 isVREVMask(M, VT, 32) || 4272 isVREVMask(M, VT, 16) || 4273 isVEXTMask(M, VT, ReverseVEXT, Imm) || 4274 isVTBLMask(M, VT) || 4275 isVTRNMask(M, VT, WhichResult) || 4276 isVUZPMask(M, VT, WhichResult) || 4277 isVZIPMask(M, VT, WhichResult) || 4278 isVTRN_v_undef_Mask(M, VT, WhichResult) || 4279 isVUZP_v_undef_Mask(M, VT, WhichResult) || 4280 isVZIP_v_undef_Mask(M, VT, WhichResult)); 4281} 4282 4283/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4284/// the specified operations to build the shuffle. 4285static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4286 SDValue RHS, SelectionDAG &DAG, 4287 DebugLoc dl) { 4288 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4289 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4290 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4291 4292 enum { 4293 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4294 OP_VREV, 4295 OP_VDUP0, 4296 OP_VDUP1, 4297 OP_VDUP2, 4298 OP_VDUP3, 4299 OP_VEXT1, 4300 OP_VEXT2, 4301 OP_VEXT3, 4302 OP_VUZPL, // VUZP, left result 4303 OP_VUZPR, // VUZP, right result 4304 OP_VZIPL, // VZIP, left result 4305 OP_VZIPR, // VZIP, right result 4306 OP_VTRNL, // VTRN, left result 4307 OP_VTRNR // VTRN, right result 4308 }; 4309 4310 if (OpNum == OP_COPY) { 4311 if (LHSID == (1*9+2)*9+3) return LHS; 4312 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4313 return RHS; 4314 } 4315 4316 SDValue OpLHS, OpRHS; 4317 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4318 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4319 EVT VT = OpLHS.getValueType(); 4320 4321 switch (OpNum) { 4322 default: llvm_unreachable("Unknown shuffle opcode!"); 4323 case OP_VREV: 4324 // VREV divides the vector in half and swaps within the half. 
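// e.g. VREV64 on a v4i32 turns <0, 1, 2, 3> into <1, 0, 3, 2>.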
4325 if (VT.getVectorElementType() == MVT::i32 ||
4326 VT.getVectorElementType() == MVT::f32)
4327 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
4328 // vrev <4 x i16> -> VREV32
4329 if (VT.getVectorElementType() == MVT::i16)
4330 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
4331 // vrev <8 x i8> -> VREV16
4332 assert(VT.getVectorElementType() == MVT::i8);
4333 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
4334 case OP_VDUP0:
4335 case OP_VDUP1:
4336 case OP_VDUP2:
4337 case OP_VDUP3:
4338 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
4339 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
4340 case OP_VEXT1:
4341 case OP_VEXT2:
4342 case OP_VEXT3:
4343 return DAG.getNode(ARMISD::VEXT, dl, VT,
4344 OpLHS, OpRHS,
4345 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
4346 case OP_VUZPL:
4347 case OP_VUZPR:
4348 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
4349 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
4350 case OP_VZIPL:
4351 case OP_VZIPR:
4352 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
4353 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
4354 case OP_VTRNL:
4355 case OP_VTRNR:
4356 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
4357 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
4358 }
4359 }
4360
4361 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
4362 SmallVectorImpl<int> &ShuffleMask,
4363 SelectionDAG &DAG) {
4364 // Check to see if we can use the VTBL instruction.
4365 SDValue V1 = Op.getOperand(0);
4366 SDValue V2 = Op.getOperand(1);
4367 DebugLoc DL = Op.getDebugLoc();
4368
4369 SmallVector<SDValue, 8> VTBLMask;
4370 for (SmallVectorImpl<int>::iterator
4371 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
4372 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
4373
4374 if (V2.getNode()->getOpcode() == ISD::UNDEF)
4375 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
4376 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4377 &VTBLMask[0], 8));
4378
4379 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
4380 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4381 &VTBLMask[0], 8));
4382 }
4383
4384 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
4385 SDValue V1 = Op.getOperand(0);
4386 SDValue V2 = Op.getOperand(1);
4387 DebugLoc dl = Op.getDebugLoc();
4388 EVT VT = Op.getValueType();
4389 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4390 SmallVector<int, 8> ShuffleMask;
4391
4392 // Convert shuffles that are directly supported on NEON to target-specific
4393 // DAG nodes, instead of keeping them as shuffles and matching them again
4394 // during code selection. This is more efficient and avoids the possibility
4395 // of inconsistencies between legalization and selection.
4396 // FIXME: floating-point vectors should be canonicalized to integer vectors
4397 // of the same size so that they get CSEd properly.
4398 SVN->getMask(ShuffleMask);
4399
4400 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4401 if (EltSize <= 32) {
4402 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
4403 int Lane = SVN->getSplatIndex();
4404 // If this is an undef splat, use lane 0 so it can be generated as a
4405 // plain VDUP.
4406 if (Lane == -1) Lane = 0;
4407
4408 // Test if V1 is a SCALAR_TO_VECTOR.
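// (a splat of lane 0 of a SCALAR_TO_VECTOR is just a VDUP of the scalar).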
4408 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 4409 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 4410 } 4411 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 4412 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 4413 // reaches it). 4414 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 4415 !isa<ConstantSDNode>(V1.getOperand(0))) { 4416 bool IsScalarToVector = true; 4417 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 4418 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 4419 IsScalarToVector = false; 4420 break; 4421 } 4422 if (IsScalarToVector) 4423 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 4424 } 4425 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 4426 DAG.getConstant(Lane, MVT::i32)); 4427 } 4428 4429 bool ReverseVEXT; 4430 unsigned Imm; 4431 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 4432 if (ReverseVEXT) 4433 std::swap(V1, V2); 4434 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 4435 DAG.getConstant(Imm, MVT::i32)); 4436 } 4437 4438 if (isVREVMask(ShuffleMask, VT, 64)) 4439 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 4440 if (isVREVMask(ShuffleMask, VT, 32)) 4441 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 4442 if (isVREVMask(ShuffleMask, VT, 16)) 4443 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 4444 4445 // Check for Neon shuffles that modify both input vectors in place. 4446 // If both results are used, i.e., if there are two shuffles with the same 4447 // source operands and with masks corresponding to both results of one of 4448 // these operations, DAG memoization will ensure that a single node is 4449 // used for both shuffles. 4450 unsigned WhichResult; 4451 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 4452 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4453 V1, V2).getValue(WhichResult); 4454 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 4455 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4456 V1, V2).getValue(WhichResult); 4457 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 4458 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4459 V1, V2).getValue(WhichResult); 4460 4461 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4462 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4463 V1, V1).getValue(WhichResult); 4464 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4465 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4466 V1, V1).getValue(WhichResult); 4467 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4468 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4469 V1, V1).getValue(WhichResult); 4470 } 4471 4472 // If the shuffle is not directly supported and it has 4 elements, use 4473 // the PerfectShuffle-generated table to synthesize it from other shuffles. 4474 unsigned NumElts = VT.getVectorNumElements(); 4475 if (NumElts == 4) { 4476 unsigned PFIndexes[4]; 4477 for (unsigned i = 0; i != 4; ++i) { 4478 if (ShuffleMask[i] < 0) 4479 PFIndexes[i] = 8; 4480 else 4481 PFIndexes[i] = ShuffleMask[i]; 4482 } 4483 4484 // Compute the index in the perfect shuffle table. 4485 unsigned PFTableIndex = 4486 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4487 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4488 unsigned Cost = (PFEntry >> 30); 4489 4490 if (Cost <= 4) 4491 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 4492 } 4493 4494 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 
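  // Sketch for a v2i64 shuffle with mask <1,3> (illustrative only): after
  // the f64 bitcasts below, the result is
  //   ARMISD::BUILD_VECTOR (extractelt V1, 1), (extractelt V2, 1)
  // bitcast back to the original vector type.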
4495 if (EltSize >= 32) { 4496 // Do the expansion with floating-point types, since that is what the VFP 4497 // registers are defined to use, and since i64 is not legal. 4498 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4499 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4500 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 4501 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 4502 SmallVector<SDValue, 8> Ops; 4503 for (unsigned i = 0; i < NumElts; ++i) { 4504 if (ShuffleMask[i] < 0) 4505 Ops.push_back(DAG.getUNDEF(EltVT)); 4506 else 4507 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 4508 ShuffleMask[i] < (int)NumElts ? V1 : V2, 4509 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 4510 MVT::i32))); 4511 } 4512 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4513 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4514 } 4515 4516 if (VT == MVT::v8i8) { 4517 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 4518 if (NewOp.getNode()) 4519 return NewOp; 4520 } 4521 4522 return SDValue(); 4523} 4524 4525static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4526 // INSERT_VECTOR_ELT is legal only for immediate indexes. 4527 SDValue Lane = Op.getOperand(2); 4528 if (!isa<ConstantSDNode>(Lane)) 4529 return SDValue(); 4530 4531 return Op; 4532} 4533 4534static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4535 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 4536 SDValue Lane = Op.getOperand(1); 4537 if (!isa<ConstantSDNode>(Lane)) 4538 return SDValue(); 4539 4540 SDValue Vec = Op.getOperand(0); 4541 if (Op.getValueType() == MVT::i32 && 4542 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 4543 DebugLoc dl = Op.getDebugLoc(); 4544 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 4545 } 4546 4547 return Op; 4548} 4549 4550static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 4551 // The only time a CONCAT_VECTORS operation can have legal types is when 4552 // two 64-bit vectors are concatenated to a 128-bit vector. 4553 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 4554 "unexpected CONCAT_VECTORS"); 4555 DebugLoc dl = Op.getDebugLoc(); 4556 SDValue Val = DAG.getUNDEF(MVT::v2f64); 4557 SDValue Op0 = Op.getOperand(0); 4558 SDValue Op1 = Op.getOperand(1); 4559 if (Op0.getOpcode() != ISD::UNDEF) 4560 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4561 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 4562 DAG.getIntPtrConstant(0)); 4563 if (Op1.getOpcode() != ISD::UNDEF) 4564 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4565 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 4566 DAG.getIntPtrConstant(1)); 4567 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 4568} 4569 4570/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 4571/// element has been zero/sign-extended, depending on the isSigned parameter, 4572/// from an integer type half its size. 4573static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 4574 bool isSigned) { 4575 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 4576 EVT VT = N->getValueType(0); 4577 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 4578 SDNode *BVN = N->getOperand(0).getNode(); 4579 if (BVN->getValueType(0) != MVT::v4i32 || 4580 BVN->getOpcode() != ISD::BUILD_VECTOR) 4581 return false; 4582 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 
1 : 0; 4583 unsigned HiElt = 1 - LoElt; 4584 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 4585 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 4586 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 4587 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 4588 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 4589 return false; 4590 if (isSigned) { 4591 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 4592 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 4593 return true; 4594 } else { 4595 if (Hi0->isNullValue() && Hi1->isNullValue()) 4596 return true; 4597 } 4598 return false; 4599 } 4600 4601 if (N->getOpcode() != ISD::BUILD_VECTOR) 4602 return false; 4603 4604 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 4605 SDNode *Elt = N->getOperand(i).getNode(); 4606 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 4607 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4608 unsigned HalfSize = EltSize / 2; 4609 if (isSigned) { 4610 if (!isIntN(HalfSize, C->getSExtValue())) 4611 return false; 4612 } else { 4613 if (!isUIntN(HalfSize, C->getZExtValue())) 4614 return false; 4615 } 4616 continue; 4617 } 4618 return false; 4619 } 4620 4621 return true; 4622} 4623 4624/// isSignExtended - Check if a node is a vector value that is sign-extended 4625/// or a constant BUILD_VECTOR with sign-extended elements. 4626static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 4627 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 4628 return true; 4629 if (isExtendedBUILD_VECTOR(N, DAG, true)) 4630 return true; 4631 return false; 4632} 4633 4634/// isZeroExtended - Check if a node is a vector value that is zero-extended 4635/// or a constant BUILD_VECTOR with zero-extended elements. 4636static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 4637 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 4638 return true; 4639 if (isExtendedBUILD_VECTOR(N, DAG, false)) 4640 return true; 4641 return false; 4642} 4643 4644/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending 4645/// load, or BUILD_VECTOR with extended elements, return the unextended value. 4646static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { 4647 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 4648 return N->getOperand(0); 4649 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 4650 return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), 4651 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 4652 LD->isNonTemporal(), LD->isInvariant(), 4653 LD->getAlignment()); 4654 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 4655 // have been legalized as a BITCAST from v4i32. 4656 if (N->getOpcode() == ISD::BITCAST) { 4657 SDNode *BVN = N->getOperand(0).getNode(); 4658 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 4659 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 4660 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4661 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 4662 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 4663 } 4664 // Construct a new BUILD_VECTOR with elements truncated to half the size. 
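// e.g. a constant v4i32 <1, 2, 3, 4> that stood for a widened v4i16 value is
// rebuilt here as v4i16 <1, 2, 3, 4>; the truncation is lossless because
// isExtendedBUILD_VECTOR already checked that every element fits in the
// half-width type.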
4665 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 4666 EVT VT = N->getValueType(0); 4667 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 4668 unsigned NumElts = VT.getVectorNumElements(); 4669 MVT TruncVT = MVT::getIntegerVT(EltSize); 4670 SmallVector<SDValue, 8> Ops; 4671 for (unsigned i = 0; i != NumElts; ++i) { 4672 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 4673 const APInt &CInt = C->getAPIntValue(); 4674 Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); 4675 } 4676 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 4677 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 4678} 4679 4680static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 4681 unsigned Opcode = N->getOpcode(); 4682 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4683 SDNode *N0 = N->getOperand(0).getNode(); 4684 SDNode *N1 = N->getOperand(1).getNode(); 4685 return N0->hasOneUse() && N1->hasOneUse() && 4686 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 4687 } 4688 return false; 4689} 4690 4691static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 4692 unsigned Opcode = N->getOpcode(); 4693 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4694 SDNode *N0 = N->getOperand(0).getNode(); 4695 SDNode *N1 = N->getOperand(1).getNode(); 4696 return N0->hasOneUse() && N1->hasOneUse() && 4697 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 4698 } 4699 return false; 4700} 4701 4702static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 4703 // Multiplications are only custom-lowered for 128-bit vectors so that 4704 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 4705 EVT VT = Op.getValueType(); 4706 assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); 4707 SDNode *N0 = Op.getOperand(0).getNode(); 4708 SDNode *N1 = Op.getOperand(1).getNode(); 4709 unsigned NewOpc = 0; 4710 bool isMLA = false; 4711 bool isN0SExt = isSignExtended(N0, DAG); 4712 bool isN1SExt = isSignExtended(N1, DAG); 4713 if (isN0SExt && isN1SExt) 4714 NewOpc = ARMISD::VMULLs; 4715 else { 4716 bool isN0ZExt = isZeroExtended(N0, DAG); 4717 bool isN1ZExt = isZeroExtended(N1, DAG); 4718 if (isN0ZExt && isN1ZExt) 4719 NewOpc = ARMISD::VMULLu; 4720 else if (isN1SExt || isN1ZExt) { 4721 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 4722 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 4723 if (isN1SExt && isAddSubSExt(N0, DAG)) { 4724 NewOpc = ARMISD::VMULLs; 4725 isMLA = true; 4726 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 4727 NewOpc = ARMISD::VMULLu; 4728 isMLA = true; 4729 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 4730 std::swap(N0, N1); 4731 NewOpc = ARMISD::VMULLu; 4732 isMLA = true; 4733 } 4734 } 4735 4736 if (!NewOpc) { 4737 if (VT == MVT::v2i64) 4738 // Fall through to expand this. It is not legal. 4739 return SDValue(); 4740 else 4741 // Other vector multiplications are legal. 4742 return Op; 4743 } 4744 } 4745 4746 // Legalize to a VMULL instruction. 4747 DebugLoc DL = Op.getDebugLoc(); 4748 SDValue Op0; 4749 SDValue Op1 = SkipExtension(N1, DAG); 4750 if (!isMLA) { 4751 Op0 = SkipExtension(N0, DAG); 4752 assert(Op0.getValueType().is64BitVector() && 4753 Op1.getValueType().is64BitVector() && 4754 "unexpected types for extended operands to VMULL"); 4755 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 4756 } 4757 4758 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 4759 // isel lowering to take advantage of no-stall back to back vmul + vmla. 
4760 // vmull q0, d4, d6 4761 // vmlal q0, d5, d6 4762 // is faster than 4763 // vaddl q0, d4, d5 4764 // vmovl q1, d6 4765 // vmul q0, q0, q1 4766 SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); 4767 SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); 4768 EVT Op1VT = Op1.getValueType(); 4769 return DAG.getNode(N0->getOpcode(), DL, VT, 4770 DAG.getNode(NewOpc, DL, VT, 4771 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 4772 DAG.getNode(NewOpc, DL, VT, 4773 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 4774 } 4775 4776 static SDValue 4777 LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { 4778 // Convert to float 4779 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 4780 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 4781 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 4782 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 4783 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 4784 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 4785 // Get reciprocal estimate. 4786 // float4 recip = vrecpeq_f32(yf); 4787 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4788 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 4789 // Because char has a smaller range than uchar, we can actually get away 4790 // without any Newton steps. This requires that we use a weird bias 4791 // of 0xb000, however (again, this has been exhaustively tested). 4792 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 4793 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 4794 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 4795 Y = DAG.getConstant(0xb000, MVT::i32); 4796 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 4797 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 4798 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 4799 // Convert back to short. 4800 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 4801 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 4802 return X; 4803 } 4804 4805 static SDValue 4806 LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { 4807 SDValue N2; 4808 // Convert to float. 4809 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 4810 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 4811 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 4812 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 4813 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4814 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4815 4816 // Use reciprocal estimate and one refinement step. 4817 // float4 recip = vrecpeq_f32(yf); 4818 // recip *= vrecpsq_f32(yf, recip); 4819 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4820 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 4821 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4822 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4823 N1, N2); 4824 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4825 // Because short has a smaller range than ushort, we can actually get away 4826 // with only a single Newton step. This requires that we use a weird bias 4827 // of 0x89, however (again, this has been exhaustively tested).
4828 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 4829 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4830 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4831 N1 = DAG.getConstant(0x89, MVT::i32); 4832 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4833 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4834 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4835 // Convert back to integer and return. 4836 // return vmovn_s32(vcvt_s32_f32(result)); 4837 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4838 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4839 return N0; 4840 } 4841 4842 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 4843 EVT VT = Op.getValueType(); 4844 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4845 "unexpected type for custom-lowering ISD::SDIV"); 4846 4847 DebugLoc dl = Op.getDebugLoc(); 4848 SDValue N0 = Op.getOperand(0); 4849 SDValue N1 = Op.getOperand(1); 4850 SDValue N2, N3; 4851 4852 if (VT == MVT::v8i8) { 4853 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 4854 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 4855 4856 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4857 DAG.getIntPtrConstant(4)); 4858 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4859 DAG.getIntPtrConstant(4)); 4860 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4861 DAG.getIntPtrConstant(0)); 4862 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4863 DAG.getIntPtrConstant(0)); 4864 4865 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 4866 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 4867 4868 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4869 N0 = LowerCONCAT_VECTORS(N0, DAG); 4870 4871 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 4872 return N0; 4873 } 4874 return LowerSDIV_v4i16(N0, N1, dl, DAG); 4875 } 4876 4877 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 4878 EVT VT = Op.getValueType(); 4879 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4880 "unexpected type for custom-lowering ISD::UDIV"); 4881 4882 DebugLoc dl = Op.getDebugLoc(); 4883 SDValue N0 = Op.getOperand(0); 4884 SDValue N1 = Op.getOperand(1); 4885 SDValue N2, N3; 4886 4887 if (VT == MVT::v8i8) { 4888 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 4889 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 4890 4891 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4892 DAG.getIntPtrConstant(4)); 4893 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4894 DAG.getIntPtrConstant(4)); 4895 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4896 DAG.getIntPtrConstant(0)); 4897 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4898 DAG.getIntPtrConstant(0)); 4899 4900 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 (signed path is safe: zero-extended bytes are non-negative) 4901 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 4902 4903 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4904 N0 = LowerCONCAT_VECTORS(N0, DAG); 4905 4906 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 4907 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 4908 N0); 4909 return N0; 4910 } 4911 4912 // v4i16 udiv ... Convert to float.
4913 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 4914 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 4915 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 4916 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 4917 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4918 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4919 4920 // Use reciprocal estimate and two refinement steps. 4921 // float4 recip = vrecpeq_f32(yf); 4922 // recip *= vrecpsq_f32(yf, recip); 4923 // recip *= vrecpsq_f32(yf, recip); 4924 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4925 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1); 4926 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4927 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4928 BN1, N2); 4929 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4930 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4931 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4932 BN1, N2); 4933 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4934 // Simply multiplying by the reciprocal estimate can leave us a few ulps 4935 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 4936 // and that it will never cause us to return an answer too large). 4937 // float4 result = as_float4(as_int4(xf*recip) + 2); 4938 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4939 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4940 N1 = DAG.getConstant(2, MVT::i32); 4941 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4942 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4943 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4944 // Convert back to integer and return. 4945 // return vmovn_u32(vcvt_s32_f32(result)); 4946 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4947 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4948 return N0; 4949 } 4950 4951 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { 4952 EVT VT = Op.getNode()->getValueType(0); 4953 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 4954 4955 unsigned Opc; 4956 bool ExtraOp = false; 4957 switch (Op.getOpcode()) { 4958 default: llvm_unreachable("Invalid code"); 4959 case ISD::ADDC: Opc = ARMISD::ADDC; break; 4960 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; 4961 case ISD::SUBC: Opc = ARMISD::SUBC; break; 4962 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; 4963 } 4964 4965 if (!ExtraOp) 4966 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 4967 Op.getOperand(1)); 4968 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 4969 Op.getOperand(1), Op.getOperand(2)); 4970 } 4971 4972 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { 4973 // Monotonic load/store is legal for all targets. 4974 if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) 4975 return Op; 4976 4977 // Acquire/Release load/store is not legal for targets without a 4978 // dmb or equivalent available.
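// Returning SDValue() here hands the node back to the generic legalizer,
// which (roughly) rewrites an acquire ATOMIC_LOAD as an ATOMIC_CMP_SWAP
// against zero and a release ATOMIC_STORE as an ATOMIC_SWAP; those then pick
// up the usual barriers. (Sketch of the fallback path, not code emitted
// here.)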
4979 return SDValue(); 4980 } 4981 4982 4983 static void 4984 ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, 4985 SelectionDAG &DAG, unsigned NewOp) { 4986 DebugLoc dl = Node->getDebugLoc(); 4987 assert(Node->getValueType(0) == MVT::i64 && 4988 "Only know how to expand i64 atomics"); 4989 4990 SmallVector<SDValue, 6> Ops; 4991 Ops.push_back(Node->getOperand(0)); // Chain 4992 Ops.push_back(Node->getOperand(1)); // Ptr 4993 // Low part of Val1 4994 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 4995 Node->getOperand(2), DAG.getIntPtrConstant(0))); 4996 // High part of Val1 4997 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 4998 Node->getOperand(2), DAG.getIntPtrConstant(1))); 4999 if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { 5000 // Low part of Val2 5001 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5002 Node->getOperand(3), DAG.getIntPtrConstant(0))); 5003 // High part of Val2 5004 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5005 Node->getOperand(3), DAG.getIntPtrConstant(1))); 5006 } 5007 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 5008 SDValue Result = 5009 DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, 5010 cast<MemSDNode>(Node)->getMemOperand()); 5011 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; 5012 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); 5013 Results.push_back(Result.getValue(2)); 5014 } 5015 5016 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5017 switch (Op.getOpcode()) { 5018 default: llvm_unreachable("Don't know how to custom lower this!"); 5019 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5020 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5021 case ISD::GlobalAddress: 5022 return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : 5023 LowerGlobalAddressELF(Op, DAG); 5024 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5025 case ISD::SELECT: return LowerSELECT(Op, DAG); 5026 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5027 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 5028 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 5029 case ISD::VASTART: return LowerVASTART(Op, DAG); 5030 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 5031 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 5032 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 5033 case ISD::SINT_TO_FP: 5034 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5035 case ISD::FP_TO_SINT: 5036 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 5037 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5038 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5039 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5040 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 5041 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 5042 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 5043 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 5044 Subtarget); 5045 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 5046 case ISD::SHL: 5047 case ISD::SRL: 5048 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 5049 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 5050 case ISD::SRL_PARTS: 5051 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 5052 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 5053 case ISD::SETCC: return LowerVSETCC(Op, DAG); 5054 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 5055 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5056 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5057 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5058 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 5059 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5060 case ISD::MUL: return LowerMUL(Op, DAG); 5061 case ISD::SDIV: return LowerSDIV(Op, DAG); 5062 case ISD::UDIV: return LowerUDIV(Op, DAG); 5063 case ISD::ADDC: 5064 case ISD::ADDE: 5065 case ISD::SUBC: 5066 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 5067 case ISD::ATOMIC_LOAD: 5068 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 5069 } 5070 return SDValue(); 5071} 5072 5073/// ReplaceNodeResults - Replace the results of node with an illegal result 5074/// type with new values built out of custom code. 
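/// In particular, the i64 atomic operations handled below are forwarded to
/// ReplaceATOMIC_OP_64, which splits the value into i32 halves and rebuilds
/// the result with a BUILD_PAIR, roughly:
///   i64 atomicrmw add -> ARMISD::ATOMADD64_DAG -> BUILD_PAIR(lo, hi)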
5075void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 5076 SmallVectorImpl<SDValue>&Results, 5077 SelectionDAG &DAG) const { 5078 SDValue Res; 5079 switch (N->getOpcode()) { 5080 default: 5081 llvm_unreachable("Don't know how to custom expand this!"); 5082 break; 5083 case ISD::BITCAST: 5084 Res = ExpandBITCAST(N, DAG); 5085 break; 5086 case ISD::SRL: 5087 case ISD::SRA: 5088 Res = Expand64BitShift(N, DAG, Subtarget); 5089 break; 5090 case ISD::ATOMIC_LOAD_ADD: 5091 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); 5092 return; 5093 case ISD::ATOMIC_LOAD_AND: 5094 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); 5095 return; 5096 case ISD::ATOMIC_LOAD_NAND: 5097 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); 5098 return; 5099 case ISD::ATOMIC_LOAD_OR: 5100 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); 5101 return; 5102 case ISD::ATOMIC_LOAD_SUB: 5103 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); 5104 return; 5105 case ISD::ATOMIC_LOAD_XOR: 5106 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); 5107 return; 5108 case ISD::ATOMIC_SWAP: 5109 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); 5110 return; 5111 case ISD::ATOMIC_CMP_SWAP: 5112 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); 5113 return; 5114 } 5115 if (Res.getNode()) 5116 Results.push_back(Res); 5117} 5118 5119//===----------------------------------------------------------------------===// 5120// ARM Scheduler Hooks 5121//===----------------------------------------------------------------------===// 5122 5123MachineBasicBlock * 5124ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 5125 MachineBasicBlock *BB, 5126 unsigned Size) const { 5127 unsigned dest = MI->getOperand(0).getReg(); 5128 unsigned ptr = MI->getOperand(1).getReg(); 5129 unsigned oldval = MI->getOperand(2).getReg(); 5130 unsigned newval = MI->getOperand(3).getReg(); 5131 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5132 DebugLoc dl = MI->getDebugLoc(); 5133 bool isThumb2 = Subtarget->isThumb2(); 5134 5135 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5136 unsigned scratch = 5137 MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass 5138 : ARM::GPRRegisterClass); 5139 5140 if (isThumb2) { 5141 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5142 MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass); 5143 MRI.constrainRegClass(newval, ARM::rGPRRegisterClass); 5144 } 5145 5146 unsigned ldrOpc, strOpc; 5147 switch (Size) { 5148 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5149 case 1: 5150 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5151 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5152 break; 5153 case 2: 5154 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5155 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5156 break; 5157 case 4: 5158 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5159 strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; 5160 break; 5161 } 5162 5163 MachineFunction *MF = BB->getParent(); 5164 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5165 MachineFunction::iterator It = BB; 5166 ++It; // insert the new blocks after the current block 5167 5168 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5169 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5170 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5171 MF->insert(It, loop1MBB); 5172 MF->insert(It, loop2MBB); 5173 MF->insert(It, exitMBB); 5174 5175 // Transfer the remainder of BB and its successor edges to exitMBB. 5176 exitMBB->splice(exitMBB->begin(), BB, 5177 llvm::next(MachineBasicBlock::iterator(MI)), 5178 BB->end()); 5179 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5180 5181 // thisMBB: 5182 // ... 5183 // fallthrough --> loop1MBB 5184 BB->addSuccessor(loop1MBB); 5185 5186 // loop1MBB: 5187 // ldrex dest, [ptr] 5188 // cmp dest, oldval 5189 // bne exitMBB 5190 BB = loop1MBB; 5191 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5192 if (ldrOpc == ARM::t2LDREX) 5193 MIB.addImm(0); 5194 AddDefaultPred(MIB); 5195 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5196 .addReg(dest).addReg(oldval)); 5197 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5198 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5199 BB->addSuccessor(loop2MBB); 5200 BB->addSuccessor(exitMBB); 5201 5202 // loop2MBB: 5203 // strex scratch, newval, [ptr] 5204 // cmp scratch, #0 5205 // bne loop1MBB 5206 BB = loop2MBB; 5207 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 5208 if (strOpc == ARM::t2STREX) 5209 MIB.addImm(0); 5210 AddDefaultPred(MIB); 5211 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5212 .addReg(scratch).addImm(0)); 5213 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5214 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5215 BB->addSuccessor(loop1MBB); 5216 BB->addSuccessor(exitMBB); 5217 5218 // exitMBB: 5219 // ... 5220 BB = exitMBB; 5221 5222 MI->eraseFromParent(); // The instruction is gone now. 5223 5224 return BB; 5225} 5226 5227MachineBasicBlock * 5228ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5229 unsigned Size, unsigned BinOpcode) const { 5230 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5231 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5232 5233 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5234 MachineFunction *MF = BB->getParent(); 5235 MachineFunction::iterator It = BB; 5236 ++It; 5237 5238 unsigned dest = MI->getOperand(0).getReg(); 5239 unsigned ptr = MI->getOperand(1).getReg(); 5240 unsigned incr = MI->getOperand(2).getReg(); 5241 DebugLoc dl = MI->getDebugLoc(); 5242 bool isThumb2 = Subtarget->isThumb2(); 5243 5244 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5245 if (isThumb2) { 5246 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5247 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5248 } 5249 5250 unsigned ldrOpc, strOpc; 5251 switch (Size) { 5252 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5253 case 1: 5254 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5255 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5256 break; 5257 case 2: 5258 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5259 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5260 break; 5261 case 4: 5262 ldrOpc = isThumb2 ? 
ARM::t2LDREX : ARM::LDREX; 5263 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5264 break; 5265 } 5266 5267 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5268 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5269 MF->insert(It, loopMBB); 5270 MF->insert(It, exitMBB); 5271 5272 // Transfer the remainder of BB and its successor edges to exitMBB. 5273 exitMBB->splice(exitMBB->begin(), BB, 5274 llvm::next(MachineBasicBlock::iterator(MI)), 5275 BB->end()); 5276 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5277 5278 TargetRegisterClass *TRC = 5279 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5280 unsigned scratch = MRI.createVirtualRegister(TRC); 5281 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 5282 5283 // thisMBB: 5284 // ... 5285 // fallthrough --> loopMBB 5286 BB->addSuccessor(loopMBB); 5287 5288 // loopMBB: 5289 // ldrex dest, ptr 5290 // <binop> scratch2, dest, incr 5291 // strex scratch, scratch2, ptr 5292 // cmp scratch, #0 5293 // bne- loopMBB 5294 // fallthrough --> exitMBB 5295 BB = loopMBB; 5296 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5297 if (ldrOpc == ARM::t2LDREX) 5298 MIB.addImm(0); 5299 AddDefaultPred(MIB); 5300 if (BinOpcode) { 5301 // operand order needs to go the other way for NAND 5302 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 5303 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5304 addReg(incr).addReg(dest)).addReg(0); 5305 else 5306 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5307 addReg(dest).addReg(incr)).addReg(0); 5308 } 5309 5310 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5311 if (strOpc == ARM::t2STREX) 5312 MIB.addImm(0); 5313 AddDefaultPred(MIB); 5314 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5315 .addReg(scratch).addImm(0)); 5316 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5317 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5318 5319 BB->addSuccessor(loopMBB); 5320 BB->addSuccessor(exitMBB); 5321 5322 // exitMBB: 5323 // ... 5324 BB = exitMBB; 5325 5326 MI->eraseFromParent(); // The instruction is gone now. 5327 5328 return BB; 5329} 5330 5331MachineBasicBlock * 5332ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 5333 MachineBasicBlock *BB, 5334 unsigned Size, 5335 bool signExtend, 5336 ARMCC::CondCodes Cond) const { 5337 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5338 5339 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5340 MachineFunction *MF = BB->getParent(); 5341 MachineFunction::iterator It = BB; 5342 ++It; 5343 5344 unsigned dest = MI->getOperand(0).getReg(); 5345 unsigned ptr = MI->getOperand(1).getReg(); 5346 unsigned incr = MI->getOperand(2).getReg(); 5347 unsigned oldval = dest; 5348 DebugLoc dl = MI->getDebugLoc(); 5349 bool isThumb2 = Subtarget->isThumb2(); 5350 5351 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5352 if (isThumb2) { 5353 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5354 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5355 } 5356 5357 unsigned ldrOpc, strOpc, extendOpc; 5358 switch (Size) { 5359 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5360 case 1: 5361 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5362 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5363 extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 5364 break; 5365 case 2: 5366 ldrOpc = isThumb2 ? 
ARM::t2LDREXH : ARM::LDREXH; 5367 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5368 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 5369 break; 5370 case 4: 5371 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5372 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5373 extendOpc = 0; 5374 break; 5375 } 5376 5377 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5378 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5379 MF->insert(It, loopMBB); 5380 MF->insert(It, exitMBB); 5381 5382 // Transfer the remainder of BB and its successor edges to exitMBB. 5383 exitMBB->splice(exitMBB->begin(), BB, 5384 llvm::next(MachineBasicBlock::iterator(MI)), 5385 BB->end()); 5386 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5387 5388 TargetRegisterClass *TRC = 5389 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5390 unsigned scratch = MRI.createVirtualRegister(TRC); 5391 unsigned scratch2 = MRI.createVirtualRegister(TRC); 5392 5393 // thisMBB: 5394 // ... 5395 // fallthrough --> loopMBB 5396 BB->addSuccessor(loopMBB); 5397 5398 // loopMBB: 5399 // ldrex dest, ptr 5400 // (sign extend dest, if required) 5401 // cmp dest, incr 5402 // cmov.cond scratch2, dest, incr 5403 // strex scratch, scratch2, ptr 5404 // cmp scratch, #0 5405 // bne- loopMBB 5406 // fallthrough --> exitMBB 5407 BB = loopMBB; 5408 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5409 if (ldrOpc == ARM::t2LDREX) 5410 MIB.addImm(0); 5411 AddDefaultPred(MIB); 5412 5413 // Sign extend the value, if necessary. 5414 if (signExtend && extendOpc) { 5415 oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass); 5416 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 5417 .addReg(dest) 5418 .addImm(0)); 5419 } 5420 5421 // Build compare and cmov instructions. 5422 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5423 .addReg(oldval).addReg(incr)); 5424 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 5425 .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); 5426 5427 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5428 if (strOpc == ARM::t2STREX) 5429 MIB.addImm(0); 5430 AddDefaultPred(MIB); 5431 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5432 .addReg(scratch).addImm(0)); 5433 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5434 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5435 5436 BB->addSuccessor(loopMBB); 5437 BB->addSuccessor(exitMBB); 5438 5439 // exitMBB: 5440 // ... 5441 BB = exitMBB; 5442 5443 MI->eraseFromParent(); // The instruction is gone now. 5444 5445 return BB; 5446} 5447 5448MachineBasicBlock * 5449ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 5450 unsigned Op1, unsigned Op2, 5451 bool NeedsCarry, bool IsCmpxchg) const { 5452 // This also handles ATOMIC_SWAP, indicated by Op1==0. 
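  // The emitted code is the usual load/store-exclusive retry loop; e.g. for
  // an i64 add it is roughly (register choices illustrative only):
  //   loop: ldrexd r2, r3, [ptr]
  //         adds   r0, r2, vallo
  //         adc    r1, r3, valhi
  //         strexd success, r0, r1, [ptr]
  //         cmp    success, #0
  //         bne    loop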
5453 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5454 5455 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5456 MachineFunction *MF = BB->getParent(); 5457 MachineFunction::iterator It = BB; 5458 ++It; 5459 5460 unsigned destlo = MI->getOperand(0).getReg(); 5461 unsigned desthi = MI->getOperand(1).getReg(); 5462 unsigned ptr = MI->getOperand(2).getReg(); 5463 unsigned vallo = MI->getOperand(3).getReg(); 5464 unsigned valhi = MI->getOperand(4).getReg(); 5465 DebugLoc dl = MI->getDebugLoc(); 5466 bool isThumb2 = Subtarget->isThumb2(); 5467 5468 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5469 if (isThumb2) { 5470 MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass); 5471 MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass); 5472 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5473 } 5474 5475 unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; 5476 unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; 5477 5478 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5479 MachineBasicBlock *contBB = 0, *cont2BB = 0; 5480 if (IsCmpxchg) { 5481 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 5482 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 5483 } 5484 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5485 MF->insert(It, loopMBB); 5486 if (IsCmpxchg) { 5487 MF->insert(It, contBB); 5488 MF->insert(It, cont2BB); 5489 } 5490 MF->insert(It, exitMBB); 5491 5492 // Transfer the remainder of BB and its successor edges to exitMBB. 5493 exitMBB->splice(exitMBB->begin(), BB, 5494 llvm::next(MachineBasicBlock::iterator(MI)), 5495 BB->end()); 5496 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5497 5498 TargetRegisterClass *TRC = 5499 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5500 unsigned storesuccess = MRI.createVirtualRegister(TRC); 5501 5502 // thisMBB: 5503 // ... 5504 // fallthrough --> loopMBB 5505 BB->addSuccessor(loopMBB); 5506 5507 // loopMBB: 5508 // ldrexd r2, r3, ptr 5509 // <binopa> r0, r2, incr 5510 // <binopb> r1, r3, incr 5511 // strexd storesuccess, r0, r1, ptr 5512 // cmp storesuccess, #0 5513 // bne- loopMBB 5514 // fallthrough --> exitMBB 5515 // 5516 // Note that the registers are explicitly specified because there is not any 5517 // way to force the register allocator to allocate a register pair. 5518 // 5519 // FIXME: The hardcoded registers are not necessary for Thumb2, but we 5520 // need to properly enforce the restriction that the two output registers 5521 // for ldrexd must be different. 5522 BB = loopMBB; 5523 // Load 5524 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 5525 .addReg(ARM::R2, RegState::Define) 5526 .addReg(ARM::R3, RegState::Define).addReg(ptr)); 5527 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 5528 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); 5529 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); 5530 5531 if (IsCmpxchg) { 5532 // Add early exit 5533 for (unsigned i = 0; i < 2; i++) { 5534 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 5535 ARM::CMPrr)) 5536 .addReg(i == 0 ? destlo : desthi) 5537 .addReg(i == 0 ? vallo : valhi)); 5538 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5539 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5540 BB->addSuccessor(exitMBB); 5541 BB->addSuccessor(i == 0 ? contBB : cont2BB); 5542 BB = (i == 0 ? 
contBB : cont2BB); 5543 } 5544 5545 // Copy to physregs for strexd 5546 unsigned setlo = MI->getOperand(5).getReg(); 5547 unsigned sethi = MI->getOperand(6).getReg(); 5548 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); 5549 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); 5550 } else if (Op1) { 5551 // Perform binary operation 5552 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) 5553 .addReg(destlo).addReg(vallo)) 5554 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 5555 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) 5556 .addReg(desthi).addReg(valhi)).addReg(0); 5557 } else { 5558 // Copy to physregs for strexd 5559 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); 5560 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); 5561 } 5562 5563 // Store 5564 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 5565 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); 5566 // Cmp+jump 5567 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5568 .addReg(storesuccess).addImm(0)); 5569 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5570 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5571 5572 BB->addSuccessor(loopMBB); 5573 BB->addSuccessor(exitMBB); 5574 5575 // exitMBB: 5576 // ... 5577 BB = exitMBB; 5578 5579 MI->eraseFromParent(); // The instruction is gone now. 5580 5581 return BB; 5582} 5583 5584/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 5585/// registers the function context. 5586void ARMTargetLowering:: 5587SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 5588 MachineBasicBlock *DispatchBB, int FI) const { 5589 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5590 DebugLoc dl = MI->getDebugLoc(); 5591 MachineFunction *MF = MBB->getParent(); 5592 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5593 MachineConstantPool *MCP = MF->getConstantPool(); 5594 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 5595 const Function *F = MF->getFunction(); 5596 5597 bool isThumb = Subtarget->isThumb(); 5598 bool isThumb2 = Subtarget->isThumb2(); 5599 5600 unsigned PCLabelId = AFI->createPICLabelUId(); 5601 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; 5602 ARMConstantPoolValue *CPV = 5603 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 5604 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 5605 5606 const TargetRegisterClass *TRC = 5607 isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5608 5609 // Grab constant pool and fixed stack memory operands. 5610 MachineMemOperand *CPMMO = 5611 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 5612 MachineMemOperand::MOLoad, 4, 4); 5613 5614 MachineMemOperand *FIMMOSt = 5615 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 5616 MachineMemOperand::MOStore, 4, 4); 5617 5618 // Load the address of the dispatch MBB into the jump buffer. 5619 if (isThumb2) { 5620 // Incoming value: jbuf 5621 // ldr.n r5, LCPI1_1 5622 // orr r5, r5, #1 5623 // add r5, pc 5624 // str r5, [$jbuf, #+4] ; &jbuf[1] 5625 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5626 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 5627 .addConstantPoolIndex(CPI) 5628 .addMemOperand(CPMMO)); 5629 // Set the low bit because of thumb mode. 
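    // (Bit 0 of a code address selects the instruction set on an
    // interworking branch, so the dispatch address stored in the jump
    // buffer must have it set to be entered in Thumb state.)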
5630 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5631 AddDefaultCC( 5632 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 5633 .addReg(NewVReg1, RegState::Kill) 5634 .addImm(0x01))); 5635 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5636 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 5637 .addReg(NewVReg2, RegState::Kill) 5638 .addImm(PCLabelId); 5639 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 5640 .addReg(NewVReg3, RegState::Kill) 5641 .addFrameIndex(FI) 5642 .addImm(36) // &jbuf[1] :: pc 5643 .addMemOperand(FIMMOSt)); 5644 } else if (isThumb) { 5645 // Incoming value: jbuf 5646 // ldr.n r1, LCPI1_4 5647 // add r1, pc 5648 // mov r2, #1 5649 // orrs r1, r2 5650 // add r2, $jbuf, #+4 ; &jbuf[1] 5651 // str r1, [r2] 5652 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5653 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 5654 .addConstantPoolIndex(CPI) 5655 .addMemOperand(CPMMO)); 5656 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5657 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 5658 .addReg(NewVReg1, RegState::Kill) 5659 .addImm(PCLabelId); 5660 // Set the low bit because of thumb mode. 5661 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5662 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 5663 .addReg(ARM::CPSR, RegState::Define) 5664 .addImm(1)); 5665 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5666 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 5667 .addReg(ARM::CPSR, RegState::Define) 5668 .addReg(NewVReg2, RegState::Kill) 5669 .addReg(NewVReg3, RegState::Kill)); 5670 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 5671 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 5672 .addFrameIndex(FI) 5673 .addImm(36)); // &jbuf[1] :: pc 5674 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 5675 .addReg(NewVReg4, RegState::Kill) 5676 .addReg(NewVReg5, RegState::Kill) 5677 .addImm(0) 5678 .addMemOperand(FIMMOSt)); 5679 } else { 5680 // Incoming value: jbuf 5681 // ldr r1, LCPI1_1 5682 // add r1, pc, r1 5683 // str r1, [$jbuf, #+4] ; &jbuf[1] 5684 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5685 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 5686 .addConstantPoolIndex(CPI) 5687 .addImm(0) 5688 .addMemOperand(CPMMO)); 5689 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5690 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 5691 .addReg(NewVReg1, RegState::Kill) 5692 .addImm(PCLabelId)); 5693 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 5694 .addReg(NewVReg2, RegState::Kill) 5695 .addFrameIndex(FI) 5696 .addImm(36) // &jbuf[1] :: pc 5697 .addMemOperand(FIMMOSt)); 5698 } 5699} 5700 5701MachineBasicBlock *ARMTargetLowering:: 5702EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 5703 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5704 DebugLoc dl = MI->getDebugLoc(); 5705 MachineFunction *MF = MBB->getParent(); 5706 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5707 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 5708 MachineFrameInfo *MFI = MF->getFrameInfo(); 5709 int FI = MFI->getFunctionContextIndex(); 5710 5711 const TargetRegisterClass *TRC = 5712 Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5713 5714 // Get a mapping of the call site numbers to all of the landing pads they're 5715 // associated with. 
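  // For example, if call sites 1 and 3 unwind to %lpad.a and call site 2
  // unwinds to %lpad.b, this produces {1 -> [a], 2 -> [b], 3 -> [a]}, and
  // the jump table built below dispatches on the call-site number
  // (illustrative IR names only).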
5716 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; 5717 unsigned MaxCSNum = 0; 5718 MachineModuleInfo &MMI = MF->getMMI(); 5719 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { 5720 if (!BB->isLandingPad()) continue; 5721 5722 // FIXME: We should assert that the EH_LABEL is the first MI in the landing 5723 // pad. 5724 for (MachineBasicBlock::iterator 5725 II = BB->begin(), IE = BB->end(); II != IE; ++II) { 5726 if (!II->isEHLabel()) continue; 5727 5728 MCSymbol *Sym = II->getOperand(0).getMCSymbol(); 5729 if (!MMI.hasCallSiteLandingPad(Sym)) continue; 5730 5731 SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); 5732 for (SmallVectorImpl<unsigned>::iterator 5733 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); 5734 CSI != CSE; ++CSI) { 5735 CallSiteNumToLPad[*CSI].push_back(BB); 5736 MaxCSNum = std::max(MaxCSNum, *CSI); 5737 } 5738 break; 5739 } 5740 } 5741 5742 // Get an ordered list of the machine basic blocks for the jump table. 5743 std::vector<MachineBasicBlock*> LPadList; 5744 SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs; 5745 LPadList.reserve(CallSiteNumToLPad.size()); 5746 for (unsigned I = 1; I <= MaxCSNum; ++I) { 5747 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; 5748 for (SmallVectorImpl<MachineBasicBlock*>::iterator 5749 II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { 5750 LPadList.push_back(*II); 5751 InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); 5752 } 5753 } 5754 5755 assert(!LPadList.empty() && 5756 "No landing pad destinations for the dispatch jump table!"); 5757 5758 // Create the jump table and associated information. 5759 MachineJumpTableInfo *JTI = 5760 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); 5761 unsigned MJTI = JTI->createJumpTableIndex(LPadList); 5762 unsigned UId = AFI->createJumpTableUId(); 5763 5764 // Create the MBBs for the dispatch code. 5765 5766 // Shove the dispatch's address into the return slot in the function context. 5767 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); 5768 DispatchBB->setIsLandingPad(); 5769 5770 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); 5771 BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP)); 5772 DispatchBB->addSuccessor(TrapBB); 5773 5774 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); 5775 DispatchBB->addSuccessor(DispContBB); 5776 5777 // Insert the MBBs into the function. 5778 MF->insert(MF->end(), DispatchBB); 5779 MF->insert(MF->end(), DispContBB); 5780 MF->insert(MF->end(), TrapBB); 5781 5782 // Insert code into the entry block that creates and registers the function 5783 // context.
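// The dispatch block below then reloads the call-site number that the
// runtime stored into that context (the volatile load at offset 4 from the
// function-context frame index) and switches on it through the jump table;
// a sketch of the handshake, with offsets as used by the code below.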
5784 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); 5785 5786 MachineMemOperand *FIMMOLd = 5787 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 5788 MachineMemOperand::MOLoad | 5789 MachineMemOperand::MOVolatile, 4, 4); 5790 5791 BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup)); 5792 5793 unsigned NumLPads = LPadList.size(); 5794 if (Subtarget->isThumb2()) { 5795 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5796 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 5797 .addFrameIndex(FI) 5798 .addImm(4) 5799 .addMemOperand(FIMMOLd)); 5800 5801 if (NumLPads < 256) { 5802 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 5803 .addReg(NewVReg1) 5804 .addImm(LPadList.size())); 5805 } else { 5806 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5807 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 5808 .addImm(NumLPads & 0xFFFF)); 5809 5810 unsigned VReg2 = VReg1; 5811 if ((NumLPads & 0xFFFF0000) != 0) { 5812 VReg2 = MRI->createVirtualRegister(TRC); 5813 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 5814 .addReg(VReg1) 5815 .addImm(NumLPads >> 16)); 5816 } 5817 5818 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 5819 .addReg(NewVReg1) 5820 .addReg(VReg2)); 5821 } 5822 5823 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 5824 .addMBB(TrapBB) 5825 .addImm(ARMCC::HI) 5826 .addReg(ARM::CPSR); 5827 5828 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5829 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 5830 .addJumpTableIndex(MJTI) 5831 .addImm(UId)); 5832 5833 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5834 AddDefaultCC( 5835 AddDefaultPred( 5836 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 5837 .addReg(NewVReg3, RegState::Kill) 5838 .addReg(NewVReg1) 5839 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 5840 5841 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 5842 .addReg(NewVReg4, RegState::Kill) 5843 .addReg(NewVReg1) 5844 .addJumpTableIndex(MJTI) 5845 .addImm(UId); 5846 } else if (Subtarget->isThumb()) { 5847 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5848 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 5849 .addFrameIndex(FI) 5850 .addImm(1) 5851 .addMemOperand(FIMMOLd)); 5852 5853 if (NumLPads < 256) { 5854 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 5855 .addReg(NewVReg1) 5856 .addImm(NumLPads)); 5857 } else { 5858 MachineConstantPool *ConstantPool = MF->getConstantPool(); 5859 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 5860 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 5861 5862 // MachineConstantPool wants an explicit alignment. 
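      // (A preferred alignment of 0 means "none specified"; the type's
      // alloc size is then used below as a conservative fallback.)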
5863 unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); 5864 if (Align == 0) 5865 Align = getTargetData()->getTypeAllocSize(C->getType()); 5866 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 5867 5868 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5869 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 5870 .addReg(VReg1, RegState::Define) 5871 .addConstantPoolIndex(Idx)); 5872 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 5873 .addReg(NewVReg1) 5874 .addReg(VReg1)); 5875 } 5876 5877 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 5878 .addMBB(TrapBB) 5879 .addImm(ARMCC::HI) 5880 .addReg(ARM::CPSR); 5881 5882 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5883 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 5884 .addReg(ARM::CPSR, RegState::Define) 5885 .addReg(NewVReg1) 5886 .addImm(2)); 5887 5888 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5889 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 5890 .addJumpTableIndex(MJTI) 5891 .addImm(UId)); 5892 5893 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5894 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 5895 .addReg(ARM::CPSR, RegState::Define) 5896 .addReg(NewVReg2, RegState::Kill) 5897 .addReg(NewVReg3)); 5898 5899 MachineMemOperand *JTMMOLd = 5900 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 5901 MachineMemOperand::MOLoad, 4, 4); 5902 5903 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 5904 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 5905 .addReg(NewVReg4, RegState::Kill) 5906 .addImm(0) 5907 .addMemOperand(JTMMOLd)); 5908 5909 unsigned NewVReg6 = MRI->createVirtualRegister(TRC); 5910 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 5911 .addReg(ARM::CPSR, RegState::Define) 5912 .addReg(NewVReg5, RegState::Kill) 5913 .addReg(NewVReg3)); 5914 5915 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 5916 .addReg(NewVReg6, RegState::Kill) 5917 .addJumpTableIndex(MJTI) 5918 .addImm(UId); 5919 } else { 5920 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5921 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 5922 .addFrameIndex(FI) 5923 .addImm(4) 5924 .addMemOperand(FIMMOLd)); 5925 5926 if (NumLPads < 256) { 5927 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 5928 .addReg(NewVReg1) 5929 .addImm(NumLPads)); 5930 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 5931 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5932 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 5933 .addImm(NumLPads & 0xFFFF)); 5934 5935 unsigned VReg2 = VReg1; 5936 if ((NumLPads & 0xFFFF0000) != 0) { 5937 VReg2 = MRI->createVirtualRegister(TRC); 5938 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 5939 .addReg(VReg1) 5940 .addImm(NumLPads >> 16)); 5941 } 5942 5943 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 5944 .addReg(NewVReg1) 5945 .addReg(VReg2)); 5946 } else { 5947 MachineConstantPool *ConstantPool = MF->getConstantPool(); 5948 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 5949 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 5950 5951 // MachineConstantPool wants an explicit alignment. 
5952 unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); 5953 if (Align == 0) 5954 Align = getTargetData()->getTypeAllocSize(C->getType()); 5955 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 5956 5957 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5958 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 5959 .addReg(VReg1, RegState::Define) 5960 .addConstantPoolIndex(Idx) 5961 .addImm(0)); 5962 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 5963 .addReg(NewVReg1) 5964 .addReg(VReg1, RegState::Kill)); 5965 } 5966 5967 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 5968 .addMBB(TrapBB) 5969 .addImm(ARMCC::HI) 5970 .addReg(ARM::CPSR); 5971 5972 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5973 AddDefaultCC( 5974 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 5975 .addReg(NewVReg1) 5976 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 5977 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5978 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 5979 .addJumpTableIndex(MJTI) 5980 .addImm(UId)); 5981 5982 MachineMemOperand *JTMMOLd = 5983 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 5984 MachineMemOperand::MOLoad, 4, 4); 5985 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 5986 AddDefaultPred( 5987 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 5988 .addReg(NewVReg3, RegState::Kill) 5989 .addReg(NewVReg4) 5990 .addImm(0) 5991 .addMemOperand(JTMMOLd)); 5992 5993 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 5994 .addReg(NewVReg5, RegState::Kill) 5995 .addReg(NewVReg4) 5996 .addJumpTableIndex(MJTI) 5997 .addImm(UId); 5998 } 5999 6000 // Add the jump table entries as successors to the MBB. 6001 MachineBasicBlock *PrevMBB = 0; 6002 for (std::vector<MachineBasicBlock*>::iterator 6003 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 6004 MachineBasicBlock *CurMBB = *I; 6005 if (PrevMBB != CurMBB) 6006 DispContBB->addSuccessor(CurMBB); 6007 PrevMBB = CurMBB; 6008 } 6009 6010 // N.B. the order the invoke BBs are processed in doesn't matter here. 6011 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); 6012 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 6013 const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); 6014 SmallVector<MachineBasicBlock*, 64> MBBLPads; 6015 for (SmallPtrSet<MachineBasicBlock*, 64>::iterator 6016 I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { 6017 MachineBasicBlock *BB = *I; 6018 6019 // Remove the landing pad successor from the invoke block and replace it 6020 // with the new dispatch block. 6021 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 6022 BB->succ_end()); 6023 while (!Successors.empty()) { 6024 MachineBasicBlock *SMBB = Successors.pop_back_val(); 6025 if (SMBB->isLandingPad()) { 6026 BB->removeSuccessor(SMBB); 6027 MBBLPads.push_back(SMBB); 6028 } 6029 } 6030 6031 BB->addSuccessor(DispatchBB); 6032 6033 // Find the invoke call and mark all of the callee-saved registers as 6034 // 'implicit defined' so that they're spilled. This prevents code from 6035 // moving instructions to before the EH block, where they will never be 6036 // executed. 
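    // (For example, a call inside such a block may end up with extra operands
    // along the lines of "%R4<imp-def,dead>, %R5<imp-def,dead>, ..."; the
    // exact registers come from the getCalleeSavedRegs query above.
    // Illustrative only.)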
6037 for (MachineBasicBlock::reverse_iterator 6038 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { 6039 if (!II->isCall()) continue; 6040 6041 DenseMap<unsigned, bool> DefRegs; 6042 for (MachineInstr::mop_iterator 6043 OI = II->operands_begin(), OE = II->operands_end(); 6044 OI != OE; ++OI) { 6045 if (!OI->isReg()) continue; 6046 DefRegs[OI->getReg()] = true; 6047 } 6048 6049 MachineInstrBuilder MIB(&*II); 6050 6051 for (unsigned i = 0; SavedRegs[i] != 0; ++i) { 6052 unsigned Reg = SavedRegs[i]; 6053 if (Subtarget->isThumb2() && 6054 !ARM::tGPRRegisterClass->contains(Reg) && 6055 !ARM::hGPRRegisterClass->contains(Reg)) 6056 continue; 6057 else if (Subtarget->isThumb1Only() && 6058 !ARM::tGPRRegisterClass->contains(Reg)) 6059 continue; 6060 else if (!Subtarget->isThumb() && 6061 !ARM::GPRRegisterClass->contains(Reg)) 6062 continue; 6063 if (!DefRegs[Reg]) 6064 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); 6065 } 6066 6067 break; 6068 } 6069 } 6070 6071 // Mark all former landing pads as non-landing pads. The dispatch is the only 6072 // landing pad now. 6073 for (SmallVectorImpl<MachineBasicBlock*>::iterator 6074 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) 6075 (*I)->setIsLandingPad(false); 6076 6077 // The instruction is gone now. 6078 MI->eraseFromParent(); 6079 6080 return MBB; 6081} 6082 6083static 6084MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 6085 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 6086 E = MBB->succ_end(); I != E; ++I) 6087 if (*I != Succ) 6088 return *I; 6089 llvm_unreachable("Expecting a BB with two successors!"); 6090} 6091 6092MachineBasicBlock * 6093ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 6094 MachineBasicBlock *BB) const { 6095 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6096 DebugLoc dl = MI->getDebugLoc(); 6097 bool isThumb2 = Subtarget->isThumb2(); 6098 switch (MI->getOpcode()) { 6099 default: { 6100 MI->dump(); 6101 llvm_unreachable("Unexpected instr type to insert"); 6102 } 6103 // The Thumb2 pre-indexed stores have the same MI operands, they just 6104 // define them differently in the .td files from the isel patterns, so 6105 // they need pseudos. 6106 case ARM::t2STR_preidx: 6107 MI->setDesc(TII->get(ARM::t2STR_PRE)); 6108 return BB; 6109 case ARM::t2STRB_preidx: 6110 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 6111 return BB; 6112 case ARM::t2STRH_preidx: 6113 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 6114 return BB; 6115 6116 case ARM::STRi_preidx: 6117 case ARM::STRBi_preidx: { 6118 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 6119 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 6120 // Decode the offset. 
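    // (The addrmode2 immediate packs an add/sub flag together with the 12-bit
    // offset; ARM_AM::getAM2Op and ARM_AM::getAM2Offset below unpack those
    // two fields.)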
6121 unsigned Offset = MI->getOperand(4).getImm(); 6122 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 6123 Offset = ARM_AM::getAM2Offset(Offset); 6124 if (isSub) 6125 Offset = -Offset; 6126 6127 MachineMemOperand *MMO = *MI->memoperands_begin(); 6128 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 6129 .addOperand(MI->getOperand(0)) // Rn_wb 6130 .addOperand(MI->getOperand(1)) // Rt 6131 .addOperand(MI->getOperand(2)) // Rn 6132 .addImm(Offset) // offset (skip GPR==zero_reg) 6133 .addOperand(MI->getOperand(5)) // pred 6134 .addOperand(MI->getOperand(6)) 6135 .addMemOperand(MMO); 6136 MI->eraseFromParent(); 6137 return BB; 6138 } 6139 case ARM::STRr_preidx: 6140 case ARM::STRBr_preidx: 6141 case ARM::STRH_preidx: { 6142 unsigned NewOpc; 6143 switch (MI->getOpcode()) { 6144 default: llvm_unreachable("unexpected opcode!"); 6145 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 6146 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 6147 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 6148 } 6149 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 6150 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 6151 MIB.addOperand(MI->getOperand(i)); 6152 MI->eraseFromParent(); 6153 return BB; 6154 } 6155 case ARM::ATOMIC_LOAD_ADD_I8: 6156 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6157 case ARM::ATOMIC_LOAD_ADD_I16: 6158 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6159 case ARM::ATOMIC_LOAD_ADD_I32: 6160 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6161 6162 case ARM::ATOMIC_LOAD_AND_I8: 6163 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6164 case ARM::ATOMIC_LOAD_AND_I16: 6165 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6166 case ARM::ATOMIC_LOAD_AND_I32: 6167 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6168 6169 case ARM::ATOMIC_LOAD_OR_I8: 6170 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6171 case ARM::ATOMIC_LOAD_OR_I16: 6172 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6173 case ARM::ATOMIC_LOAD_OR_I32: 6174 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6175 6176 case ARM::ATOMIC_LOAD_XOR_I8: 6177 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6178 case ARM::ATOMIC_LOAD_XOR_I16: 6179 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6180 case ARM::ATOMIC_LOAD_XOR_I32: 6181 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6182 6183 case ARM::ATOMIC_LOAD_NAND_I8: 6184 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6185 case ARM::ATOMIC_LOAD_NAND_I16: 6186 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6187 case ARM::ATOMIC_LOAD_NAND_I32: 6188 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6189 6190 case ARM::ATOMIC_LOAD_SUB_I8: 6191 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 6192 case ARM::ATOMIC_LOAD_SUB_I16: 6193 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 6194 case ARM::ATOMIC_LOAD_SUB_I32: 6195 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 6196 6197 case ARM::ATOMIC_LOAD_MIN_I8: 6198 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 6199 case ARM::ATOMIC_LOAD_MIN_I16: 6200 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 6201 case ARM::ATOMIC_LOAD_MIN_I32: 6202 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 6203 6204 case ARM::ATOMIC_LOAD_MAX_I8: 6205 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 6206 case ARM::ATOMIC_LOAD_MAX_I16: 6207 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 6208 case ARM::ATOMIC_LOAD_MAX_I32: 6209 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 6210 6211 case ARM::ATOMIC_LOAD_UMIN_I8: 6212 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 6213 case ARM::ATOMIC_LOAD_UMIN_I16: 6214 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 6215 case ARM::ATOMIC_LOAD_UMIN_I32: 6216 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 6217 6218 case ARM::ATOMIC_LOAD_UMAX_I8: 6219 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 6220 case ARM::ATOMIC_LOAD_UMAX_I16: 6221 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 6222 case ARM::ATOMIC_LOAD_UMAX_I32: 6223 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 6224 6225 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 6226 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 6227 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 6228 6229 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 6230 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 6231 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 6232 6233 6234 case ARM::ATOMADD6432: 6235 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 6236 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 6237 /*NeedsCarry*/ true); 6238 case ARM::ATOMSUB6432: 6239 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 6240 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 6241 /*NeedsCarry*/ true); 6242 case ARM::ATOMOR6432: 6243 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 6244 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6245 case ARM::ATOMXOR6432: 6246 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 6247 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6248 case ARM::ATOMAND6432: 6249 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 6250 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6251 case ARM::ATOMSWAP6432: 6252 return EmitAtomicBinary64(MI, BB, 0, 0, false); 6253 case ARM::ATOMCMPXCHG6432: 6254 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 6255 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 6256 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 6257 6258 case ARM::tMOVCCr_pseudo: { 6259 // To "insert" a SELECT_CC instruction, we actually have to insert the 6260 // diamond control-flow pattern. The incoming instruction knows the 6261 // destination vreg to set, the condition code register to branch on, the 6262 // true/false values to select between, and a branch opcode to use. 6263 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6264 MachineFunction::iterator It = BB; 6265 ++It; 6266 6267 // thisMBB: 6268 // ... 6269 // TrueVal = ... 
6270 // cmpTY ccX, r1, r2 6271 // bCC copy1MBB 6272 // fallthrough --> copy0MBB 6273 MachineBasicBlock *thisMBB = BB; 6274 MachineFunction *F = BB->getParent(); 6275 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 6276 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 6277 F->insert(It, copy0MBB); 6278 F->insert(It, sinkMBB); 6279 6280 // Transfer the remainder of BB and its successor edges to sinkMBB. 6281 sinkMBB->splice(sinkMBB->begin(), BB, 6282 llvm::next(MachineBasicBlock::iterator(MI)), 6283 BB->end()); 6284 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 6285 6286 BB->addSuccessor(copy0MBB); 6287 BB->addSuccessor(sinkMBB); 6288 6289 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 6290 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 6291 6292 // copy0MBB: 6293 // %FalseValue = ... 6294 // # fallthrough to sinkMBB 6295 BB = copy0MBB; 6296 6297 // Update machine-CFG edges 6298 BB->addSuccessor(sinkMBB); 6299 6300 // sinkMBB: 6301 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 6302 // ... 6303 BB = sinkMBB; 6304 BuildMI(*BB, BB->begin(), dl, 6305 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 6306 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 6307 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 6308 6309 MI->eraseFromParent(); // The pseudo instruction is gone now. 6310 return BB; 6311 } 6312 6313 case ARM::BCCi64: 6314 case ARM::BCCZi64: { 6315 // If there is an unconditional branch to the other successor, remove it. 6316 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 6317 6318 // Compare both parts that make up the double comparison separately for 6319 // equality. 6320 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 6321 6322 unsigned LHS1 = MI->getOperand(1).getReg(); 6323 unsigned LHS2 = MI->getOperand(2).getReg(); 6324 if (RHSisZero) { 6325 AddDefaultPred(BuildMI(BB, dl, 6326 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6327 .addReg(LHS1).addImm(0)); 6328 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6329 .addReg(LHS2).addImm(0) 6330 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 6331 } else { 6332 unsigned RHS1 = MI->getOperand(3).getReg(); 6333 unsigned RHS2 = MI->getOperand(4).getReg(); 6334 AddDefaultPred(BuildMI(BB, dl, 6335 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6336 .addReg(LHS1).addReg(RHS1)); 6337 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6338 .addReg(LHS2).addReg(RHS2) 6339 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 6340 } 6341 6342 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 6343 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 6344 if (MI->getOperand(0).getImm() == ARMCC::NE) 6345 std::swap(destMBB, exitMBB); 6346 6347 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6348 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 6349 if (isThumb2) 6350 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 6351 else 6352 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 6353 6354 MI->eraseFromParent(); // The pseudo instruction is gone now. 6355 return BB; 6356 } 6357 6358 case ARM::Int_eh_sjlj_setjmp: 6359 case ARM::Int_eh_sjlj_setjmp_nofp: 6360 case ARM::tInt_eh_sjlj_setjmp: 6361 case ARM::t2Int_eh_sjlj_setjmp: 6362 case ARM::t2Int_eh_sjlj_setjmp_nofp: 6363 EmitSjLjDispatchBlock(MI, BB); 6364 return BB; 6365 6366 case ARM::ABS: 6367 case ARM::t2ABS: { 6368 // To insert an ABS instruction, we have to insert the 6369 // diamond control-flow pattern. 
The incoming instruction knows the 6370 // source vreg to test against 0, the destination vreg to set, 6371 // the condition code register to branch on, the 6372 // true/false values to select between, and a branch opcode to use. 6373 // It transforms 6374 // V1 = ABS V0 6375 // into 6376 // V2 = MOVS V0 6377 // BCC (branch to SinkBB if V0 >= 0) 6378 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 6379 // SinkBB: V1 = PHI(V2, V3) 6380 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6381 MachineFunction::iterator BBI = BB; 6382 ++BBI; 6383 MachineFunction *Fn = BB->getParent(); 6384 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 6385 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 6386 Fn->insert(BBI, RSBBB); 6387 Fn->insert(BBI, SinkBB); 6388 6389 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 6390 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 6391 bool isThumb2 = Subtarget->isThumb2(); 6392 MachineRegisterInfo &MRI = Fn->getRegInfo(); 6393 // In Thumb mode S must not be specified if source register is the SP or 6394 // PC and if destination register is the SP, so restrict register class 6395 unsigned NewMovDstReg = MRI.createVirtualRegister( 6396 isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); 6397 unsigned NewRsbDstReg = MRI.createVirtualRegister( 6398 isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); 6399 6400 // Transfer the remainder of BB and its successor edges to sinkMBB. 6401 SinkBB->splice(SinkBB->begin(), BB, 6402 llvm::next(MachineBasicBlock::iterator(MI)), 6403 BB->end()); 6404 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 6405 6406 BB->addSuccessor(RSBBB); 6407 BB->addSuccessor(SinkBB); 6408 6409 // fall through to SinkMBB 6410 RSBBB->addSuccessor(SinkBB); 6411 6412 // insert a movs at the end of BB 6413 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), 6414 NewMovDstReg) 6415 .addReg(ABSSrcReg, RegState::Kill) 6416 .addImm((unsigned)ARMCC::AL).addReg(0) 6417 .addReg(ARM::CPSR, RegState::Define); 6418 6419 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 6420 BuildMI(BB, dl, 6421 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 6422 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 6423 6424 // insert rsbri in RSBBB 6425 // Note: BCC and rsbri will be converted into predicated rsbmi 6426 // by if-conversion pass 6427 BuildMI(*RSBBB, RSBBB->begin(), dl, 6428 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 6429 .addReg(NewMovDstReg, RegState::Kill) 6430 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 6431 6432 // insert PHI in SinkBB, 6433 // reuse ABSDstReg to not change uses of ABS instruction 6434 BuildMI(*SinkBB, SinkBB->begin(), dl, 6435 TII->get(ARM::PHI), ABSDstReg) 6436 .addReg(NewRsbDstReg).addMBB(RSBBB) 6437 .addReg(NewMovDstReg).addMBB(BB); 6438 6439 // remove ABS instruction 6440 MI->eraseFromParent(); 6441 6442 // return last added BB 6443 return SinkBB; 6444 } 6445 } 6446} 6447 6448void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 6449 SDNode *Node) const { 6450 if (!MI->hasPostISelHook()) { 6451 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 6452 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 6453 return; 6454 } 6455 6456 const MCInstrDesc *MCID = &MI->getDesc(); 6457 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 6458 // RSC. 
Coming out of isel, they have an implicit CPSR def, but the optional 6459 // operand is still set to noreg. If needed, set the optional operand's 6460 // register to CPSR, and remove the redundant implicit def. 6461 // 6462 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 6463 6464 // Rename pseudo opcodes. 6465 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 6466 if (NewOpc) { 6467 const ARMBaseInstrInfo *TII = 6468 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 6469 MCID = &TII->get(NewOpc); 6470 6471 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 6472 "converted opcode should be the same except for cc_out"); 6473 6474 MI->setDesc(*MCID); 6475 6476 // Add the optional cc_out operand 6477 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 6478 } 6479 unsigned ccOutIdx = MCID->getNumOperands() - 1; 6480 6481 // Any ARM instruction that sets the 's' bit should specify an optional 6482 // "cc_out" operand in the last operand position. 6483 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 6484 assert(!NewOpc && "Optional cc_out operand required"); 6485 return; 6486 } 6487 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 6488 // since we already have an optional CPSR def. 6489 bool definesCPSR = false; 6490 bool deadCPSR = false; 6491 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 6492 i != e; ++i) { 6493 const MachineOperand &MO = MI->getOperand(i); 6494 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 6495 definesCPSR = true; 6496 if (MO.isDead()) 6497 deadCPSR = true; 6498 MI->RemoveOperand(i); 6499 break; 6500 } 6501 } 6502 if (!definesCPSR) { 6503 assert(!NewOpc && "Optional cc_out operand required"); 6504 return; 6505 } 6506 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 6507 if (deadCPSR) { 6508 assert(!MI->getOperand(ccOutIdx).getReg() && 6509 "expect uninitialized optional cc_out operand"); 6510 return; 6511 } 6512 6513 // If this instruction was defined with an optional CPSR def and its dag node 6514 // had a live implicit CPSR def, then activate the optional CPSR def. 6515 MachineOperand &MO = MI->getOperand(ccOutIdx); 6516 MO.setReg(ARM::CPSR); 6517 MO.setIsDef(true); 6518} 6519 6520//===----------------------------------------------------------------------===// 6521// ARM Optimization Hooks 6522//===----------------------------------------------------------------------===// 6523 6524static 6525SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 6526 TargetLowering::DAGCombinerInfo &DCI) { 6527 SelectionDAG &DAG = DCI.DAG; 6528 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6529 EVT VT = N->getValueType(0); 6530 unsigned Opc = N->getOpcode(); 6531 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 6532 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 6533 SDValue RHS = isSlctCC ? 
                                    Slct.getOperand(3) : Slct.getOperand(2);
  ISD::CondCode CC = ISD::SETCC_INVALID;

  if (isSlctCC) {
    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
  } else {
    SDValue CCOp = Slct.getOperand(0);
    if (CCOp.getOpcode() == ISD::SETCC)
      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
  }

  bool DoXform = false;
  bool InvCC = false;
  assert((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
         "Bad input!");

  if (LHS.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(LHS)->isNullValue()) {
    DoXform = true;
  } else if (CC != ISD::SETCC_INVALID &&
             RHS.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHS)->isNullValue()) {
    std::swap(LHS, RHS);
    SDValue Op0 = Slct.getOperand(0);
    EVT OpVT = isSlctCC ? Op0.getValueType() :
                          Op0.getOperand(0).getValueType();
    bool isInt = OpVT.isInteger();
    CC = ISD::getSetCCInverse(CC, isInt);

    if (!TLI.isCondCodeLegal(CC, OpVT))
      return SDValue();         // Inverse operator isn't legal.

    DoXform = true;
    InvCC = true;
  }

  if (DoXform) {
    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
    if (isSlctCC)
      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
                             Slct.getOperand(0), Slct.getOperand(1), CC);
    SDValue CCOp = Slct.getOperand(0);
    if (InvCC)
      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
                       CCOp, OtherOp, Result);
  }
  return SDValue();
}

// AddCombineToVPADDL - For a pair-wise add on NEON, use the vpaddl
// instruction (only after legalization).
static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {

  // Only perform this optimization after legalization and only if NEON is
  // available. We also expect both operands to be BUILD_VECTORs.
  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
      || N0.getOpcode() != ISD::BUILD_VECTOR
      || N1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
  EVT VT = N->getValueType(0);
  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
    return SDValue();

  // Check that the vector operands are of the right form.
  // N0 and N1 are BUILD_VECTOR nodes whose operands are all
  // EXTRACT_VECTOR_ELTs, one per element of the formed vector.
  // Each EXTRACT_VECTOR_ELT should have the same input vector and an odd or
  // even index such that we have a pair-wise add pattern.

  // Grab the vector that all EXTRACT_VECTOR_ELT nodes should be referencing.
  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = N0->getOperand(0)->getOperand(0);
  SDNode *V = Vec.getNode();
  unsigned nextIndex = 0;

  // For each operand of the ADD that is a BUILD_VECTOR, check whether each of
  // its operands is an EXTRACT_VECTOR_ELT with the same source vector and an
  // appropriate index.
  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      SDValue ExtVec0 = N0->getOperand(i);
      SDValue ExtVec1 = N1->getOperand(i);

      // The first operand is the vector; verify it's the same one.
      if (V != ExtVec0->getOperand(0).getNode() ||
          V != ExtVec1->getOperand(0).getNode())
        return SDValue();

      // The second is the constant index; verify it's correct.
      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));

      // The indices must be consecutive: the even one from N0 and the odd
      // one from N1.
      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
          || C1->getZExtValue() != nextIndex+1)
        return SDValue();

      // Increment index.
      nextIndex += 2;
    } else
      return SDValue();
  }

  // Create VPADDL node.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
                                TLI.getPointerTy()));

  // Input is the vector.
  Ops.push_back(Vec);

  // Get the widened vector type.
  MVT widenType;
  unsigned numElem = VT.getVectorNumElements();
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
    default:
      llvm_unreachable("Invalid vector element type for padd optimization.");
  }

  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
                            widenType, &Ops[0], Ops.size());
  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                          TargetLowering::DAGCombinerInfo &DCI,
                                             const ARMSubtarget *Subtarget) {

  // Attempt to create vpaddl for this add.
  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
  if (Result.getNode())
    return Result;

  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
    if (Result.getNode()) return Result;
  }
  return SDValue();
}

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // First try with the default operand order.
  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
  if (Result.getNode())
    return Result;

  // If that didn't work, try again with the operands commuted.
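  // (ISD::ADD is commutative, so the foldable select may sit in either
  // operand position; the helper above is simply retried with N1 and N0
  // swapped.)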
6708 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 6709} 6710 6711/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 6712/// 6713static SDValue PerformSUBCombine(SDNode *N, 6714 TargetLowering::DAGCombinerInfo &DCI) { 6715 SDValue N0 = N->getOperand(0); 6716 SDValue N1 = N->getOperand(1); 6717 6718 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 6719 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 6720 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 6721 if (Result.getNode()) return Result; 6722 } 6723 6724 return SDValue(); 6725} 6726 6727/// PerformVMULCombine 6728/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 6729/// special multiplier accumulator forwarding. 6730/// vmul d3, d0, d2 6731/// vmla d3, d1, d2 6732/// is faster than 6733/// vadd d3, d0, d1 6734/// vmul d3, d3, d2 6735static SDValue PerformVMULCombine(SDNode *N, 6736 TargetLowering::DAGCombinerInfo &DCI, 6737 const ARMSubtarget *Subtarget) { 6738 if (!Subtarget->hasVMLxForwarding()) 6739 return SDValue(); 6740 6741 SelectionDAG &DAG = DCI.DAG; 6742 SDValue N0 = N->getOperand(0); 6743 SDValue N1 = N->getOperand(1); 6744 unsigned Opcode = N0.getOpcode(); 6745 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 6746 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 6747 Opcode = N1.getOpcode(); 6748 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 6749 Opcode != ISD::FADD && Opcode != ISD::FSUB) 6750 return SDValue(); 6751 std::swap(N0, N1); 6752 } 6753 6754 EVT VT = N->getValueType(0); 6755 DebugLoc DL = N->getDebugLoc(); 6756 SDValue N00 = N0->getOperand(0); 6757 SDValue N01 = N0->getOperand(1); 6758 return DAG.getNode(Opcode, DL, VT, 6759 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 6760 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 6761} 6762 6763static SDValue PerformMULCombine(SDNode *N, 6764 TargetLowering::DAGCombinerInfo &DCI, 6765 const ARMSubtarget *Subtarget) { 6766 SelectionDAG &DAG = DCI.DAG; 6767 6768 if (Subtarget->isThumb1Only()) 6769 return SDValue(); 6770 6771 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 6772 return SDValue(); 6773 6774 EVT VT = N->getValueType(0); 6775 if (VT.is64BitVector() || VT.is128BitVector()) 6776 return PerformVMULCombine(N, DCI, Subtarget); 6777 if (VT != MVT::i32) 6778 return SDValue(); 6779 6780 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6781 if (!C) 6782 return SDValue(); 6783 6784 uint64_t MulAmt = C->getZExtValue(); 6785 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 6786 ShiftAmt = ShiftAmt & (32 - 1); 6787 SDValue V = N->getOperand(0); 6788 DebugLoc DL = N->getDebugLoc(); 6789 6790 SDValue Res; 6791 MulAmt >>= ShiftAmt; 6792 if (isPowerOf2_32(MulAmt - 1)) { 6793 // (mul x, 2^N + 1) => (add (shl x, N), x) 6794 Res = DAG.getNode(ISD::ADD, DL, VT, 6795 V, DAG.getNode(ISD::SHL, DL, VT, 6796 V, DAG.getConstant(Log2_32(MulAmt-1), 6797 MVT::i32))); 6798 } else if (isPowerOf2_32(MulAmt + 1)) { 6799 // (mul x, 2^N - 1) => (sub (shl x, N), x) 6800 Res = DAG.getNode(ISD::SUB, DL, VT, 6801 DAG.getNode(ISD::SHL, DL, VT, 6802 V, DAG.getConstant(Log2_32(MulAmt+1), 6803 MVT::i32)), 6804 V); 6805 } else 6806 return SDValue(); 6807 6808 if (ShiftAmt != 0) 6809 Res = DAG.getNode(ISD::SHL, DL, VT, Res, 6810 DAG.getConstant(ShiftAmt, MVT::i32)); 6811 6812 // Do not add new nodes to DAG combiner worklist. 
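  // (Worked example: MulAmt == 10 gives ShiftAmt == 1 and 10 >> 1 == 5 ==
  // 2^2 + 1, so (mul x, 10) becomes (shl (add x, (shl x, 2)), 1).)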
6813 DCI.CombineTo(N, Res, false); 6814 return SDValue(); 6815} 6816 6817static SDValue PerformANDCombine(SDNode *N, 6818 TargetLowering::DAGCombinerInfo &DCI) { 6819 6820 // Attempt to use immediate-form VBIC 6821 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 6822 DebugLoc dl = N->getDebugLoc(); 6823 EVT VT = N->getValueType(0); 6824 SelectionDAG &DAG = DCI.DAG; 6825 6826 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 6827 return SDValue(); 6828 6829 APInt SplatBits, SplatUndef; 6830 unsigned SplatBitSize; 6831 bool HasAnyUndefs; 6832 if (BVN && 6833 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 6834 if (SplatBitSize <= 64) { 6835 EVT VbicVT; 6836 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 6837 SplatUndef.getZExtValue(), SplatBitSize, 6838 DAG, VbicVT, VT.is128BitVector(), 6839 OtherModImm); 6840 if (Val.getNode()) { 6841 SDValue Input = 6842 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 6843 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 6844 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 6845 } 6846 } 6847 } 6848 6849 return SDValue(); 6850} 6851 6852/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 6853static SDValue PerformORCombine(SDNode *N, 6854 TargetLowering::DAGCombinerInfo &DCI, 6855 const ARMSubtarget *Subtarget) { 6856 // Attempt to use immediate-form VORR 6857 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 6858 DebugLoc dl = N->getDebugLoc(); 6859 EVT VT = N->getValueType(0); 6860 SelectionDAG &DAG = DCI.DAG; 6861 6862 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 6863 return SDValue(); 6864 6865 APInt SplatBits, SplatUndef; 6866 unsigned SplatBitSize; 6867 bool HasAnyUndefs; 6868 if (BVN && Subtarget->hasNEON() && 6869 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 6870 if (SplatBitSize <= 64) { 6871 EVT VorrVT; 6872 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 6873 SplatUndef.getZExtValue(), SplatBitSize, 6874 DAG, VorrVT, VT.is128BitVector(), 6875 OtherModImm); 6876 if (Val.getNode()) { 6877 SDValue Input = 6878 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 6879 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 6880 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 6881 } 6882 } 6883 } 6884 6885 SDValue N0 = N->getOperand(0); 6886 if (N0.getOpcode() != ISD::AND) 6887 return SDValue(); 6888 SDValue N1 = N->getOperand(1); 6889 6890 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 6891 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 6892 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 6893 APInt SplatUndef; 6894 unsigned SplatBitSize; 6895 bool HasAnyUndefs; 6896 6897 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 6898 APInt SplatBits0; 6899 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 6900 HasAnyUndefs) && !HasAnyUndefs) { 6901 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 6902 APInt SplatBits1; 6903 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 6904 HasAnyUndefs) && !HasAnyUndefs && 6905 SplatBits0 == ~SplatBits1) { 6906 // Canonicalize the vector type to make instruction selection simpler. 6907 EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; 6908 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 6909 N0->getOperand(1), N0->getOperand(0), 6910 N1->getOperand(0)); 6911 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 6912 } 6913 } 6914 } 6915 6916 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 6917 // reasonable. 6918 6919 // BFI is only available on V6T2+ 6920 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 6921 return SDValue(); 6922 6923 DebugLoc DL = N->getDebugLoc(); 6924 // 1) or (and A, mask), val => ARMbfi A, val, mask 6925 // iff (val & mask) == val 6926 // 6927 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 6928 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 6929 // && mask == ~mask2 6930 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 6931 // && ~mask == mask2 6932 // (i.e., copy a bitfield value into another bitfield of the same width) 6933 6934 if (VT != MVT::i32) 6935 return SDValue(); 6936 6937 SDValue N00 = N0.getOperand(0); 6938 6939 // The value and the mask need to be constants so we can verify this is 6940 // actually a bitfield set. If the mask is 0xffff, we can do better 6941 // via a movt instruction, so don't use BFI in that case. 6942 SDValue MaskOp = N0.getOperand(1); 6943 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 6944 if (!MaskC) 6945 return SDValue(); 6946 unsigned Mask = MaskC->getZExtValue(); 6947 if (Mask == 0xffff) 6948 return SDValue(); 6949 SDValue Res; 6950 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 6951 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 6952 if (N1C) { 6953 unsigned Val = N1C->getZExtValue(); 6954 if ((Val & ~Mask) != Val) 6955 return SDValue(); 6956 6957 if (ARM::isBitFieldInvertedMask(Mask)) { 6958 Val >>= CountTrailingZeros_32(~Mask); 6959 6960 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 6961 DAG.getConstant(Val, MVT::i32), 6962 DAG.getConstant(Mask, MVT::i32)); 6963 6964 // Do not add new nodes to DAG combiner worklist. 6965 DCI.CombineTo(N, Res, false); 6966 return SDValue(); 6967 } 6968 } else if (N1.getOpcode() == ISD::AND) { 6969 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 6970 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 6971 if (!N11C) 6972 return SDValue(); 6973 unsigned Mask2 = N11C->getZExtValue(); 6974 6975 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 6976 // as is to match. 6977 if (ARM::isBitFieldInvertedMask(Mask) && 6978 (Mask == ~Mask2)) { 6979 // The pack halfword instruction works better for masks that fit it, 6980 // so use that when it's available. 6981 if (Subtarget->hasT2ExtractPack() && 6982 (Mask == 0xffff || Mask == 0xffff0000)) 6983 return SDValue(); 6984 // 2a 6985 unsigned amt = CountTrailingZeros_32(Mask2); 6986 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 6987 DAG.getConstant(amt, MVT::i32)); 6988 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 6989 DAG.getConstant(Mask, MVT::i32)); 6990 // Do not add new nodes to DAG combiner worklist. 6991 DCI.CombineTo(N, Res, false); 6992 return SDValue(); 6993 } else if (ARM::isBitFieldInvertedMask(~Mask) && 6994 (~Mask == Mask2)) { 6995 // The pack halfword instruction works better for masks that fit it, 6996 // so use that when it's available. 
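      // (On such targets a single PKHBT/PKHTB can merge the two half-words,
      // which is why the 0xffff / 0xffff0000 masks are rejected here.)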
6997 if (Subtarget->hasT2ExtractPack() && 6998 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 6999 return SDValue(); 7000 // 2b 7001 unsigned lsb = CountTrailingZeros_32(Mask); 7002 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 7003 DAG.getConstant(lsb, MVT::i32)); 7004 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 7005 DAG.getConstant(Mask2, MVT::i32)); 7006 // Do not add new nodes to DAG combiner worklist. 7007 DCI.CombineTo(N, Res, false); 7008 return SDValue(); 7009 } 7010 } 7011 7012 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 7013 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 7014 ARM::isBitFieldInvertedMask(~Mask)) { 7015 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 7016 // where lsb(mask) == #shamt and masked bits of B are known zero. 7017 SDValue ShAmt = N00.getOperand(1); 7018 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 7019 unsigned LSB = CountTrailingZeros_32(Mask); 7020 if (ShAmtC != LSB) 7021 return SDValue(); 7022 7023 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 7024 DAG.getConstant(~Mask, MVT::i32)); 7025 7026 // Do not add new nodes to DAG combiner worklist. 7027 DCI.CombineTo(N, Res, false); 7028 } 7029 7030 return SDValue(); 7031} 7032 7033/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 7034/// the bits being cleared by the AND are not demanded by the BFI. 7035static SDValue PerformBFICombine(SDNode *N, 7036 TargetLowering::DAGCombinerInfo &DCI) { 7037 SDValue N1 = N->getOperand(1); 7038 if (N1.getOpcode() == ISD::AND) { 7039 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 7040 if (!N11C) 7041 return SDValue(); 7042 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 7043 unsigned LSB = CountTrailingZeros_32(~InvMask); 7044 unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; 7045 unsigned Mask = (1 << Width)-1; 7046 unsigned Mask2 = N11C->getZExtValue(); 7047 if ((Mask & (~Mask2)) == 0) 7048 return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), 7049 N->getOperand(0), N1.getOperand(0), 7050 N->getOperand(2)); 7051 } 7052 return SDValue(); 7053} 7054 7055/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 7056/// ARMISD::VMOVRRD. 7057static SDValue PerformVMOVRRDCombine(SDNode *N, 7058 TargetLowering::DAGCombinerInfo &DCI) { 7059 // vmovrrd(vmovdrr x, y) -> x,y 7060 SDValue InDouble = N->getOperand(0); 7061 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 7062 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 7063 7064 // vmovrrd(load f64) -> (load i32), (load i32) 7065 SDNode *InNode = InDouble.getNode(); 7066 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 7067 InNode->getValueType(0) == MVT::f64 && 7068 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 7069 !cast<LoadSDNode>(InNode)->isVolatile()) { 7070 // TODO: Should this be done for non-FrameIndex operands? 
7071 LoadSDNode *LD = cast<LoadSDNode>(InNode); 7072 7073 SelectionDAG &DAG = DCI.DAG; 7074 DebugLoc DL = LD->getDebugLoc(); 7075 SDValue BasePtr = LD->getBasePtr(); 7076 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 7077 LD->getPointerInfo(), LD->isVolatile(), 7078 LD->isNonTemporal(), LD->isInvariant(), 7079 LD->getAlignment()); 7080 7081 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 7082 DAG.getConstant(4, MVT::i32)); 7083 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 7084 LD->getPointerInfo(), LD->isVolatile(), 7085 LD->isNonTemporal(), LD->isInvariant(), 7086 std::min(4U, LD->getAlignment() / 2)); 7087 7088 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 7089 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 7090 DCI.RemoveFromWorklist(LD); 7091 DAG.DeleteNode(LD); 7092 return Result; 7093 } 7094 7095 return SDValue(); 7096} 7097 7098/// PerformVMOVDRRCombine - Target-specific dag combine xforms for 7099/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 7100static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 7101 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 7102 SDValue Op0 = N->getOperand(0); 7103 SDValue Op1 = N->getOperand(1); 7104 if (Op0.getOpcode() == ISD::BITCAST) 7105 Op0 = Op0.getOperand(0); 7106 if (Op1.getOpcode() == ISD::BITCAST) 7107 Op1 = Op1.getOperand(0); 7108 if (Op0.getOpcode() == ARMISD::VMOVRRD && 7109 Op0.getNode() == Op1.getNode() && 7110 Op0.getResNo() == 0 && Op1.getResNo() == 1) 7111 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 7112 N->getValueType(0), Op0.getOperand(0)); 7113 return SDValue(); 7114} 7115 7116/// PerformSTORECombine - Target-specific dag combine xforms for 7117/// ISD::STORE. 7118static SDValue PerformSTORECombine(SDNode *N, 7119 TargetLowering::DAGCombinerInfo &DCI) { 7120 // Bitcast an i64 store extracted from a vector to f64. 7121 // Otherwise, the i64 value will be legalized to a pair of i32 values. 
7122 StoreSDNode *St = cast<StoreSDNode>(N); 7123 SDValue StVal = St->getValue(); 7124 if (!ISD::isNormalStore(St) || St->isVolatile()) 7125 return SDValue(); 7126 7127 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 7128 StVal.getNode()->hasOneUse() && !St->isVolatile()) { 7129 SelectionDAG &DAG = DCI.DAG; 7130 DebugLoc DL = St->getDebugLoc(); 7131 SDValue BasePtr = St->getBasePtr(); 7132 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 7133 StVal.getNode()->getOperand(0), BasePtr, 7134 St->getPointerInfo(), St->isVolatile(), 7135 St->isNonTemporal(), St->getAlignment()); 7136 7137 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 7138 DAG.getConstant(4, MVT::i32)); 7139 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 7140 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 7141 St->isNonTemporal(), 7142 std::min(4U, St->getAlignment() / 2)); 7143 } 7144 7145 if (StVal.getValueType() != MVT::i64 || 7146 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 7147 return SDValue(); 7148 7149 SelectionDAG &DAG = DCI.DAG; 7150 DebugLoc dl = StVal.getDebugLoc(); 7151 SDValue IntVec = StVal.getOperand(0); 7152 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 7153 IntVec.getValueType().getVectorNumElements()); 7154 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 7155 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 7156 Vec, StVal.getOperand(1)); 7157 dl = N->getDebugLoc(); 7158 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 7159 // Make the DAGCombiner fold the bitcasts. 7160 DCI.AddToWorklist(Vec.getNode()); 7161 DCI.AddToWorklist(ExtElt.getNode()); 7162 DCI.AddToWorklist(V.getNode()); 7163 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 7164 St->getPointerInfo(), St->isVolatile(), 7165 St->isNonTemporal(), St->getAlignment(), 7166 St->getTBAAInfo()); 7167} 7168 7169/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 7170/// are normal, non-volatile loads. If so, it is profitable to bitcast an 7171/// i64 vector to have f64 elements, since the value can then be loaded 7172/// directly into a VFP register. 7173static bool hasNormalLoadOperand(SDNode *N) { 7174 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 7175 for (unsigned i = 0; i < NumElts; ++i) { 7176 SDNode *Elt = N->getOperand(i).getNode(); 7177 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 7178 return true; 7179 } 7180 return false; 7181} 7182 7183/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 7184/// ISD::BUILD_VECTOR. 7185static SDValue PerformBUILD_VECTORCombine(SDNode *N, 7186 TargetLowering::DAGCombinerInfo &DCI){ 7187 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 7188 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 7189 // into a pair of GPRs, which is fine when the value is used as a scalar, 7190 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 7191 SelectionDAG &DAG = DCI.DAG; 7192 if (N->getNumOperands() == 2) { 7193 SDValue RV = PerformVMOVDRRCombine(N, DAG); 7194 if (RV.getNode()) 7195 return RV; 7196 } 7197 7198 // Load i64 elements as f64 values so that type legalization does not split 7199 // them up into i32 values. 
7200 EVT VT = N->getValueType(0); 7201 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 7202 return SDValue(); 7203 DebugLoc dl = N->getDebugLoc(); 7204 SmallVector<SDValue, 8> Ops; 7205 unsigned NumElts = VT.getVectorNumElements(); 7206 for (unsigned i = 0; i < NumElts; ++i) { 7207 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 7208 Ops.push_back(V); 7209 // Make the DAGCombiner fold the bitcast. 7210 DCI.AddToWorklist(V.getNode()); 7211 } 7212 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 7213 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); 7214 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 7215} 7216 7217/// PerformInsertEltCombine - Target-specific dag combine xforms for 7218/// ISD::INSERT_VECTOR_ELT. 7219static SDValue PerformInsertEltCombine(SDNode *N, 7220 TargetLowering::DAGCombinerInfo &DCI) { 7221 // Bitcast an i64 load inserted into a vector to f64. 7222 // Otherwise, the i64 value will be legalized to a pair of i32 values. 7223 EVT VT = N->getValueType(0); 7224 SDNode *Elt = N->getOperand(1).getNode(); 7225 if (VT.getVectorElementType() != MVT::i64 || 7226 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 7227 return SDValue(); 7228 7229 SelectionDAG &DAG = DCI.DAG; 7230 DebugLoc dl = N->getDebugLoc(); 7231 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 7232 VT.getVectorNumElements()); 7233 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 7234 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 7235 // Make the DAGCombiner fold the bitcasts. 7236 DCI.AddToWorklist(Vec.getNode()); 7237 DCI.AddToWorklist(V.getNode()); 7238 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 7239 Vec, V, N->getOperand(2)); 7240 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 7241} 7242 7243/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 7244/// ISD::VECTOR_SHUFFLE. 7245static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 7246 // The LLVM shufflevector instruction does not require the shuffle mask 7247 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 7248 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 7249 // operands do not match the mask length, they are extended by concatenating 7250 // them with undef vectors. That is probably the right thing for other 7251 // targets, but for NEON it is better to concatenate two double-register 7252 // size vector operands into a single quad-register size vector. Do that 7253 // transformation here: 7254 // shuffle(concat(v1, undef), concat(v2, undef)) -> 7255 // shuffle(concat(v1, v2), undef) 7256 SDValue Op0 = N->getOperand(0); 7257 SDValue Op1 = N->getOperand(1); 7258 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 7259 Op1.getOpcode() != ISD::CONCAT_VECTORS || 7260 Op0.getNumOperands() != 2 || 7261 Op1.getNumOperands() != 2) 7262 return SDValue(); 7263 SDValue Concat0Op1 = Op0.getOperand(1); 7264 SDValue Concat1Op1 = Op1.getOperand(1); 7265 if (Concat0Op1.getOpcode() != ISD::UNDEF || 7266 Concat1Op1.getOpcode() != ISD::UNDEF) 7267 return SDValue(); 7268 // Skip the transformation if any of the types are illegal. 
7269 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7270 EVT VT = N->getValueType(0); 7271 if (!TLI.isTypeLegal(VT) || 7272 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 7273 !TLI.isTypeLegal(Concat1Op1.getValueType())) 7274 return SDValue(); 7275 7276 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 7277 Op0.getOperand(0), Op1.getOperand(0)); 7278 // Translate the shuffle mask. 7279 SmallVector<int, 16> NewMask; 7280 unsigned NumElts = VT.getVectorNumElements(); 7281 unsigned HalfElts = NumElts/2; 7282 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7283 for (unsigned n = 0; n < NumElts; ++n) { 7284 int MaskElt = SVN->getMaskElt(n); 7285 int NewElt = -1; 7286 if (MaskElt < (int)HalfElts) 7287 NewElt = MaskElt; 7288 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 7289 NewElt = HalfElts + MaskElt - NumElts; 7290 NewMask.push_back(NewElt); 7291 } 7292 return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, 7293 DAG.getUNDEF(VT), NewMask.data()); 7294} 7295 7296/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 7297/// NEON load/store intrinsics to merge base address updates. 7298static SDValue CombineBaseUpdate(SDNode *N, 7299 TargetLowering::DAGCombinerInfo &DCI) { 7300 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 7301 return SDValue(); 7302 7303 SelectionDAG &DAG = DCI.DAG; 7304 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 7305 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 7306 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 7307 SDValue Addr = N->getOperand(AddrOpIdx); 7308 7309 // Search for a use of the address operand that is an increment. 7310 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 7311 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 7312 SDNode *User = *UI; 7313 if (User->getOpcode() != ISD::ADD || 7314 UI.getUse().getResNo() != Addr.getResNo()) 7315 continue; 7316 7317 // Check that the add is independent of the load/store. Otherwise, folding 7318 // it would create a cycle. 7319 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 7320 continue; 7321 7322 // Find the new opcode for the updating load/store. 
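    // For example (illustrative): a 16-byte vld1 whose address is also
    // incremented by 16,
    //   %vec = vld1(%p);  %p.next = add %p, 16
    // can become a single updating load such as
    //   vld1.64 {d16, d17}, [r0]!
    // with the post-incremented address produced as an extra result.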
7323 bool isLoad = true; 7324 bool isLaneOp = false; 7325 unsigned NewOpc = 0; 7326 unsigned NumVecs = 0; 7327 if (isIntrinsic) { 7328 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 7329 switch (IntNo) { 7330 default: assert(0 && "unexpected intrinsic for Neon base update"); 7331 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 7332 NumVecs = 1; break; 7333 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 7334 NumVecs = 2; break; 7335 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 7336 NumVecs = 3; break; 7337 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 7338 NumVecs = 4; break; 7339 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 7340 NumVecs = 2; isLaneOp = true; break; 7341 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 7342 NumVecs = 3; isLaneOp = true; break; 7343 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 7344 NumVecs = 4; isLaneOp = true; break; 7345 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 7346 NumVecs = 1; isLoad = false; break; 7347 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 7348 NumVecs = 2; isLoad = false; break; 7349 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 7350 NumVecs = 3; isLoad = false; break; 7351 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 7352 NumVecs = 4; isLoad = false; break; 7353 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 7354 NumVecs = 2; isLoad = false; isLaneOp = true; break; 7355 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 7356 NumVecs = 3; isLoad = false; isLaneOp = true; break; 7357 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 7358 NumVecs = 4; isLoad = false; isLaneOp = true; break; 7359 } 7360 } else { 7361 isLaneOp = true; 7362 switch (N->getOpcode()) { 7363 default: assert(0 && "unexpected opcode for Neon base update"); 7364 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 7365 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 7366 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 7367 } 7368 } 7369 7370 // Find the size of memory referenced by the load/store. 7371 EVT VecTy; 7372 if (isLoad) 7373 VecTy = N->getValueType(0); 7374 else 7375 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 7376 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 7377 if (isLaneOp) 7378 NumBytes /= VecTy.getVectorNumElements(); 7379 7380 // If the increment is a constant, it must match the memory ref size. 7381 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 7382 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 7383 uint64_t IncVal = CInc->getZExtValue(); 7384 if (IncVal != NumBytes) 7385 continue; 7386 } else if (NumBytes >= 3 * 16) { 7387 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 7388 // separate instructions that make it harder to use a non-constant update. 7389 continue; 7390 } 7391 7392 // Create the new updating load/store node. 7393 EVT Tys[6]; 7394 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 7395 unsigned n; 7396 for (n = 0; n < NumResultVecs; ++n) 7397 Tys[n] = VecTy; 7398 Tys[n++] = MVT::i32; 7399 Tys[n] = MVT::Other; 7400 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 7401 SmallVector<SDValue, 8> Ops; 7402 Ops.push_back(N->getOperand(0)); // incoming chain 7403 Ops.push_back(N->getOperand(AddrOpIdx)); 7404 Ops.push_back(Inc); 7405 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 7406 Ops.push_back(N->getOperand(i)); 7407 } 7408 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 7409 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, 7410 Ops.data(), Ops.size(), 7411 MemInt->getMemoryVT(), 7412 MemInt->getMemOperand()); 7413 7414 // Update the uses. 7415 std::vector<SDValue> NewResults; 7416 for (unsigned i = 0; i < NumResultVecs; ++i) { 7417 NewResults.push_back(SDValue(UpdN.getNode(), i)); 7418 } 7419 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 7420 DCI.CombineTo(N, NewResults); 7421 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 7422 7423 break; 7424 } 7425 return SDValue(); 7426} 7427 7428/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 7429/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 7430/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 7431/// return true. 7432static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 7433 SelectionDAG &DAG = DCI.DAG; 7434 EVT VT = N->getValueType(0); 7435 // vldN-dup instructions only support 64-bit vectors for N > 1. 7436 if (!VT.is64BitVector()) 7437 return false; 7438 7439 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 7440 SDNode *VLD = N->getOperand(0).getNode(); 7441 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 7442 return false; 7443 unsigned NumVecs = 0; 7444 unsigned NewOpc = 0; 7445 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 7446 if (IntNo == Intrinsic::arm_neon_vld2lane) { 7447 NumVecs = 2; 7448 NewOpc = ARMISD::VLD2DUP; 7449 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 7450 NumVecs = 3; 7451 NewOpc = ARMISD::VLD3DUP; 7452 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 7453 NumVecs = 4; 7454 NewOpc = ARMISD::VLD4DUP; 7455 } else { 7456 return false; 7457 } 7458 7459 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 7460 // numbers match the load. 7461 unsigned VLDLaneNo = 7462 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 7463 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 7464 UI != UE; ++UI) { 7465 // Ignore uses of the chain result. 7466 if (UI.getUse().getResNo() == NumVecs) 7467 continue; 7468 SDNode *User = *UI; 7469 if (User->getOpcode() != ARMISD::VDUPLANE || 7470 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 7471 return false; 7472 } 7473 7474 // Create the vldN-dup node. 7475 EVT Tys[5]; 7476 unsigned n; 7477 for (n = 0; n < NumVecs; ++n) 7478 Tys[n] = VT; 7479 Tys[n] = MVT::Other; 7480 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 7481 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 7482 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 7483 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, 7484 Ops, 2, VLDMemInt->getMemoryVT(), 7485 VLDMemInt->getMemOperand()); 7486 7487 // Update the uses. 
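  // e.g. a vld2.32 {d16[1], d17[1]}, [r0] whose results feed only VDUPLANEs
  // is replaced with an all-lanes vld2.32 {d16[], d17[]}, [r0] (illustrative
  // encoding), and each VDUPLANE user below is rewritten to read the
  // corresponding result of the new node directly.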
7488 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 7489 UI != UE; ++UI) { 7490 unsigned ResNo = UI.getUse().getResNo(); 7491 // Ignore uses of the chain result. 7492 if (ResNo == NumVecs) 7493 continue; 7494 SDNode *User = *UI; 7495 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 7496 } 7497 7498 // Now the vldN-lane intrinsic is dead except for its chain result. 7499 // Update uses of the chain. 7500 std::vector<SDValue> VLDDupResults; 7501 for (unsigned n = 0; n < NumVecs; ++n) 7502 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 7503 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 7504 DCI.CombineTo(VLD, VLDDupResults); 7505 7506 return true; 7507} 7508 7509/// PerformVDUPLANECombine - Target-specific dag combine xforms for 7510/// ARMISD::VDUPLANE. 7511static SDValue PerformVDUPLANECombine(SDNode *N, 7512 TargetLowering::DAGCombinerInfo &DCI) { 7513 SDValue Op = N->getOperand(0); 7514 7515 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 7516 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 7517 if (CombineVLDDUP(N, DCI)) 7518 return SDValue(N, 0); 7519 7520 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 7521 // redundant. Ignore bit_converts for now; element sizes are checked below. 7522 while (Op.getOpcode() == ISD::BITCAST) 7523 Op = Op.getOperand(0); 7524 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 7525 return SDValue(); 7526 7527 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 7528 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 7529 // The canonical VMOV for a zero vector uses a 32-bit element size. 7530 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 7531 unsigned EltBits; 7532 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 7533 EltSize = 8; 7534 EVT VT = N->getValueType(0); 7535 if (EltSize > VT.getVectorElementType().getSizeInBits()) 7536 return SDValue(); 7537 7538 return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); 7539} 7540 7541// isConstVecPow2 - Return true if each vector element is a power of 2, all 7542// elements are the same constant, C, and Log2(C) ranges from 1 to 32. 7543static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 7544{ 7545 integerPart cN; 7546 integerPart c0 = 0; 7547 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 7548 I != E; I++) { 7549 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 7550 if (!C) 7551 return false; 7552 7553 bool isExact; 7554 APFloat APF = C->getValueAPF(); 7555 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 7556 != APFloat::opOK || !isExact) 7557 return false; 7558 7559 c0 = (I == 0) ? cN : c0; 7560 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 7561 return false; 7562 } 7563 C = c0; 7564 return true; 7565} 7566 7567/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 7568/// can replace combinations of VMUL and VCVT (floating-point to integer) 7569/// when the VMUL has a constant operand that is a power of 2. 
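/// The scale factor folds into the conversion: a NEON fixed-point VCVT
/// with #n fraction bits computes round(x * 2^n), so multiplying by the
/// power-of-2 constant beforehand is redundant.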
7570/// 7571/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 7572/// vmul.f32 d16, d17, d16 7573/// vcvt.s32.f32 d16, d16 7574/// becomes: 7575/// vcvt.s32.f32 d16, d16, #3 7576static SDValue PerformVCVTCombine(SDNode *N, 7577 TargetLowering::DAGCombinerInfo &DCI, 7578 const ARMSubtarget *Subtarget) { 7579 SelectionDAG &DAG = DCI.DAG; 7580 SDValue Op = N->getOperand(0); 7581 7582 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 7583 Op.getOpcode() != ISD::FMUL) 7584 return SDValue(); 7585 7586 uint64_t C; 7587 SDValue N0 = Op->getOperand(0); 7588 SDValue ConstVec = Op->getOperand(1); 7589 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 7590 7591 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 7592 !isConstVecPow2(ConstVec, isSigned, C)) 7593 return SDValue(); 7594 7595 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 7596 Intrinsic::arm_neon_vcvtfp2fxu; 7597 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 7598 N->getValueType(0), 7599 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 7600 DAG.getConstant(Log2_64(C), MVT::i32)); 7601} 7602 7603/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 7604/// can replace combinations of VCVT (integer to floating-point) and VDIV 7605/// when the VDIV has a constant operand that is a power of 2. 7606/// 7607/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 7608/// vcvt.f32.s32 d16, d16 7609/// vdiv.f32 d16, d17, d16 7610/// becomes: 7611/// vcvt.f32.s32 d16, d16, #3 7612static SDValue PerformVDIVCombine(SDNode *N, 7613 TargetLowering::DAGCombinerInfo &DCI, 7614 const ARMSubtarget *Subtarget) { 7615 SelectionDAG &DAG = DCI.DAG; 7616 SDValue Op = N->getOperand(0); 7617 unsigned OpOpcode = Op.getNode()->getOpcode(); 7618 7619 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 7620 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 7621 return SDValue(); 7622 7623 uint64_t C; 7624 SDValue ConstVec = N->getOperand(1); 7625 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 7626 7627 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 7628 !isConstVecPow2(ConstVec, isSigned, C)) 7629 return SDValue(); 7630 7631 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : 7632 Intrinsic::arm_neon_vcvtfxu2fp; 7633 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 7634 Op.getValueType(), 7635 DAG.getConstant(IntrinsicOpcode, MVT::i32), 7636 Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); 7637} 7638 7639/// Getvshiftimm - Check if this is a valid build_vector for the immediate 7640/// operand of a vector shift operation, where all the elements of the 7641/// build_vector must have the same constant integer value. 7642static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 7643 // Ignore bit_converts. 7644 while (Op.getOpcode() == ISD::BITCAST) 7645 Op = Op.getOperand(0); 7646 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 7647 APInt SplatBits, SplatUndef; 7648 unsigned SplatBitSize; 7649 bool HasAnyUndefs; 7650 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 7651 HasAnyUndefs, ElementBits) || 7652 SplatBitSize > ElementBits) 7653 return false; 7654 Cnt = SplatBits.getSExtValue(); 7655 return true; 7656} 7657 7658/// isVShiftLImm - Check if this is a valid build_vector for the immediate 7659/// operand of a vector shift left operation. 
That value must be in the range: 7660/// 0 <= Value < ElementBits for a left shift; or 7661/// 0 <= Value <= ElementBits for a long left shift. 7662static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 7663 assert(VT.isVector() && "vector shift count is not a vector type"); 7664 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 7665 if (! getVShiftImm(Op, ElementBits, Cnt)) 7666 return false; 7667 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 7668} 7669 7670/// isVShiftRImm - Check if this is a valid build_vector for the immediate 7671/// operand of a vector shift right operation. For a shift opcode, the value 7672/// is positive, but for an intrinsic the value count must be negative. The 7673/// absolute value must be in the range: 7674/// 1 <= |Value| <= ElementBits for a right shift; or 7675/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 7676static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 7677 int64_t &Cnt) { 7678 assert(VT.isVector() && "vector shift count is not a vector type"); 7679 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 7680 if (! getVShiftImm(Op, ElementBits, Cnt)) 7681 return false; 7682 if (isIntrinsic) 7683 Cnt = -Cnt; 7684 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 7685} 7686 7687/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 7688static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 7689 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 7690 switch (IntNo) { 7691 default: 7692 // Don't do anything for most intrinsics. 7693 break; 7694 7695 // Vector shifts: check for immediate versions and lower them. 7696 // Note: This is done during DAG combining instead of DAG legalizing because 7697 // the build_vectors for 64-bit vector element shift counts are generally 7698 // not legal, and it is hard to see their values after they get legalized to 7699 // loads from a constant pool. 7700 case Intrinsic::arm_neon_vshifts: 7701 case Intrinsic::arm_neon_vshiftu: 7702 case Intrinsic::arm_neon_vshiftls: 7703 case Intrinsic::arm_neon_vshiftlu: 7704 case Intrinsic::arm_neon_vshiftn: 7705 case Intrinsic::arm_neon_vrshifts: 7706 case Intrinsic::arm_neon_vrshiftu: 7707 case Intrinsic::arm_neon_vrshiftn: 7708 case Intrinsic::arm_neon_vqshifts: 7709 case Intrinsic::arm_neon_vqshiftu: 7710 case Intrinsic::arm_neon_vqshiftsu: 7711 case Intrinsic::arm_neon_vqshiftns: 7712 case Intrinsic::arm_neon_vqshiftnu: 7713 case Intrinsic::arm_neon_vqshiftnsu: 7714 case Intrinsic::arm_neon_vqrshiftns: 7715 case Intrinsic::arm_neon_vqrshiftnu: 7716 case Intrinsic::arm_neon_vqrshiftnsu: { 7717 EVT VT = N->getOperand(1).getValueType(); 7718 int64_t Cnt; 7719 unsigned VShiftOpc = 0; 7720 7721 switch (IntNo) { 7722 case Intrinsic::arm_neon_vshifts: 7723 case Intrinsic::arm_neon_vshiftu: 7724 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 7725 VShiftOpc = ARMISD::VSHL; 7726 break; 7727 } 7728 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 7729 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
7730 ARMISD::VSHRs : ARMISD::VSHRu); 7731 break; 7732 } 7733 return SDValue(); 7734 7735 case Intrinsic::arm_neon_vshiftls: 7736 case Intrinsic::arm_neon_vshiftlu: 7737 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 7738 break; 7739 llvm_unreachable("invalid shift count for vshll intrinsic"); 7740 7741 case Intrinsic::arm_neon_vrshifts: 7742 case Intrinsic::arm_neon_vrshiftu: 7743 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 7744 break; 7745 return SDValue(); 7746 7747 case Intrinsic::arm_neon_vqshifts: 7748 case Intrinsic::arm_neon_vqshiftu: 7749 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 7750 break; 7751 return SDValue(); 7752 7753 case Intrinsic::arm_neon_vqshiftsu: 7754 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 7755 break; 7756 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 7757 7758 case Intrinsic::arm_neon_vshiftn: 7759 case Intrinsic::arm_neon_vrshiftn: 7760 case Intrinsic::arm_neon_vqshiftns: 7761 case Intrinsic::arm_neon_vqshiftnu: 7762 case Intrinsic::arm_neon_vqshiftnsu: 7763 case Intrinsic::arm_neon_vqrshiftns: 7764 case Intrinsic::arm_neon_vqrshiftnu: 7765 case Intrinsic::arm_neon_vqrshiftnsu: 7766 // Narrowing shifts require an immediate right shift. 7767 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 7768 break; 7769 llvm_unreachable("invalid shift count for narrowing vector shift " 7770 "intrinsic"); 7771 7772 default: 7773 llvm_unreachable("unhandled vector shift"); 7774 } 7775 7776 switch (IntNo) { 7777 case Intrinsic::arm_neon_vshifts: 7778 case Intrinsic::arm_neon_vshiftu: 7779 // Opcode already set above. 7780 break; 7781 case Intrinsic::arm_neon_vshiftls: 7782 case Intrinsic::arm_neon_vshiftlu: 7783 if (Cnt == VT.getVectorElementType().getSizeInBits()) 7784 VShiftOpc = ARMISD::VSHLLi; 7785 else 7786 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
7787 ARMISD::VSHLLs : ARMISD::VSHLLu); 7788 break; 7789 case Intrinsic::arm_neon_vshiftn: 7790 VShiftOpc = ARMISD::VSHRN; break; 7791 case Intrinsic::arm_neon_vrshifts: 7792 VShiftOpc = ARMISD::VRSHRs; break; 7793 case Intrinsic::arm_neon_vrshiftu: 7794 VShiftOpc = ARMISD::VRSHRu; break; 7795 case Intrinsic::arm_neon_vrshiftn: 7796 VShiftOpc = ARMISD::VRSHRN; break; 7797 case Intrinsic::arm_neon_vqshifts: 7798 VShiftOpc = ARMISD::VQSHLs; break; 7799 case Intrinsic::arm_neon_vqshiftu: 7800 VShiftOpc = ARMISD::VQSHLu; break; 7801 case Intrinsic::arm_neon_vqshiftsu: 7802 VShiftOpc = ARMISD::VQSHLsu; break; 7803 case Intrinsic::arm_neon_vqshiftns: 7804 VShiftOpc = ARMISD::VQSHRNs; break; 7805 case Intrinsic::arm_neon_vqshiftnu: 7806 VShiftOpc = ARMISD::VQSHRNu; break; 7807 case Intrinsic::arm_neon_vqshiftnsu: 7808 VShiftOpc = ARMISD::VQSHRNsu; break; 7809 case Intrinsic::arm_neon_vqrshiftns: 7810 VShiftOpc = ARMISD::VQRSHRNs; break; 7811 case Intrinsic::arm_neon_vqrshiftnu: 7812 VShiftOpc = ARMISD::VQRSHRNu; break; 7813 case Intrinsic::arm_neon_vqrshiftnsu: 7814 VShiftOpc = ARMISD::VQRSHRNsu; break; 7815 } 7816 7817 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 7818 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 7819 } 7820 7821 case Intrinsic::arm_neon_vshiftins: { 7822 EVT VT = N->getOperand(1).getValueType(); 7823 int64_t Cnt; 7824 unsigned VShiftOpc = 0; 7825 7826 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 7827 VShiftOpc = ARMISD::VSLI; 7828 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 7829 VShiftOpc = ARMISD::VSRI; 7830 else { 7831 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 7832 } 7833 7834 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 7835 N->getOperand(1), N->getOperand(2), 7836 DAG.getConstant(Cnt, MVT::i32)); 7837 } 7838 7839 case Intrinsic::arm_neon_vqrshifts: 7840 case Intrinsic::arm_neon_vqrshiftu: 7841 // No immediate versions of these to check for. 7842 break; 7843 } 7844 7845 return SDValue(); 7846} 7847 7848/// PerformShiftCombine - Checks for immediate versions of vector shifts and 7849/// lowers them. As with the vector shift intrinsics, this is done during DAG 7850/// combining instead of DAG legalizing because the build_vectors for 64-bit 7851/// vector element shift counts are generally not legal, and it is hard to see 7852/// their values after they get legalized to loads from a constant pool. 7853static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 7854 const ARMSubtarget *ST) { 7855 EVT VT = N->getValueType(0); 7856 7857 // Nothing to be done for scalar shifts. 7858 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7859 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 7860 return SDValue(); 7861 7862 assert(ST->hasNEON() && "unexpected vector shift"); 7863 int64_t Cnt; 7864 7865 switch (N->getOpcode()) { 7866 default: llvm_unreachable("unexpected shift opcode"); 7867 7868 case ISD::SHL: 7869 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 7870 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 7871 DAG.getConstant(Cnt, MVT::i32)); 7872 break; 7873 7874 case ISD::SRA: 7875 case ISD::SRL: 7876 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 7877 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
7878 ARMISD::VSHRs : ARMISD::VSHRu); 7879 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 7880 DAG.getConstant(Cnt, MVT::i32)); 7881 } 7882 } 7883 return SDValue(); 7884} 7885 7886/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 7887/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 7888static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 7889 const ARMSubtarget *ST) { 7890 SDValue N0 = N->getOperand(0); 7891 7892 // Check for sign- and zero-extensions of vector extract operations of 8- 7893 // and 16-bit vector elements. NEON supports these directly. They are 7894 // handled during DAG combining because type legalization will promote them 7895 // to 32-bit types and it is messy to recognize the operations after that. 7896 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 7897 SDValue Vec = N0.getOperand(0); 7898 SDValue Lane = N0.getOperand(1); 7899 EVT VT = N->getValueType(0); 7900 EVT EltVT = N0.getValueType(); 7901 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7902 7903 if (VT == MVT::i32 && 7904 (EltVT == MVT::i8 || EltVT == MVT::i16) && 7905 TLI.isTypeLegal(Vec.getValueType()) && 7906 isa<ConstantSDNode>(Lane)) { 7907 7908 unsigned Opc = 0; 7909 switch (N->getOpcode()) { 7910 default: llvm_unreachable("unexpected opcode"); 7911 case ISD::SIGN_EXTEND: 7912 Opc = ARMISD::VGETLANEs; 7913 break; 7914 case ISD::ZERO_EXTEND: 7915 case ISD::ANY_EXTEND: 7916 Opc = ARMISD::VGETLANEu; 7917 break; 7918 } 7919 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 7920 } 7921 } 7922 7923 return SDValue(); 7924} 7925 7926/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 7927/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 7928static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 7929 const ARMSubtarget *ST) { 7930 // If the target supports NEON, try to use vmax/vmin instructions for f32 7931 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 7932 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 7933 // a NaN; only do the transformation when it matches that behavior. 7934 7935 // For now only do this when using NEON for FP operations; if using VFP, it 7936 // is not obvious that the benefit outweighs the cost of switching to the 7937 // NEON pipeline. 7938 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 7939 N->getValueType(0) != MVT::f32) 7940 return SDValue(); 7941 7942 SDValue CondLHS = N->getOperand(0); 7943 SDValue CondRHS = N->getOperand(1); 7944 SDValue LHS = N->getOperand(2); 7945 SDValue RHS = N->getOperand(3); 7946 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 7947 7948 unsigned Opcode = 0; 7949 bool IsReversed; 7950 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 7951 IsReversed = false; // x CC y ? x : y 7952 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 7953 IsReversed = true ; // x CC y ? y : x 7954 } else { 7955 return SDValue(); 7956 } 7957 7958 bool IsUnordered; 7959 switch (CC) { 7960 default: break; 7961 case ISD::SETOLT: 7962 case ISD::SETOLE: 7963 case ISD::SETLT: 7964 case ISD::SETLE: 7965 case ISD::SETULT: 7966 case ISD::SETULE: 7967 // If LHS is NaN, an ordered comparison will be false and the result will 7968 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 7969 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
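    // e.g. with LHS = NaN:  (LHS olt RHS ? LHS : RHS) yields RHS, yet
    // vmin(NaN, RHS) yields NaN; with RHS = NaN: (LHS ult RHS ? LHS : RHS)
    // yields LHS, yet vmin(LHS, NaN) yields NaN.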
7970 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 7971 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 7972 break; 7973 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 7974 // will return -0, so vmin can only be used for unsafe math or if one of 7975 // the operands is known to be nonzero. 7976 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 7977 !DAG.getTarget().Options.UnsafeFPMath && 7978 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 7979 break; 7980 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 7981 break; 7982 7983 case ISD::SETOGT: 7984 case ISD::SETOGE: 7985 case ISD::SETGT: 7986 case ISD::SETGE: 7987 case ISD::SETUGT: 7988 case ISD::SETUGE: 7989 // If LHS is NaN, an ordered comparison will be false and the result will 7990 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 7991 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 7992 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 7993 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 7994 break; 7995 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 7996 // will return +0, so vmax can only be used for unsafe math or if one of 7997 // the operands is known to be nonzero. 7998 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 7999 !DAG.getTarget().Options.UnsafeFPMath && 8000 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 8001 break; 8002 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 8003 break; 8004 } 8005 8006 if (!Opcode) 8007 return SDValue(); 8008 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 8009} 8010 8011/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 8012SDValue 8013ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 8014 SDValue Cmp = N->getOperand(4); 8015 if (Cmp.getOpcode() != ARMISD::CMPZ) 8016 // Only looking at EQ and NE cases. 8017 return SDValue(); 8018 8019 EVT VT = N->getValueType(0); 8020 DebugLoc dl = N->getDebugLoc(); 8021 SDValue LHS = Cmp.getOperand(0); 8022 SDValue RHS = Cmp.getOperand(1); 8023 SDValue FalseVal = N->getOperand(0); 8024 SDValue TrueVal = N->getOperand(1); 8025 SDValue ARMcc = N->getOperand(2); 8026 ARMCC::CondCodes CC = 8027 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 8028 8029 // Simplify 8030 // mov r1, r0 8031 // cmp r1, x 8032 // mov r0, y 8033 // moveq r0, x 8034 // to 8035 // cmp r0, x 8036 // movne r0, y 8037 // 8038 // mov r1, r0 8039 // cmp r1, x 8040 // mov r0, x 8041 // movne r0, y 8042 // to 8043 // cmp r0, x 8044 // movne r0, y 8045 /// FIXME: Turn this into a target neutral optimization? 8046 SDValue Res; 8047 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 8048 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 8049 N->getOperand(3), Cmp); 8050 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 8051 SDValue ARMcc; 8052 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 8053 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 8054 N->getOperand(3), NewCmp); 8055 } 8056 8057 if (Res.getNode()) { 8058 APInt KnownZero, KnownOne; 8059 APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); 8060 DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); 8061 // Capture demanded bits information that would be otherwise lost. 
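  // e.g. if the selected value is later masked with "and r0, r0, #1", the
  // AssertZext added below records that the upper bits are already zero and
  // lets that mask be removed.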
8062 if (KnownZero == 0xfffffffe) 8063 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8064 DAG.getValueType(MVT::i1)); 8065 else if (KnownZero == 0xffffff00) 8066 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8067 DAG.getValueType(MVT::i8)); 8068 else if (KnownZero == 0xffff0000) 8069 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8070 DAG.getValueType(MVT::i16)); 8071 } 8072 8073 return Res; 8074} 8075 8076SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 8077 DAGCombinerInfo &DCI) const { 8078 switch (N->getOpcode()) { 8079 default: break; 8080 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 8081 case ISD::SUB: return PerformSUBCombine(N, DCI); 8082 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 8083 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 8084 case ISD::AND: return PerformANDCombine(N, DCI); 8085 case ARMISD::BFI: return PerformBFICombine(N, DCI); 8086 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 8087 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 8088 case ISD::STORE: return PerformSTORECombine(N, DCI); 8089 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 8090 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 8091 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 8092 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 8093 case ISD::FP_TO_SINT: 8094 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 8095 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 8096 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 8097 case ISD::SHL: 8098 case ISD::SRA: 8099 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 8100 case ISD::SIGN_EXTEND: 8101 case ISD::ZERO_EXTEND: 8102 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 8103 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 8104 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); 8105 case ARMISD::VLD2DUP: 8106 case ARMISD::VLD3DUP: 8107 case ARMISD::VLD4DUP: 8108 return CombineBaseUpdate(N, DCI); 8109 case ISD::INTRINSIC_VOID: 8110 case ISD::INTRINSIC_W_CHAIN: 8111 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 8112 case Intrinsic::arm_neon_vld1: 8113 case Intrinsic::arm_neon_vld2: 8114 case Intrinsic::arm_neon_vld3: 8115 case Intrinsic::arm_neon_vld4: 8116 case Intrinsic::arm_neon_vld2lane: 8117 case Intrinsic::arm_neon_vld3lane: 8118 case Intrinsic::arm_neon_vld4lane: 8119 case Intrinsic::arm_neon_vst1: 8120 case Intrinsic::arm_neon_vst2: 8121 case Intrinsic::arm_neon_vst3: 8122 case Intrinsic::arm_neon_vst4: 8123 case Intrinsic::arm_neon_vst2lane: 8124 case Intrinsic::arm_neon_vst3lane: 8125 case Intrinsic::arm_neon_vst4lane: 8126 return CombineBaseUpdate(N, DCI); 8127 default: break; 8128 } 8129 break; 8130 } 8131 return SDValue(); 8132} 8133 8134bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 8135 EVT VT) const { 8136 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 8137} 8138 8139bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 8140 if (!Subtarget->allowsUnalignedMem()) 8141 return false; 8142 8143 switch (VT.getSimpleVT().SimpleTy) { 8144 default: 8145 return false; 8146 case MVT::i8: 8147 case MVT::i16: 8148 case MVT::i32: 8149 return true; 8150 // FIXME: VLD1 etc with standard alignment is legal. 
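  // A VLD1.8/VST1.8, for instance, needs only its element alignment, so
  // returning false for vector types here is conservative rather than an
  // architectural requirement.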
8151 } 8152} 8153 8154static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, 8155 unsigned AlignCheck) { 8156 return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && 8157 (DstAlign == 0 || DstAlign % AlignCheck == 0)); 8158} 8159 8160EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, 8161 unsigned DstAlign, unsigned SrcAlign, 8162 bool IsZeroVal, 8163 bool MemcpyStrSrc, 8164 MachineFunction &MF) const { 8165 const Function *F = MF.getFunction(); 8166 8167 // See if we can use NEON instructions for this... 8168 if (IsZeroVal && 8169 !F->hasFnAttr(Attribute::NoImplicitFloat) && 8170 Subtarget->hasNEON()) { 8171 if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { 8172 return MVT::v4i32; 8173 } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { 8174 return MVT::v2i32; 8175 } 8176 } 8177 8178 // Lowering to i32/i16 if the size permits. 8179 if (Size >= 4) { 8180 return MVT::i32; 8181 } else if (Size >= 2) { 8182 return MVT::i16; 8183 } 8184 8185 // Let the target-independent logic figure it out. 8186 return MVT::Other; 8187} 8188 8189static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 8190 if (V < 0) 8191 return false; 8192 8193 unsigned Scale = 1; 8194 switch (VT.getSimpleVT().SimpleTy) { 8195 default: return false; 8196 case MVT::i1: 8197 case MVT::i8: 8198 // Scale == 1; 8199 break; 8200 case MVT::i16: 8201 // Scale == 2; 8202 Scale = 2; 8203 break; 8204 case MVT::i32: 8205 // Scale == 4; 8206 Scale = 4; 8207 break; 8208 } 8209 8210 if ((V & (Scale - 1)) != 0) 8211 return false; 8212 V /= Scale; 8213 return V == (V & ((1LL << 5) - 1)); 8214} 8215 8216static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 8217 const ARMSubtarget *Subtarget) { 8218 bool isNeg = false; 8219 if (V < 0) { 8220 isNeg = true; 8221 V = - V; 8222 } 8223 8224 switch (VT.getSimpleVT().SimpleTy) { 8225 default: return false; 8226 case MVT::i1: 8227 case MVT::i8: 8228 case MVT::i16: 8229 case MVT::i32: 8230 // + imm12 or - imm8 8231 if (isNeg) 8232 return V == (V & ((1LL << 8) - 1)); 8233 return V == (V & ((1LL << 12) - 1)); 8234 case MVT::f32: 8235 case MVT::f64: 8236 // Same as ARM mode. FIXME: NEON? 8237 if (!Subtarget->hasVFP2()) 8238 return false; 8239 if ((V & 3) != 0) 8240 return false; 8241 V >>= 2; 8242 return V == (V & ((1LL << 8) - 1)); 8243 } 8244} 8245 8246/// isLegalAddressImmediate - Return true if the integer value can be used 8247/// as the offset of the target addressing mode for load / store of the 8248/// given type. 8249static bool isLegalAddressImmediate(int64_t V, EVT VT, 8250 const ARMSubtarget *Subtarget) { 8251 if (V == 0) 8252 return true; 8253 8254 if (!VT.isSimple()) 8255 return false; 8256 8257 if (Subtarget->isThumb1Only()) 8258 return isLegalT1AddressImmediate(V, VT); 8259 else if (Subtarget->isThumb2()) 8260 return isLegalT2AddressImmediate(V, VT, Subtarget); 8261 8262 // ARM mode. 8263 if (V < 0) 8264 V = - V; 8265 switch (VT.getSimpleVT().SimpleTy) { 8266 default: return false; 8267 case MVT::i1: 8268 case MVT::i8: 8269 case MVT::i32: 8270 // +- imm12 8271 return V == (V & ((1LL << 12) - 1)); 8272 case MVT::i16: 8273 // +- imm8 8274 return V == (V & ((1LL << 8) - 1)); 8275 case MVT::f32: 8276 case MVT::f64: 8277 if (!Subtarget->hasVFP2()) // FIXME: NEON? 
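    // (The checks below encode the VFP addressing mode: the offset is a
    // word count, +/- (imm8 << 2), hence the multiple-of-4 test and the
    // shift before the 8-bit range check.)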
8278 return false; 8279 if ((V & 3) != 0) 8280 return false; 8281 V >>= 2; 8282 return V == (V & ((1LL << 8) - 1)); 8283 } 8284} 8285 8286bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 8287 EVT VT) const { 8288 int Scale = AM.Scale; 8289 if (Scale < 0) 8290 return false; 8291 8292 switch (VT.getSimpleVT().SimpleTy) { 8293 default: return false; 8294 case MVT::i1: 8295 case MVT::i8: 8296 case MVT::i16: 8297 case MVT::i32: 8298 if (Scale == 1) 8299 return true; 8300 // r + r << imm 8301 Scale = Scale & ~1; 8302 return Scale == 2 || Scale == 4 || Scale == 8; 8303 case MVT::i64: 8304 // r + r 8305 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 8306 return true; 8307 return false; 8308 case MVT::isVoid: 8309 // Note, we allow "void" uses (basically, uses that aren't loads or 8310 // stores), because arm allows folding a scale into many arithmetic 8311 // operations. This should be made more precise and revisited later. 8312 8313 // Allow r << imm, but the imm has to be a multiple of two. 8314 if (Scale & 1) return false; 8315 return isPowerOf2_32(Scale); 8316 } 8317} 8318 8319/// isLegalAddressingMode - Return true if the addressing mode represented 8320/// by AM is legal for this target, for a load/store of the specified type. 8321bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 8322 Type *Ty) const { 8323 EVT VT = getValueType(Ty, true); 8324 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 8325 return false; 8326 8327 // Can never fold addr of global into load/store. 8328 if (AM.BaseGV) 8329 return false; 8330 8331 switch (AM.Scale) { 8332 case 0: // no scale reg, must be "r+i" or "r", or "i". 8333 break; 8334 case 1: 8335 if (Subtarget->isThumb1Only()) 8336 return false; 8337 // FALL THROUGH. 8338 default: 8339 // ARM doesn't support any R+R*scale+imm addr modes. 8340 if (AM.BaseOffs) 8341 return false; 8342 8343 if (!VT.isSimple()) 8344 return false; 8345 8346 if (Subtarget->isThumb2()) 8347 return isLegalT2ScaledAddressingMode(AM, VT); 8348 8349 int Scale = AM.Scale; 8350 switch (VT.getSimpleVT().SimpleTy) { 8351 default: return false; 8352 case MVT::i1: 8353 case MVT::i8: 8354 case MVT::i32: 8355 if (Scale < 0) Scale = -Scale; 8356 if (Scale == 1) 8357 return true; 8358 // r + r << imm 8359 return isPowerOf2_32(Scale & ~1); 8360 case MVT::i16: 8361 case MVT::i64: 8362 // r + r 8363 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 8364 return true; 8365 return false; 8366 8367 case MVT::isVoid: 8368 // Note, we allow "void" uses (basically, uses that aren't loads or 8369 // stores), because arm allows folding a scale into many arithmetic 8370 // operations. This should be made more precise and revisited later. 8371 8372 // Allow r << imm, but the imm has to be a multiple of two. 8373 if (Scale & 1) return false; 8374 return isPowerOf2_32(Scale); 8375 } 8376 break; 8377 } 8378 return true; 8379} 8380 8381/// isLegalICmpImmediate - Return true if the specified immediate is legal 8382/// icmp immediate, that is the target has icmp instructions which can compare 8383/// a register against the immediate without having to materialize the 8384/// immediate into a register. 
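/// For example, in ARM mode "cmp r0, #255" needs no extra instructions,
/// while a compare against 257 (0x101) fits no so_imm rotation and would
/// first have to be materialized with a movw or a constant-pool load.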
8385bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 8386 if (!Subtarget->isThumb()) 8387 return ARM_AM::getSOImmVal(Imm) != -1; 8388 if (Subtarget->isThumb2()) 8389 return ARM_AM::getT2SOImmVal(Imm) != -1; 8390 return Imm >= 0 && Imm <= 255; 8391} 8392 8393/// isLegalAddImmediate - Return true if the specified immediate is legal 8394/// add immediate, that is the target has add instructions which can add 8395/// a register with the immediate without having to materialize the 8396/// immediate into a register. 8397bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 8398 return ARM_AM::getSOImmVal(Imm) != -1; 8399} 8400 8401static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 8402 bool isSEXTLoad, SDValue &Base, 8403 SDValue &Offset, bool &isInc, 8404 SelectionDAG &DAG) { 8405 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 8406 return false; 8407 8408 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 8409 // AddressingMode 3 8410 Base = Ptr->getOperand(0); 8411 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 8412 int RHSC = (int)RHS->getZExtValue(); 8413 if (RHSC < 0 && RHSC > -256) { 8414 assert(Ptr->getOpcode() == ISD::ADD); 8415 isInc = false; 8416 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 8417 return true; 8418 } 8419 } 8420 isInc = (Ptr->getOpcode() == ISD::ADD); 8421 Offset = Ptr->getOperand(1); 8422 return true; 8423 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 8424 // AddressingMode 2 8425 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 8426 int RHSC = (int)RHS->getZExtValue(); 8427 if (RHSC < 0 && RHSC > -0x1000) { 8428 assert(Ptr->getOpcode() == ISD::ADD); 8429 isInc = false; 8430 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 8431 Base = Ptr->getOperand(0); 8432 return true; 8433 } 8434 } 8435 8436 if (Ptr->getOpcode() == ISD::ADD) { 8437 isInc = true; 8438 ARM_AM::ShiftOpc ShOpcVal= 8439 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 8440 if (ShOpcVal != ARM_AM::no_shift) { 8441 Base = Ptr->getOperand(1); 8442 Offset = Ptr->getOperand(0); 8443 } else { 8444 Base = Ptr->getOperand(0); 8445 Offset = Ptr->getOperand(1); 8446 } 8447 return true; 8448 } 8449 8450 isInc = (Ptr->getOpcode() == ISD::ADD); 8451 Base = Ptr->getOperand(0); 8452 Offset = Ptr->getOperand(1); 8453 return true; 8454 } 8455 8456 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 8457 return false; 8458} 8459 8460static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 8461 bool isSEXTLoad, SDValue &Base, 8462 SDValue &Offset, bool &isInc, 8463 SelectionDAG &DAG) { 8464 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 8465 return false; 8466 8467 Base = Ptr->getOperand(0); 8468 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 8469 int RHSC = (int)RHS->getZExtValue(); 8470 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 8471 assert(Ptr->getOpcode() == ISD::ADD); 8472 isInc = false; 8473 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 8474 return true; 8475 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 
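      // A zero offset is rejected: writing the base register back unchanged
      // gains nothing over the plain, non-indexed form of the load/store.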
8476 isInc = Ptr->getOpcode() == ISD::ADD; 8477 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 8478 return true; 8479 } 8480 } 8481 8482 return false; 8483} 8484 8485/// getPreIndexedAddressParts - returns true by value, base pointer and 8486/// offset pointer and addressing mode by reference if the node's address 8487/// can be legally represented as pre-indexed load / store address. 8488bool 8489ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 8490 SDValue &Offset, 8491 ISD::MemIndexedMode &AM, 8492 SelectionDAG &DAG) const { 8493 if (Subtarget->isThumb1Only()) 8494 return false; 8495 8496 EVT VT; 8497 SDValue Ptr; 8498 bool isSEXTLoad = false; 8499 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 8500 Ptr = LD->getBasePtr(); 8501 VT = LD->getMemoryVT(); 8502 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 8503 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 8504 Ptr = ST->getBasePtr(); 8505 VT = ST->getMemoryVT(); 8506 } else 8507 return false; 8508 8509 bool isInc; 8510 bool isLegal = false; 8511 if (Subtarget->isThumb2()) 8512 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 8513 Offset, isInc, DAG); 8514 else 8515 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 8516 Offset, isInc, DAG); 8517 if (!isLegal) 8518 return false; 8519 8520 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 8521 return true; 8522} 8523 8524/// getPostIndexedAddressParts - returns true by value, base pointer and 8525/// offset pointer and addressing mode by reference if this node can be 8526/// combined with a load / store to form a post-indexed load / store. 8527bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 8528 SDValue &Base, 8529 SDValue &Offset, 8530 ISD::MemIndexedMode &AM, 8531 SelectionDAG &DAG) const { 8532 if (Subtarget->isThumb1Only()) 8533 return false; 8534 8535 EVT VT; 8536 SDValue Ptr; 8537 bool isSEXTLoad = false; 8538 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 8539 VT = LD->getMemoryVT(); 8540 Ptr = LD->getBasePtr(); 8541 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 8542 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 8543 VT = ST->getMemoryVT(); 8544 Ptr = ST->getBasePtr(); 8545 } else 8546 return false; 8547 8548 bool isInc; 8549 bool isLegal = false; 8550 if (Subtarget->isThumb2()) 8551 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 8552 isInc, DAG); 8553 else 8554 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 8555 isInc, DAG); 8556 if (!isLegal) 8557 return false; 8558 8559 if (Ptr != Base) { 8560 // Swap base ptr and offset to catch more post-index load / store when 8561 // it's legal. In Thumb2 mode, offset must be an immediate. 8562 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 8563 !Subtarget->isThumb2()) 8564 std::swap(Base, Offset); 8565 8566 // Post-indexed load / store update the base pointer. 8567 if (Ptr != Base) 8568 return false; 8569 } 8570 8571 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 8572 return true; 8573} 8574 8575void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 8576 const APInt &Mask, 8577 APInt &KnownZero, 8578 APInt &KnownOne, 8579 const SelectionDAG &DAG, 8580 unsigned Depth) const { 8581 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 8582 switch (Op.getOpcode()) { 8583 default: break; 8584 case ARMISD::CMOV: { 8585 // Bits are known zero/one if known on the LHS and RHS. 
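    // Since the CMOV selects one of its two operands at run time, only bits
    // known the same way on both sides survive, hence the intersection of
    // the two known-bit sets below.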
8586 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 8587 if (KnownZero == 0 && KnownOne == 0) return; 8588 8589 APInt KnownZeroRHS, KnownOneRHS; 8590 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 8591 KnownZeroRHS, KnownOneRHS, Depth+1); 8592 KnownZero &= KnownZeroRHS; 8593 KnownOne &= KnownOneRHS; 8594 return; 8595 } 8596 } 8597} 8598 8599//===----------------------------------------------------------------------===// 8600// ARM Inline Assembly Support 8601//===----------------------------------------------------------------------===// 8602 8603bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 8604 // Looking for "rev" which is V6+. 8605 if (!Subtarget->hasV6Ops()) 8606 return false; 8607 8608 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 8609 std::string AsmStr = IA->getAsmString(); 8610 SmallVector<StringRef, 4> AsmPieces; 8611 SplitString(AsmStr, AsmPieces, ";\n"); 8612 8613 switch (AsmPieces.size()) { 8614 default: return false; 8615 case 1: 8616 AsmStr = AsmPieces[0]; 8617 AsmPieces.clear(); 8618 SplitString(AsmStr, AsmPieces, " \t,"); 8619 8620 // rev $0, $1 8621 if (AsmPieces.size() == 3 && 8622 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 8623 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 8624 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 8625 if (Ty && Ty->getBitWidth() == 32) 8626 return IntrinsicLowering::LowerToByteSwap(CI); 8627 } 8628 break; 8629 } 8630 8631 return false; 8632} 8633 8634/// getConstraintType - Given a constraint letter, return the type of 8635/// constraint it is for this target. 8636ARMTargetLowering::ConstraintType 8637ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 8638 if (Constraint.size() == 1) { 8639 switch (Constraint[0]) { 8640 default: break; 8641 case 'l': return C_RegisterClass; 8642 case 'w': return C_RegisterClass; 8643 case 'h': return C_RegisterClass; 8644 case 'x': return C_RegisterClass; 8645 case 't': return C_RegisterClass; 8646 case 'j': return C_Other; // Constant for movw. 8647 // An address with a single base register. Due to the way we 8648 // currently handle addresses it is the same as an 'r' memory constraint. 8649 case 'Q': return C_Memory; 8650 } 8651 } else if (Constraint.size() == 2) { 8652 switch (Constraint[0]) { 8653 default: break; 8654 // All 'U+' constraints are addresses. 8655 case 'U': return C_Memory; 8656 } 8657 } 8658 return TargetLowering::getConstraintType(Constraint); 8659} 8660 8661/// Examine constraint type and operand type and determine a weight value. 8662/// This object must already have been set up with the operand type 8663/// and the current alternative constraint selected. 8664TargetLowering::ConstraintWeight 8665ARMTargetLowering::getSingleConstraintMatchWeight( 8666 AsmOperandInfo &info, const char *constraint) const { 8667 ConstraintWeight weight = CW_Invalid; 8668 Value *CallOperandVal = info.CallOperandVal; 8669 // If we don't have a value, we can't do a match, 8670 // but allow it at the lowest weight. 8671 if (CallOperandVal == NULL) 8672 return CW_Default; 8673 Type *type = CallOperandVal->getType(); 8674 // Look at the constraint type. 
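  // e.g. an integer operand under the 'l' constraint covers only r0-r7 in
  // Thumb mode, so it is weighted as a specific register class there but as
  // an ordinary register class in ARM mode.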
8675 switch (*constraint) { 8676 default: 8677 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 8678 break; 8679 case 'l': 8680 if (type->isIntegerTy()) { 8681 if (Subtarget->isThumb()) 8682 weight = CW_SpecificReg; 8683 else 8684 weight = CW_Register; 8685 } 8686 break; 8687 case 'w': 8688 if (type->isFloatingPointTy()) 8689 weight = CW_Register; 8690 break; 8691 } 8692 return weight; 8693} 8694 8695typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 8696RCPair 8697ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 8698 EVT VT) const { 8699 if (Constraint.size() == 1) { 8700 // GCC ARM Constraint Letters 8701 switch (Constraint[0]) { 8702 case 'l': // Low regs or general regs. 8703 if (Subtarget->isThumb()) 8704 return RCPair(0U, ARM::tGPRRegisterClass); 8705 else 8706 return RCPair(0U, ARM::GPRRegisterClass); 8707 case 'h': // High regs or no regs. 8708 if (Subtarget->isThumb()) 8709 return RCPair(0U, ARM::hGPRRegisterClass); 8710 break; 8711 case 'r': 8712 return RCPair(0U, ARM::GPRRegisterClass); 8713 case 'w': 8714 if (VT == MVT::f32) 8715 return RCPair(0U, ARM::SPRRegisterClass); 8716 if (VT.getSizeInBits() == 64) 8717 return RCPair(0U, ARM::DPRRegisterClass); 8718 if (VT.getSizeInBits() == 128) 8719 return RCPair(0U, ARM::QPRRegisterClass); 8720 break; 8721 case 'x': 8722 if (VT == MVT::f32) 8723 return RCPair(0U, ARM::SPR_8RegisterClass); 8724 if (VT.getSizeInBits() == 64) 8725 return RCPair(0U, ARM::DPR_8RegisterClass); 8726 if (VT.getSizeInBits() == 128) 8727 return RCPair(0U, ARM::QPR_8RegisterClass); 8728 break; 8729 case 't': 8730 if (VT == MVT::f32) 8731 return RCPair(0U, ARM::SPRRegisterClass); 8732 break; 8733 } 8734 } 8735 if (StringRef("{cc}").equals_lower(Constraint)) 8736 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 8737 8738 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 8739} 8740 8741/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 8742/// vector. If it is invalid, don't add anything to Ops. 8743void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 8744 std::string &Constraint, 8745 std::vector<SDValue>&Ops, 8746 SelectionDAG &DAG) const { 8747 SDValue Result(0, 0); 8748 8749 // Currently only support length 1 constraints. 8750 if (Constraint.length() != 1) return; 8751 8752 char ConstraintLetter = Constraint[0]; 8753 switch (ConstraintLetter) { 8754 default: break; 8755 case 'j': 8756 case 'I': case 'J': case 'K': case 'L': 8757 case 'M': case 'N': case 'O': 8758 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 8759 if (!C) 8760 return; 8761 8762 int64_t CVal64 = C->getSExtValue(); 8763 int CVal = (int) CVal64; 8764 // None of these constraints allow values larger than 32 bits. Check 8765 // that the value fits in an int. 8766 if (CVal != CVal64) 8767 return; 8768 8769 switch (ConstraintLetter) { 8770 case 'j': 8771 // Constant suitable for movw, must be between 0 and 8772 // 65535. 8773 if (Subtarget->hasV6T2Ops()) 8774 if (CVal >= 0 && CVal <= 65535) 8775 break; 8776 return; 8777 case 'I': 8778 if (Subtarget->isThumb1Only()) { 8779 // This must be a constant between 0 and 255, for ADD 8780 // immediates. 8781 if (CVal >= 0 && CVal <= 255) 8782 break; 8783 } else if (Subtarget->isThumb2()) { 8784 // A constant that can be used as an immediate value in a 8785 // data-processing instruction. 
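        // (Thumb2 modified immediates are an 8-bit value, optionally
        // rotated, or replicated into both halfwords or all four bytes of
        // the word, e.g. 0x00ff00ff.)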
8786 if (ARM_AM::getT2SOImmVal(CVal) != -1) 8787 break; 8788 } else { 8789 // A constant that can be used as an immediate value in a 8790 // data-processing instruction. 8791 if (ARM_AM::getSOImmVal(CVal) != -1) 8792 break; 8793 } 8794 return; 8795 8796 case 'J': 8797 if (Subtarget->isThumb()) { // FIXME thumb2 8798 // This must be a constant between -255 and -1, for negated ADD 8799 // immediates. This can be used in GCC with an "n" modifier that 8800 // prints the negated value, for use with SUB instructions. It is 8801 // not useful otherwise but is implemented for compatibility. 8802 if (CVal >= -255 && CVal <= -1) 8803 break; 8804 } else { 8805 // This must be a constant between -4095 and 4095. It is not clear 8806 // what this constraint is intended for. Implemented for 8807 // compatibility with GCC. 8808 if (CVal >= -4095 && CVal <= 4095) 8809 break; 8810 } 8811 return; 8812 8813 case 'K': 8814 if (Subtarget->isThumb1Only()) { 8815 // A 32-bit value where only one byte has a nonzero value. Exclude 8816 // zero to match GCC. This constraint is used by GCC internally for 8817 // constants that can be loaded with a move/shift combination. 8818 // It is not useful otherwise but is implemented for compatibility. 8819 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 8820 break; 8821 } else if (Subtarget->isThumb2()) { 8822 // A constant whose bitwise inverse can be used as an immediate 8823 // value in a data-processing instruction. This can be used in GCC 8824 // with a "B" modifier that prints the inverted value, for use with 8825 // BIC and MVN instructions. It is not useful otherwise but is 8826 // implemented for compatibility. 8827 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 8828 break; 8829 } else { 8830 // A constant whose bitwise inverse can be used as an immediate 8831 // value in a data-processing instruction. This can be used in GCC 8832 // with a "B" modifier that prints the inverted value, for use with 8833 // BIC and MVN instructions. It is not useful otherwise but is 8834 // implemented for compatibility. 8835 if (ARM_AM::getSOImmVal(~CVal) != -1) 8836 break; 8837 } 8838 return; 8839 8840 case 'L': 8841 if (Subtarget->isThumb1Only()) { 8842 // This must be a constant between -7 and 7, 8843 // for 3-operand ADD/SUB immediate instructions. 8844 if (CVal >= -7 && CVal < 7) 8845 break; 8846 } else if (Subtarget->isThumb2()) { 8847 // A constant whose negation can be used as an immediate value in a 8848 // data-processing instruction. This can be used in GCC with an "n" 8849 // modifier that prints the negated value, for use with SUB 8850 // instructions. It is not useful otherwise but is implemented for 8851 // compatibility. 8852 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 8853 break; 8854 } else { 8855 // A constant whose negation can be used as an immediate value in a 8856 // data-processing instruction. This can be used in GCC with an "n" 8857 // modifier that prints the negated value, for use with SUB 8858 // instructions. It is not useful otherwise but is implemented for 8859 // compatibility. 8860 if (ARM_AM::getSOImmVal(-CVal) != -1) 8861 break; 8862 } 8863 return; 8864 8865 case 'M': 8866 if (Subtarget->isThumb()) { // FIXME thumb2 8867 // This must be a multiple of 4 between 0 and 1020, for 8868 // ADD sp + immediate. 8869 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 8870 break; 8871 } else { 8872 // A power of two or a constant between 0 and 32. 
This is used in
8873       // GCC for the shift amount on shifted register operands, but it is
8874       // useful in general for any shift amounts.
8875       if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
8876         break;
8877     }
8878     return;
8879
8880   case 'N':
8881     if (Subtarget->isThumb()) {  // FIXME thumb2
8882       // This must be a constant between 0 and 31, for shift amounts.
8883       if (CVal >= 0 && CVal <= 31)
8884         break;
8885     }
8886     return;
8887
8888   case 'O':
8889     if (Subtarget->isThumb()) {  // FIXME thumb2
8890       // This must be a multiple of 4 between -508 and 508, for
8891       // ADD/SUB sp = sp + immediate.
8892       if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
8893         break;
8894     }
8895     return;
8896     }
8897     Result = DAG.getTargetConstant(CVal, Op.getValueType());
8898     break;
8899   }
8900
8901   if (Result.getNode()) {
8902     Ops.push_back(Result);
8903     return;
8904   }
8905   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8906}
8907
8908 bool
8909 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
8910   // The ARM target isn't yet aware of offsets.
8911   return false;
8912}
8913
8914 bool ARM::isBitFieldInvertedMask(unsigned v) {
8915   if (v == 0xffffffff)
8916     return false;
8917   // There can be 1's on either or both "outsides"; all the "inside"
8918   // bits must be 0's.
8919   unsigned lsb = 0, msb = 31;
8920   while (v & (1 << msb)) --msb;
8921   while (v & (1 << lsb)) ++lsb;
8922   for (unsigned i = lsb; i <= msb; ++i) {
8923     if (v & (1 << i))
8924       return false;
8925   }
8926   return true;
8927}
8928
8929 /// isFPImmLegal - Returns true if the target can instruction select the
8930 /// specified FP immediate natively. If false, the legalizer will
8931 /// materialize the FP immediate as a load from a constant pool.
8932 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
8933   if (!Subtarget->hasVFP3())
8934     return false;
8935   if (VT == MVT::f32)
8936     return ARM_AM::getFP32Imm(Imm) != -1;
8937   if (VT == MVT::f64)
8938     return ARM_AM::getFP64Imm(Imm) != -1;
8939   return false;
8940}
8941
8942 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
8943 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
8944 /// specified in the intrinsic calls.
8945 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8946                                            const CallInst &I,
8947                                            unsigned Intrinsic) const {
8948   switch (Intrinsic) {
8949   case Intrinsic::arm_neon_vld1:
8950   case Intrinsic::arm_neon_vld2:
8951   case Intrinsic::arm_neon_vld3:
8952   case Intrinsic::arm_neon_vld4:
8953   case Intrinsic::arm_neon_vld2lane:
8954   case Intrinsic::arm_neon_vld3lane:
8955   case Intrinsic::arm_neon_vld4lane: {
8956     Info.opc = ISD::INTRINSIC_W_CHAIN;
8957     // Conservatively set memVT to the entire set of vectors loaded.
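    // e.g. a vld3 of <8 x i8> returns three 64-bit vectors (24 bytes in
    // all), so memVT below becomes v3i64, covering every byte the
    // instruction may read.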
8958 uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8; 8959 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 8960 Info.ptrVal = I.getArgOperand(0); 8961 Info.offset = 0; 8962 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 8963 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 8964 Info.vol = false; // volatile loads with NEON intrinsics not supported 8965 Info.readMem = true; 8966 Info.writeMem = false; 8967 return true; 8968 } 8969 case Intrinsic::arm_neon_vst1: 8970 case Intrinsic::arm_neon_vst2: 8971 case Intrinsic::arm_neon_vst3: 8972 case Intrinsic::arm_neon_vst4: 8973 case Intrinsic::arm_neon_vst2lane: 8974 case Intrinsic::arm_neon_vst3lane: 8975 case Intrinsic::arm_neon_vst4lane: { 8976 Info.opc = ISD::INTRINSIC_VOID; 8977 // Conservatively set memVT to the entire set of vectors stored. 8978 unsigned NumElts = 0; 8979 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 8980 Type *ArgTy = I.getArgOperand(ArgI)->getType(); 8981 if (!ArgTy->isVectorTy()) 8982 break; 8983 NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; 8984 } 8985 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 8986 Info.ptrVal = I.getArgOperand(0); 8987 Info.offset = 0; 8988 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 8989 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 8990 Info.vol = false; // volatile stores with NEON intrinsics not supported 8991 Info.readMem = false; 8992 Info.writeMem = true; 8993 return true; 8994 } 8995 case Intrinsic::arm_strexd: { 8996 Info.opc = ISD::INTRINSIC_W_CHAIN; 8997 Info.memVT = MVT::i64; 8998 Info.ptrVal = I.getArgOperand(2); 8999 Info.offset = 0; 9000 Info.align = 8; 9001 Info.vol = true; 9002 Info.readMem = false; 9003 Info.writeMem = true; 9004 return true; 9005 } 9006 case Intrinsic::arm_ldrexd: { 9007 Info.opc = ISD::INTRINSIC_W_CHAIN; 9008 Info.memVT = MVT::i64; 9009 Info.ptrVal = I.getArgOperand(0); 9010 Info.offset = 0; 9011 Info.align = 8; 9012 Info.vol = true; 9013 Info.readMem = true; 9014 Info.writeMem = false; 9015 return true; 9016 } 9017 default: 9018 break; 9019 } 9020 9021 return false; 9022} 9023