ARMISelLowering.cpp revision 05e80f27148b1dc19925755d56b6466df840da44
1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that ARM uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "arm-isel" 16#include "ARMISelLowering.h" 17#include "ARM.h" 18#include "ARMCallingConv.h" 19#include "ARMConstantPoolValue.h" 20#include "ARMMachineFunctionInfo.h" 21#include "ARMPerfectShuffle.h" 22#include "ARMSubtarget.h" 23#include "ARMTargetMachine.h" 24#include "ARMTargetObjectFile.h" 25#include "MCTargetDesc/ARMAddressingModes.h" 26#include "llvm/CallingConv.h" 27#include "llvm/Constants.h" 28#include "llvm/Function.h" 29#include "llvm/GlobalValue.h" 30#include "llvm/Instruction.h" 31#include "llvm/Instructions.h" 32#include "llvm/Intrinsics.h" 33#include "llvm/Type.h" 34#include "llvm/CodeGen/CallingConvLower.h" 35#include "llvm/CodeGen/IntrinsicLowering.h" 36#include "llvm/CodeGen/MachineBasicBlock.h" 37#include "llvm/CodeGen/MachineFrameInfo.h" 38#include "llvm/CodeGen/MachineFunction.h" 39#include "llvm/CodeGen/MachineInstrBuilder.h" 40#include "llvm/CodeGen/MachineModuleInfo.h" 41#include "llvm/CodeGen/MachineRegisterInfo.h" 42#include "llvm/CodeGen/SelectionDAG.h" 43#include "llvm/MC/MCSectionMachO.h" 44#include "llvm/Target/TargetOptions.h" 45#include "llvm/ADT/StringExtras.h" 46#include "llvm/ADT/Statistic.h" 47#include "llvm/Support/CommandLine.h" 48#include "llvm/Support/ErrorHandling.h" 49#include "llvm/Support/MathExtras.h" 50#include "llvm/Support/raw_ostream.h" 51using namespace llvm; 52 53STATISTIC(NumTailCalls, "Number of tail calls"); 54STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); 55STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); 56 57// This option should go away when tail calls fully work. 58static cl::opt<bool> 59EnableARMTailCalls("arm-tail-calls", cl::Hidden, 60 cl::desc("Generate tail calls (TEMPORARY OPTION)."), 61 cl::init(false)); 62 63cl::opt<bool> 64EnableARMLongCalls("arm-long-calls", cl::Hidden, 65 cl::desc("Generate calls via indirect call instructions"), 66 cl::init(false)); 67 68static cl::opt<bool> 69ARMInterworking("arm-interworking", cl::Hidden, 70 cl::desc("Enable / disable ARM interworking (for debugging only)"), 71 cl::init(true)); 72 73namespace { 74 class ARMCCState : public CCState { 75 public: 76 ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, 77 const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs, 78 LLVMContext &C, ParmContext PC) 79 : CCState(CC, isVarArg, MF, TM, locs, C) { 80 assert(((PC == Call) || (PC == Prologue)) && 81 "ARMCCState users must specify whether their context is call" 82 "or prologue generation."); 83 CallOrPrologue = PC; 84 } 85 }; 86} 87 88// The APCS parameter registers. 
89static const uint16_t GPRArgRegs[] = { 90 ARM::R0, ARM::R1, ARM::R2, ARM::R3 91}; 92 93void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, 94 MVT PromotedBitwiseVT) { 95 if (VT != PromotedLdStVT) { 96 setOperationAction(ISD::LOAD, VT, Promote); 97 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); 98 99 setOperationAction(ISD::STORE, VT, Promote); 100 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); 101 } 102 103 MVT ElemTy = VT.getVectorElementType(); 104 if (ElemTy != MVT::i64 && ElemTy != MVT::f64) 105 setOperationAction(ISD::SETCC, VT, Custom); 106 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 107 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 108 if (ElemTy == MVT::i32) { 109 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 110 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 111 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 112 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 113 } else { 114 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 115 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 116 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 117 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 118 } 119 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 120 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 121 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); 122 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 123 setOperationAction(ISD::SELECT, VT, Expand); 124 setOperationAction(ISD::SELECT_CC, VT, Expand); 125 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 126 if (VT.isInteger()) { 127 setOperationAction(ISD::SHL, VT, Custom); 128 setOperationAction(ISD::SRA, VT, Custom); 129 setOperationAction(ISD::SRL, VT, Custom); 130 } 131 132 // Promote all bit-wise operations. 133 if (VT.isInteger() && VT != PromotedBitwiseVT) { 134 setOperationAction(ISD::AND, VT, Promote); 135 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); 136 setOperationAction(ISD::OR, VT, Promote); 137 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); 138 setOperationAction(ISD::XOR, VT, Promote); 139 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); 140 } 141 142 // Neon does not support vector divide/remainder operations. 143 setOperationAction(ISD::SDIV, VT, Expand); 144 setOperationAction(ISD::UDIV, VT, Expand); 145 setOperationAction(ISD::FDIV, VT, Expand); 146 setOperationAction(ISD::SREM, VT, Expand); 147 setOperationAction(ISD::UREM, VT, Expand); 148 setOperationAction(ISD::FREM, VT, Expand); 149} 150 151void ARMTargetLowering::addDRTypeForNEON(MVT VT) { 152 addRegisterClass(VT, &ARM::DPRRegClass); 153 addTypeForNEON(VT, MVT::f64, MVT::v2i32); 154} 155 156void ARMTargetLowering::addQRTypeForNEON(MVT VT) { 157 addRegisterClass(VT, &ARM::QPRRegClass); 158 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); 159} 160 161static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { 162 if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin()) 163 return new TargetLoweringObjectFileMachO(); 164 165 return new ARMElfTargetObjectFile(); 166} 167 168ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) 169 : TargetLowering(TM, createTLOF(TM)) { 170 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 171 RegInfo = TM.getRegisterInfo(); 172 Itins = TM.getInstrItineraryData(); 173 174 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 175 176 if (Subtarget->isTargetDarwin()) { 177 // Uses VFP for Thumb libfuncs if available. 178 if (Subtarget->isThumb() && Subtarget->hasVFP2()) { 179 // Single-precision floating-point arithmetic. 
180 setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); 181 setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); 182 setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); 183 setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); 184 185 // Double-precision floating-point arithmetic. 186 setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); 187 setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); 188 setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); 189 setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); 190 191 // Single-precision comparisons. 192 setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); 193 setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); 194 setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); 195 setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); 196 setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); 197 setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); 198 setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); 199 setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); 200 201 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 202 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); 203 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 204 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 205 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 206 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 207 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 208 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 209 210 // Double-precision comparisons. 211 setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); 212 setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); 213 setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); 214 setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); 215 setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); 216 setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); 217 setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); 218 setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); 219 220 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 221 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); 222 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 223 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 224 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 225 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 226 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 227 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 228 229 // Floating-point to integer conversions. 230 // i64 conversions are done via library routines even when generating VFP 231 // instructions, so use the same ones. 232 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); 233 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); 234 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); 235 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); 236 237 // Conversions between floating types. 238 setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); 239 setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); 240 241 // Integer to floating-point conversions. 242 // i64 conversions are done via library routines even when generating VFP 243 // instructions, so use the same ones. 244 // FIXME: There appears to be some naming inconsistency in ARM libgcc: 245 // e.g., __floatunsidf vs. __floatunssidfvfp. 246 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); 247 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); 248 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); 249 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); 250 } 251 } 252 253 // These libcalls are not available in 32-bit. 
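  // A note on the *vfp comparison helpers registered above (the AEABI
  // __aeabi_*cmp* table further down follows the same scheme): each helper
  // returns an i32 that is nonzero when the tested relation holds, and
  // setCmpLibcallCC records how that integer is turned back into a boolean.
  // Roughly, a minimal sketch of the legalized form of "setcc oeq f32 a, b":
  //
  //     t = call i32 __eqsf2vfp(a, b)
  //     setcc ne i32 t, 0              // condition registered above
  //
  // This is also why the single "unordered" helper serves both UO (result
  // compared != 0) and O (result compared == 0), with only the registered
  // condition code differing.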
254 setLibcallName(RTLIB::SHL_I128, 0); 255 setLibcallName(RTLIB::SRL_I128, 0); 256 setLibcallName(RTLIB::SRA_I128, 0); 257 258 if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { 259 // Double-precision floating-point arithmetic helper functions 260 // RTABI chapter 4.1.2, Table 2 261 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); 262 setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); 263 setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); 264 setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); 265 setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); 266 setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); 267 setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); 268 setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); 269 270 // Double-precision floating-point comparison helper functions 271 // RTABI chapter 4.1.2, Table 3 272 setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); 273 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 274 setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); 275 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); 276 setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); 277 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 278 setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); 279 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 280 setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); 281 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 282 setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); 283 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 284 setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); 285 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 286 setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); 287 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 288 setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); 289 setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); 290 setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); 291 setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); 292 setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); 293 setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); 294 setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); 295 setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); 296 297 // Single-precision floating-point arithmetic helper functions 298 // RTABI chapter 4.1.2, Table 4 299 setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); 300 setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); 301 setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); 302 setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); 303 setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); 304 setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); 305 setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); 306 setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); 307 308 // Single-precision floating-point comparison helper functions 309 // RTABI chapter 4.1.2, Table 5 310 setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); 311 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 312 setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); 313 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 314 setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 315 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 316 setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 317 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 318 setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 319 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 320 setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 321 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 322 
setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 323 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 324 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 325 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 326 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 327 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 328 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 329 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 330 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 331 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 332 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 333 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 334 335 // Floating-point to integer conversions. 336 // RTABI chapter 4.1.2, Table 6 337 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 338 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 339 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 340 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 341 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 342 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 343 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 344 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 345 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 346 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 347 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 348 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 349 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 350 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 351 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 352 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 353 354 // Conversions between floating types. 355 // RTABI chapter 4.1.2, Table 7 356 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 357 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 358 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 359 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 360 361 // Integer to floating-point conversions. 
362 // RTABI chapter 4.1.2, Table 8 363 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); 364 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); 365 setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); 366 setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); 367 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); 368 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); 369 setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); 370 setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); 371 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 372 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 373 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 374 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 375 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 376 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 377 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 378 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 379 380 // Long long helper functions 381 // RTABI chapter 4.2, Table 9 382 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); 383 setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); 384 setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); 385 setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); 386 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); 387 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 388 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 389 setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); 390 setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); 391 setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); 392 393 // Integer division functions 394 // RTABI chapter 4.3.1 395 setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); 396 setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); 397 setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); 398 setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); 399 setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); 400 setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); 401 setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); 402 setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); 403 setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); 404 setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); 405 setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); 406 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 407 setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); 408 setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); 409 setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); 410 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 411 412 // Memory operations 413 // RTABI chapter 4.3.4 414 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); 415 setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); 416 setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); 417 setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); 418 setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); 419 setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); 420 } 421 422 // Use divmod compiler-rt calls for iOS 5.0 and later. 
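  // The AEABI division entries above intentionally alias: sub-word divides are
  // widened to 32 bits anyway, so SDIV_I8/I16/I32 can all share __aeabi_idiv
  // (and the unsigned ones __aeabi_uidiv), while the 64-bit cases go to
  // __aeabi_{u}ldivmod.  The compiler-rt divmod entry points registered next
  // let a div+rem pair over the same operands become a single call (assumed
  // shape: __divmodsi4 returns the quotient and stores the remainder through
  // its pointer argument; check compiler-rt for the exact signature).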
423 if (Subtarget->getTargetTriple().getOS() == Triple::IOS && 424 !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { 425 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); 426 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); 427 } 428 429 if (Subtarget->isThumb1Only()) 430 addRegisterClass(MVT::i32, &ARM::tGPRRegClass); 431 else 432 addRegisterClass(MVT::i32, &ARM::GPRRegClass); 433 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 434 !Subtarget->isThumb1Only()) { 435 addRegisterClass(MVT::f32, &ARM::SPRRegClass); 436 if (!Subtarget->isFPOnlySP()) 437 addRegisterClass(MVT::f64, &ARM::DPRRegClass); 438 439 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 440 } 441 442 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 443 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 444 for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 445 InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) 446 setTruncStoreAction((MVT::SimpleValueType)VT, 447 (MVT::SimpleValueType)InnerVT, Expand); 448 setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); 449 setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); 450 setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); 451 } 452 453 setOperationAction(ISD::ConstantFP, MVT::f32, Custom); 454 455 if (Subtarget->hasNEON()) { 456 addDRTypeForNEON(MVT::v2f32); 457 addDRTypeForNEON(MVT::v8i8); 458 addDRTypeForNEON(MVT::v4i16); 459 addDRTypeForNEON(MVT::v2i32); 460 addDRTypeForNEON(MVT::v1i64); 461 462 addQRTypeForNEON(MVT::v4f32); 463 addQRTypeForNEON(MVT::v2f64); 464 addQRTypeForNEON(MVT::v16i8); 465 addQRTypeForNEON(MVT::v8i16); 466 addQRTypeForNEON(MVT::v4i32); 467 addQRTypeForNEON(MVT::v2i64); 468 469 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but 470 // neither Neon nor VFP support any arithmetic operations on it. 471 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively 472 // supported for v4f32. 473 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 474 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 475 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 476 // FIXME: Code duplication: FDIV and FREM are expanded always, see 477 // ARMTargetLowering::addTypeForNEON method for details. 478 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 479 setOperationAction(ISD::FREM, MVT::v2f64, Expand); 480 // FIXME: Create unittest. 481 // In another words, find a way when "copysign" appears in DAG with vector 482 // operands. 483 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); 484 // FIXME: Code duplication: SETCC has custom operation action, see 485 // ARMTargetLowering::addTypeForNEON method for details. 486 setOperationAction(ISD::SETCC, MVT::v2f64, Expand); 487 // FIXME: Create unittest for FNEG and for FABS. 
488 setOperationAction(ISD::FNEG, MVT::v2f64, Expand); 489 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 490 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); 491 setOperationAction(ISD::FSIN, MVT::v2f64, Expand); 492 setOperationAction(ISD::FCOS, MVT::v2f64, Expand); 493 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); 494 setOperationAction(ISD::FPOW, MVT::v2f64, Expand); 495 setOperationAction(ISD::FLOG, MVT::v2f64, Expand); 496 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); 497 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); 498 setOperationAction(ISD::FEXP, MVT::v2f64, Expand); 499 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); 500 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. 501 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); 502 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); 503 setOperationAction(ISD::FRINT, MVT::v2f64, Expand); 504 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); 505 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); 506 507 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 508 setOperationAction(ISD::FSIN, MVT::v4f32, Expand); 509 setOperationAction(ISD::FCOS, MVT::v4f32, Expand); 510 setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); 511 setOperationAction(ISD::FPOW, MVT::v4f32, Expand); 512 setOperationAction(ISD::FLOG, MVT::v4f32, Expand); 513 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); 514 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); 515 setOperationAction(ISD::FEXP, MVT::v4f32, Expand); 516 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); 517 518 // Neon does not support some operations on v1i64 and v2i64 types. 519 setOperationAction(ISD::MUL, MVT::v1i64, Expand); 520 // Custom handling for some quad-vector types to detect VMULL. 521 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 522 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 523 setOperationAction(ISD::MUL, MVT::v2i64, Custom); 524 // Custom handling for some vector types to avoid expensive expansions 525 setOperationAction(ISD::SDIV, MVT::v4i16, Custom); 526 setOperationAction(ISD::SDIV, MVT::v8i8, Custom); 527 setOperationAction(ISD::UDIV, MVT::v4i16, Custom); 528 setOperationAction(ISD::UDIV, MVT::v8i8, Custom); 529 setOperationAction(ISD::SETCC, MVT::v1i64, Expand); 530 setOperationAction(ISD::SETCC, MVT::v2i64, Expand); 531 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with 532 // a destination type that is wider than the source, and nor does 533 // it have a FP_TO_[SU]INT instruction with a narrower destination than 534 // source. 
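    // For example, v4i16 <-> v4f32 cannot be a single vcvt; a minimal sketch
    // of what the Custom lowering registered below ends up doing:
    //
    //     v4i16 -> v4f32:  sign/zero-extend to v4i32, then convert v4i32 -> v4f32
    //     v4f32 -> v4i16:  convert to v4i32, then truncate v4i32 -> v4i16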
535 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); 536 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 537 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); 538 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); 539 540 setTargetDAGCombine(ISD::INTRINSIC_VOID); 541 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 542 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 543 setTargetDAGCombine(ISD::SHL); 544 setTargetDAGCombine(ISD::SRL); 545 setTargetDAGCombine(ISD::SRA); 546 setTargetDAGCombine(ISD::SIGN_EXTEND); 547 setTargetDAGCombine(ISD::ZERO_EXTEND); 548 setTargetDAGCombine(ISD::ANY_EXTEND); 549 setTargetDAGCombine(ISD::SELECT_CC); 550 setTargetDAGCombine(ISD::BUILD_VECTOR); 551 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 552 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); 553 setTargetDAGCombine(ISD::STORE); 554 setTargetDAGCombine(ISD::FP_TO_SINT); 555 setTargetDAGCombine(ISD::FP_TO_UINT); 556 setTargetDAGCombine(ISD::FDIV); 557 558 // It is legal to extload from v4i8 to v4i16 or v4i32. 559 MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, 560 MVT::v4i16, MVT::v2i16, 561 MVT::v2i32}; 562 for (unsigned i = 0; i < 6; ++i) { 563 setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); 564 setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); 565 setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); 566 } 567 } 568 569 computeRegisterProperties(); 570 571 // ARM does not have f32 extending load. 572 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 573 574 // ARM does not have i1 sign extending load. 575 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 576 577 // ARM supports all 4 flavors of integer indexed load / store. 578 if (!Subtarget->isThumb1Only()) { 579 for (unsigned im = (unsigned)ISD::PRE_INC; 580 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { 581 setIndexedLoadAction(im, MVT::i1, Legal); 582 setIndexedLoadAction(im, MVT::i8, Legal); 583 setIndexedLoadAction(im, MVT::i16, Legal); 584 setIndexedLoadAction(im, MVT::i32, Legal); 585 setIndexedStoreAction(im, MVT::i1, Legal); 586 setIndexedStoreAction(im, MVT::i8, Legal); 587 setIndexedStoreAction(im, MVT::i16, Legal); 588 setIndexedStoreAction(im, MVT::i32, Legal); 589 } 590 } 591 592 // i64 operation support. 593 setOperationAction(ISD::MUL, MVT::i64, Expand); 594 setOperationAction(ISD::MULHU, MVT::i32, Expand); 595 if (Subtarget->isThumb1Only()) { 596 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 597 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 598 } 599 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() 600 || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP())) 601 setOperationAction(ISD::MULHS, MVT::i32, Expand); 602 603 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 604 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 605 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 606 setOperationAction(ISD::SRL, MVT::i64, Custom); 607 setOperationAction(ISD::SRA, MVT::i64, Custom); 608 609 if (!Subtarget->isThumb1Only()) { 610 // FIXME: We should do this for Thumb1 as well. 611 setOperationAction(ISD::ADDC, MVT::i32, Custom); 612 setOperationAction(ISD::ADDE, MVT::i32, Custom); 613 setOperationAction(ISD::SUBC, MVT::i32, Custom); 614 setOperationAction(ISD::SUBE, MVT::i32, Custom); 615 } 616 617 // ARM does not have ROTL. 
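  // (Only a rotate-right exists, so rotate-left is not marked legal; a
  // rotate-left by n is instead expressed as rotr(x, (32 - n) & 31), or with
  // shifts and an OR.)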
618 setOperationAction(ISD::ROTL, MVT::i32, Expand); 619 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 620 setOperationAction(ISD::CTPOP, MVT::i32, Expand); 621 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) 622 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 623 624 // These just redirect to CTTZ and CTLZ on ARM. 625 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); 626 setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); 627 628 // Only ARMv6 has BSWAP. 629 if (!Subtarget->hasV6Ops()) 630 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 631 632 // These are expanded into libcalls. 633 if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { 634 // v7M has a hardware divider 635 setOperationAction(ISD::SDIV, MVT::i32, Expand); 636 setOperationAction(ISD::UDIV, MVT::i32, Expand); 637 } 638 setOperationAction(ISD::SREM, MVT::i32, Expand); 639 setOperationAction(ISD::UREM, MVT::i32, Expand); 640 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 641 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 642 643 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 644 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 645 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); 646 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 647 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 648 649 setOperationAction(ISD::TRAP, MVT::Other, Legal); 650 651 // Use the default implementation. 652 setOperationAction(ISD::VASTART, MVT::Other, Custom); 653 setOperationAction(ISD::VAARG, MVT::Other, Expand); 654 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 655 setOperationAction(ISD::VAEND, MVT::Other, Expand); 656 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 657 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 658 659 if (!Subtarget->isTargetDarwin()) { 660 // Non-Darwin platforms may return values in these registers via the 661 // personality function. 662 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 663 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 664 setExceptionPointerRegister(ARM::R0); 665 setExceptionSelectorRegister(ARM::R1); 666 } 667 668 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 669 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use 670 // the default expansion. 671 // FIXME: This should be checking for v6k, not just v6. 672 if (Subtarget->hasDataBarrier() || 673 (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { 674 // membarrier needs custom lowering; the rest are legal and handled 675 // normally. 676 setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); 677 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 678 // Custom lowering for 64-bit ops 679 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); 680 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); 681 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); 682 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); 683 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); 684 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); 685 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); 686 // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. 687 setInsertFencesForAtomic(true); 688 } else { 689 // Set them all for expansion, which will force libcalls. 
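    // (On targets that do have ldrex/strex, the path above instead turns an
    // atomicrmw into a retry loop bracketed by the automatically inserted
    // fences; an illustrative, not exact, sequence for an atomic add:
    //
    //         dmb   ish
    //     .Ltry:
    //         ldrex r1, [r0]
    //         add   r1, r1, r2
    //         strex r3, r1, [r0]
    //         cmp   r3, #0
    //         bne   .Ltry
    //         dmb   ish
    //
    // Without those instructions, every atomic operation ends up as its
    // __sync_* libcall, which is why the whole set below is marked Expand.)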
690 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); 691 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); 692 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); 693 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); 694 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); 695 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 696 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); 697 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); 698 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); 699 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); 700 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); 701 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); 702 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); 703 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); 704 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the 705 // Unordered/Monotonic case. 706 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); 707 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); 708 // Since the libcalls include locking, fold in the fences 709 setShouldFoldAtomicFences(true); 710 } 711 712 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 713 714 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 715 if (!Subtarget->hasV6Ops()) { 716 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 717 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 718 } 719 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 720 721 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 722 !Subtarget->isThumb1Only()) { 723 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR 724 // iff target supports vfp2. 725 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 726 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 727 } 728 729 // We want to custom lower some of our intrinsics. 
730 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 731 if (Subtarget->isTargetDarwin()) { 732 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 733 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 734 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); 735 } 736 737 setOperationAction(ISD::SETCC, MVT::i32, Expand); 738 setOperationAction(ISD::SETCC, MVT::f32, Expand); 739 setOperationAction(ISD::SETCC, MVT::f64, Expand); 740 setOperationAction(ISD::SELECT, MVT::i32, Custom); 741 setOperationAction(ISD::SELECT, MVT::f32, Custom); 742 setOperationAction(ISD::SELECT, MVT::f64, Custom); 743 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 744 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 745 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 746 747 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 748 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 749 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 750 setOperationAction(ISD::BR_CC, MVT::f64, Custom); 751 setOperationAction(ISD::BR_JT, MVT::Other, Custom); 752 753 // We don't support sin/cos/fmod/copysign/pow 754 setOperationAction(ISD::FSIN, MVT::f64, Expand); 755 setOperationAction(ISD::FSIN, MVT::f32, Expand); 756 setOperationAction(ISD::FCOS, MVT::f32, Expand); 757 setOperationAction(ISD::FCOS, MVT::f64, Expand); 758 setOperationAction(ISD::FREM, MVT::f64, Expand); 759 setOperationAction(ISD::FREM, MVT::f32, Expand); 760 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 761 !Subtarget->isThumb1Only()) { 762 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 763 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 764 } 765 setOperationAction(ISD::FPOW, MVT::f64, Expand); 766 setOperationAction(ISD::FPOW, MVT::f32, Expand); 767 768 if (!Subtarget->hasVFP4()) { 769 setOperationAction(ISD::FMA, MVT::f64, Expand); 770 setOperationAction(ISD::FMA, MVT::f32, Expand); 771 } 772 773 // Various VFP goodness 774 if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { 775 // int <-> fp are custom expanded into bit_convert + ARMISD ops. 776 if (Subtarget->hasVFP2()) { 777 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 778 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 779 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 780 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 781 } 782 // Special handling for half-precision FP. 783 if (!Subtarget->hasFP16()) { 784 setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); 785 setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); 786 } 787 } 788 789 // We have target-specific dag combine patterns for the following nodes: 790 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine 791 setTargetDAGCombine(ISD::ADD); 792 setTargetDAGCombine(ISD::SUB); 793 setTargetDAGCombine(ISD::MUL); 794 795 if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { 796 setTargetDAGCombine(ISD::AND); 797 setTargetDAGCombine(ISD::OR); 798 setTargetDAGCombine(ISD::XOR); 799 } 800 801 if (Subtarget->hasV6Ops()) 802 setTargetDAGCombine(ISD::SRL); 803 804 setStackPointerRegisterToSaveRestore(ARM::SP); 805 806 if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || 807 !Subtarget->hasVFP2()) 808 setSchedulingPreference(Sched::RegPressure); 809 else 810 setSchedulingPreference(Sched::Hybrid); 811 812 //// temporary - rewrite interface to use type 813 maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; 814 maxStoresPerMemset = 16; 815 maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 
                                                             8 : 4;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  // Prefer likely predicted branches to selects on out-of-order cores.
  predictableSelectIsExpensive = Subtarget->isCortexA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
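    // For instance, the 32 SPRs (S0-S31) only overlay D0-D15, half of the
    // 32-entry DPR file used as the representative class, so charging each
    // such value twice (Cost = 2 below) makes 32 live single-precision values
    // register the same DPR pressure as the full file rather than half of it.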
856 if (Subtarget->useNEONForSinglePrecisionFP()) 857 Cost = 2; 858 break; 859 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: 860 case MVT::v4f32: case MVT::v2f64: 861 RRC = &ARM::DPRRegClass; 862 Cost = 2; 863 break; 864 case MVT::v4i64: 865 RRC = &ARM::DPRRegClass; 866 Cost = 4; 867 break; 868 case MVT::v8i64: 869 RRC = &ARM::DPRRegClass; 870 Cost = 8; 871 break; 872 } 873 return std::make_pair(RRC, Cost); 874} 875 876const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { 877 switch (Opcode) { 878 default: return 0; 879 case ARMISD::Wrapper: return "ARMISD::Wrapper"; 880 case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; 881 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; 882 case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; 883 case ARMISD::CALL: return "ARMISD::CALL"; 884 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; 885 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; 886 case ARMISD::tCALL: return "ARMISD::tCALL"; 887 case ARMISD::BRCOND: return "ARMISD::BRCOND"; 888 case ARMISD::BR_JT: return "ARMISD::BR_JT"; 889 case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; 890 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; 891 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; 892 case ARMISD::CMP: return "ARMISD::CMP"; 893 case ARMISD::CMN: return "ARMISD::CMN"; 894 case ARMISD::CMPZ: return "ARMISD::CMPZ"; 895 case ARMISD::CMPFP: return "ARMISD::CMPFP"; 896 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; 897 case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; 898 case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; 899 900 case ARMISD::CMOV: return "ARMISD::CMOV"; 901 902 case ARMISD::RBIT: return "ARMISD::RBIT"; 903 904 case ARMISD::FTOSI: return "ARMISD::FTOSI"; 905 case ARMISD::FTOUI: return "ARMISD::FTOUI"; 906 case ARMISD::SITOF: return "ARMISD::SITOF"; 907 case ARMISD::UITOF: return "ARMISD::UITOF"; 908 909 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; 910 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; 911 case ARMISD::RRX: return "ARMISD::RRX"; 912 913 case ARMISD::ADDC: return "ARMISD::ADDC"; 914 case ARMISD::ADDE: return "ARMISD::ADDE"; 915 case ARMISD::SUBC: return "ARMISD::SUBC"; 916 case ARMISD::SUBE: return "ARMISD::SUBE"; 917 918 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; 919 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; 920 921 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; 922 case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; 923 924 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; 925 926 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; 927 928 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; 929 930 case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; 931 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; 932 933 case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; 934 935 case ARMISD::VCEQ: return "ARMISD::VCEQ"; 936 case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; 937 case ARMISD::VCGE: return "ARMISD::VCGE"; 938 case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; 939 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 940 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 941 case ARMISD::VCGT: return "ARMISD::VCGT"; 942 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 943 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 944 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 945 case ARMISD::VTST: return "ARMISD::VTST"; 946 947 case ARMISD::VSHL: return "ARMISD::VSHL"; 948 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 949 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 950 case ARMISD::VSHLLs: return 
"ARMISD::VSHLLs"; 951 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 952 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 953 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 954 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 955 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 956 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 957 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 958 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 959 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 960 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 961 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 962 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 963 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 964 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 965 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 966 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 967 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 968 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 969 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 970 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; 971 case ARMISD::VDUP: return "ARMISD::VDUP"; 972 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 973 case ARMISD::VEXT: return "ARMISD::VEXT"; 974 case ARMISD::VREV64: return "ARMISD::VREV64"; 975 case ARMISD::VREV32: return "ARMISD::VREV32"; 976 case ARMISD::VREV16: return "ARMISD::VREV16"; 977 case ARMISD::VZIP: return "ARMISD::VZIP"; 978 case ARMISD::VUZP: return "ARMISD::VUZP"; 979 case ARMISD::VTRN: return "ARMISD::VTRN"; 980 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 981 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 982 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 983 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 984 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 985 case ARMISD::FMAX: return "ARMISD::FMAX"; 986 case ARMISD::FMIN: return "ARMISD::FMIN"; 987 case ARMISD::BFI: return "ARMISD::BFI"; 988 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 989 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 990 case ARMISD::VBSL: return "ARMISD::VBSL"; 991 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 992 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 993 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 994 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 995 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 996 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 997 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 998 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 999 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 1000 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 1001 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 1002 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 1003 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 1004 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 1005 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 1006 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 1007 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 1008 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 1009 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 1010 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 1011 } 1012} 1013 1014EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { 1015 if (!VT.isVector()) return getPointerTy(); 1016 return VT.changeVectorElementTypeToInteger(); 1017} 1018 1019/// getRegClassFor - Return the register class that should be used for the 1020/// specified value type. 
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
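// After a VFP compare the flags transferred by FMSTAT encode "unordered" as
// V = 1, so a few IEEE predicates have no single ARM condition and come back
// from FPCCToARMCC as a pair, with CondCode2 left at AL when unused.  For
// example:
//
//     SETONE -> MI, then GT     (ordered less-than OR ordered greater-than)
//     SETUEQ -> EQ, then VS     (equal OR unordered)
//
// Callers emit one conditional operation per returned condition code.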
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO: CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fallthrough
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
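  // With the GPR-based return conventions an f64 result comes back as two i32
  // halves (e.g. r0/r1) that the analysis below marks needsCustom; the loop
  // that follows glues them back together with ARMISD::VMOVDRR, and a v2f64
  // result is rebuilt the same way, one f64 lane at a time.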
1182 SmallVector<CCValAssign, 16> RVLocs; 1183 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1184 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1185 CCInfo.AnalyzeCallResult(Ins, 1186 CCAssignFnForNode(CallConv, /* Return*/ true, 1187 isVarArg)); 1188 1189 // Copy all of the result registers out of their specified physreg. 1190 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1191 CCValAssign VA = RVLocs[i]; 1192 1193 SDValue Val; 1194 if (VA.needsCustom()) { 1195 // Handle f64 or half of a v2f64. 1196 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1197 InFlag); 1198 Chain = Lo.getValue(1); 1199 InFlag = Lo.getValue(2); 1200 VA = RVLocs[++i]; // skip ahead to next loc 1201 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1202 InFlag); 1203 Chain = Hi.getValue(1); 1204 InFlag = Hi.getValue(2); 1205 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1206 1207 if (VA.getLocVT() == MVT::v2f64) { 1208 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1209 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1210 DAG.getConstant(0, MVT::i32)); 1211 1212 VA = RVLocs[++i]; // skip ahead to next loc 1213 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1214 Chain = Lo.getValue(1); 1215 InFlag = Lo.getValue(2); 1216 VA = RVLocs[++i]; // skip ahead to next loc 1217 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1218 Chain = Hi.getValue(1); 1219 InFlag = Hi.getValue(2); 1220 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1221 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1222 DAG.getConstant(1, MVT::i32)); 1223 } 1224 } else { 1225 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1226 InFlag); 1227 Chain = Val.getValue(1); 1228 InFlag = Val.getValue(2); 1229 } 1230 1231 switch (VA.getLocInfo()) { 1232 default: llvm_unreachable("Unknown loc info!"); 1233 case CCValAssign::Full: break; 1234 case CCValAssign::BCvt: 1235 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1236 break; 1237 } 1238 1239 InVals.push_back(Val); 1240 } 1241 1242 return Chain; 1243} 1244 1245/// LowerMemOpCallTo - Store the argument to the stack. 
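/// The slot is addressed as SP plus the location's byte offset, so an
/// argument that the calling-convention analysis placed at offset 8 becomes a
/// plain store to [sp, #8] in the outgoing argument area.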
1246SDValue 1247ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1248 SDValue StackPtr, SDValue Arg, 1249 DebugLoc dl, SelectionDAG &DAG, 1250 const CCValAssign &VA, 1251 ISD::ArgFlagsTy Flags) const { 1252 unsigned LocMemOffset = VA.getLocMemOffset(); 1253 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1254 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1255 return DAG.getStore(Chain, dl, Arg, PtrOff, 1256 MachinePointerInfo::getStack(LocMemOffset), 1257 false, false, 0); 1258} 1259 1260void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, 1261 SDValue Chain, SDValue &Arg, 1262 RegsToPassVector &RegsToPass, 1263 CCValAssign &VA, CCValAssign &NextVA, 1264 SDValue &StackPtr, 1265 SmallVector<SDValue, 8> &MemOpChains, 1266 ISD::ArgFlagsTy Flags) const { 1267 1268 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1269 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1270 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 1271 1272 if (NextVA.isRegLoc()) 1273 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 1274 else { 1275 assert(NextVA.isMemLoc()); 1276 if (StackPtr.getNode() == 0) 1277 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1278 1279 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 1280 dl, DAG, NextVA, 1281 Flags)); 1282 } 1283} 1284 1285/// LowerCall - Lowering a call into a callseq_start <- 1286/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1287/// nodes. 1288SDValue 1289ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1290 SmallVectorImpl<SDValue> &InVals) const { 1291 SelectionDAG &DAG = CLI.DAG; 1292 DebugLoc &dl = CLI.DL; 1293 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; 1294 SmallVector<SDValue, 32> &OutVals = CLI.OutVals; 1295 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; 1296 SDValue Chain = CLI.Chain; 1297 SDValue Callee = CLI.Callee; 1298 bool &isTailCall = CLI.IsTailCall; 1299 CallingConv::ID CallConv = CLI.CallConv; 1300 bool doesNotRet = CLI.DoesNotReturn; 1301 bool isVarArg = CLI.IsVarArg; 1302 1303 MachineFunction &MF = DAG.getMachineFunction(); 1304 bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1305 bool IsSibCall = false; 1306 // Disable tail calls if they're not supported. 1307 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 1308 isTailCall = false; 1309 if (isTailCall) { 1310 // Check if it's really possible to do a tail call. 1311 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1312 isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), 1313 Outs, OutVals, Ins, DAG); 1314 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1315 // detected sibcalls. 1316 if (isTailCall) { 1317 ++NumTailCalls; 1318 IsSibCall = true; 1319 } 1320 } 1321 1322 // Analyze operands of the call, assigning locations to each operand. 1323 SmallVector<CCValAssign, 16> ArgLocs; 1324 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1325 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1326 CCInfo.AnalyzeCallOperands(Outs, 1327 CCAssignFnForNode(CallConv, /* Return*/ false, 1328 isVarArg)); 1329 1330 // Get a count of how many bytes are to be pushed on the stack. 1331 unsigned NumBytes = CCInfo.getNextStackOffset(); 1332 1333 // For tail calls, memory operands are available in our caller's stack. 1334 if (IsSibCall) 1335 NumBytes = 0; 1336 1337 // Adjust the stack pointer for the new arguments... 
1338 // These operations are automatically eliminated by the prolog/epilog pass 1339 if (!IsSibCall) 1340 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1341 1342 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1343 1344 RegsToPassVector RegsToPass; 1345 SmallVector<SDValue, 8> MemOpChains; 1346 1347 // Walk the register/memloc assignments, inserting copies/loads. In the case 1348 // of tail call optimization, arguments are handled later. 1349 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1350 i != e; 1351 ++i, ++realArgIdx) { 1352 CCValAssign &VA = ArgLocs[i]; 1353 SDValue Arg = OutVals[realArgIdx]; 1354 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1355 bool isByVal = Flags.isByVal(); 1356 1357 // Promote the value if needed. 1358 switch (VA.getLocInfo()) { 1359 default: llvm_unreachable("Unknown loc info!"); 1360 case CCValAssign::Full: break; 1361 case CCValAssign::SExt: 1362 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1363 break; 1364 case CCValAssign::ZExt: 1365 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1366 break; 1367 case CCValAssign::AExt: 1368 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1369 break; 1370 case CCValAssign::BCvt: 1371 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1372 break; 1373 } 1374 1375 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1376 if (VA.needsCustom()) { 1377 if (VA.getLocVT() == MVT::v2f64) { 1378 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1379 DAG.getConstant(0, MVT::i32)); 1380 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1381 DAG.getConstant(1, MVT::i32)); 1382 1383 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1384 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1385 1386 VA = ArgLocs[++i]; // skip ahead to next loc 1387 if (VA.isRegLoc()) { 1388 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1389 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1390 } else { 1391 assert(VA.isMemLoc()); 1392 1393 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1394 dl, DAG, VA, Flags)); 1395 } 1396 } else { 1397 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1398 StackPtr, MemOpChains, Flags); 1399 } 1400 } else if (VA.isRegLoc()) { 1401 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1402 } else if (isByVal) { 1403 assert(VA.isMemLoc()); 1404 unsigned offset = 0; 1405 1406 // True if this byval aggregate will be split between registers 1407 // and memory. 
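      // Example: for a 20-byte byval aggregate whose first byval register is
      // r2, the loop below loads the first two words into r2 and r3, 'offset'
      // becomes ARM::R4 - r2 = 2 words, and the remaining 20 - 8 = 12 bytes
      // are copied into the outgoing stack area by the
      // ARMISD::COPY_STRUCT_BYVAL node built just after.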
1408 if (CCInfo.isFirstByValRegValid()) { 1409 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1410 unsigned int i, j; 1411 for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { 1412 SDValue Const = DAG.getConstant(4*i, MVT::i32); 1413 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1414 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1415 MachinePointerInfo(), 1416 false, false, false, 0); 1417 MemOpChains.push_back(Load.getValue(1)); 1418 RegsToPass.push_back(std::make_pair(j, Load)); 1419 } 1420 offset = ARM::R4 - CCInfo.getFirstByValReg(); 1421 CCInfo.clearFirstByValReg(); 1422 } 1423 1424 if (Flags.getByValSize() - 4*offset > 0) { 1425 unsigned LocMemOffset = VA.getLocMemOffset(); 1426 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1427 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1428 StkPtrOff); 1429 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1430 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1431 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1432 MVT::i32); 1433 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); 1434 1435 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 1436 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; 1437 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, 1438 Ops, array_lengthof(Ops))); 1439 } 1440 } else if (!IsSibCall) { 1441 assert(VA.isMemLoc()); 1442 1443 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1444 dl, DAG, VA, Flags)); 1445 } 1446 } 1447 1448 if (!MemOpChains.empty()) 1449 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1450 &MemOpChains[0], MemOpChains.size()); 1451 1452 // Build a sequence of copy-to-reg nodes chained together with token chain 1453 // and flag operands which copy the outgoing args into the appropriate regs. 1454 SDValue InFlag; 1455 // Tail call byval lowering might overwrite argument registers so in case of 1456 // tail call optimization the copies to registers are lowered later. 1457 if (!isTailCall) 1458 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1459 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1460 RegsToPass[i].second, InFlag); 1461 InFlag = Chain.getValue(1); 1462 } 1463 1464 // For tail calls lower the arguments to the 'real' stack slot. 1465 if (isTailCall) { 1466 // Force all the incoming stack arguments to be loaded from the stack 1467 // before any new outgoing arguments are stored to the stack, because the 1468 // outgoing stack slots may alias the incoming argument stack slots, and 1469 // the alias isn't otherwise explicit. This is slightly more conservative 1470 // than necessary, because it means that each store effectively depends 1471 // on every argument instead of just those arguments it would clobber. 1472 1473 // Do not flag preceding copytoreg stuff together with the following stuff. 1474 InFlag = SDValue(); 1475 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1476 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1477 RegsToPass[i].second, InFlag); 1478 InFlag = Chain.getValue(1); 1479 } 1480 InFlag =SDValue(); 1481 } 1482 1483 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1484 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1485 // node so that legalize doesn't hack it. 
1486 bool isDirect = false; 1487 bool isARMFunc = false; 1488 bool isLocalARMFunc = false; 1489 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1490 1491 if (EnableARMLongCalls) { 1492 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1493 && "long-calls with non-static relocation model!"); 1494 // Handle a global address or an external symbol. If it's not one of 1495 // those, the target's already in a register, so we don't need to do 1496 // anything extra. 1497 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1498 const GlobalValue *GV = G->getGlobal(); 1499 // Create a constant pool entry for the callee address 1500 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1501 ARMConstantPoolValue *CPV = 1502 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1503 1504 // Get the address of the callee into a register 1505 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1506 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1507 Callee = DAG.getLoad(getPointerTy(), dl, 1508 DAG.getEntryNode(), CPAddr, 1509 MachinePointerInfo::getConstantPool(), 1510 false, false, false, 0); 1511 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1512 const char *Sym = S->getSymbol(); 1513 1514 // Create a constant pool entry for the callee address 1515 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1516 ARMConstantPoolValue *CPV = 1517 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1518 ARMPCLabelIndex, 0); 1519 // Get the address of the callee into a register 1520 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1521 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1522 Callee = DAG.getLoad(getPointerTy(), dl, 1523 DAG.getEntryNode(), CPAddr, 1524 MachinePointerInfo::getConstantPool(), 1525 false, false, false, 0); 1526 } 1527 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1528 const GlobalValue *GV = G->getGlobal(); 1529 isDirect = true; 1530 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1531 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1532 getTargetMachine().getRelocationModel() != Reloc::Static; 1533 isARMFunc = !Subtarget->isThumb() || isStub; 1534 // ARM call to a local ARM function is predicable. 1535 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1536 // tBX takes a register source operand. 
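    // Note: BLX only exists from ARMv5T on, so a pre-v5 Thumb caller cannot
    // branch-and-link straight into ARM code; the block below instead loads
    // the callee's address from the constant pool (plus a PIC add) so the
    // call can be made through a register.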
1537 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1538 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1539 ARMConstantPoolValue *CPV = 1540 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1541 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1542 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1543 Callee = DAG.getLoad(getPointerTy(), dl, 1544 DAG.getEntryNode(), CPAddr, 1545 MachinePointerInfo::getConstantPool(), 1546 false, false, false, 0); 1547 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1548 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1549 getPointerTy(), Callee, PICLabel); 1550 } else { 1551 // On ELF targets for PIC code, direct calls should go through the PLT 1552 unsigned OpFlags = 0; 1553 if (Subtarget->isTargetELF() && 1554 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1555 OpFlags = ARMII::MO_PLT; 1556 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1557 } 1558 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1559 isDirect = true; 1560 bool isStub = Subtarget->isTargetDarwin() && 1561 getTargetMachine().getRelocationModel() != Reloc::Static; 1562 isARMFunc = !Subtarget->isThumb() || isStub; 1563 // tBX takes a register source operand. 1564 const char *Sym = S->getSymbol(); 1565 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1566 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1567 ARMConstantPoolValue *CPV = 1568 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1569 ARMPCLabelIndex, 4); 1570 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1571 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1572 Callee = DAG.getLoad(getPointerTy(), dl, 1573 DAG.getEntryNode(), CPAddr, 1574 MachinePointerInfo::getConstantPool(), 1575 false, false, false, 0); 1576 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1577 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1578 getPointerTy(), Callee, PICLabel); 1579 } else { 1580 unsigned OpFlags = 0; 1581 // On ELF targets for PIC code, direct calls should go through the PLT 1582 if (Subtarget->isTargetELF() && 1583 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1584 OpFlags = ARMII::MO_PLT; 1585 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1586 } 1587 } 1588 1589 // FIXME: handle tail calls differently. 1590 unsigned CallOpc; 1591 if (Subtarget->isThumb()) { 1592 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1593 CallOpc = ARMISD::CALL_NOLINK; 1594 else if (doesNotRet && isDirect && !isARMFunc && 1595 Subtarget->hasRAS() && !Subtarget->isThumb1Only()) 1596 // "mov lr, pc; b _foo" to avoid confusing the RSP 1597 CallOpc = ARMISD::CALL_NOLINK; 1598 else 1599 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1600 } else { 1601 if (!isDirect && !Subtarget->hasV5TOps()) { 1602 CallOpc = ARMISD::CALL_NOLINK; 1603 } else if (doesNotRet && isDirect && Subtarget->hasRAS()) 1604 // "mov lr, pc; b _foo" to avoid confusing the RSP 1605 CallOpc = ARMISD::CALL_NOLINK; 1606 else 1607 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 1608 } 1609 1610 std::vector<SDValue> Ops; 1611 Ops.push_back(Chain); 1612 Ops.push_back(Callee); 1613 1614 // Add argument registers to the end of the list so that they are known live 1615 // into the call. 
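  // Once this loop and the register-mask operand below have run, the call
  // node's operands are laid out as
  //   { Chain, Callee, <argument registers...>, RegisterMask, [InFlag] },
  // which is exactly what the CallOpc / ARMISD::TC_RETURN node built further
  // down receives.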
1616 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1617 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1618 RegsToPass[i].second.getValueType())); 1619 1620 // Add a register mask operand representing the call-preserved registers. 1621 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 1622 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 1623 assert(Mask && "Missing call preserved mask for calling convention"); 1624 Ops.push_back(DAG.getRegisterMask(Mask)); 1625 1626 if (InFlag.getNode()) 1627 Ops.push_back(InFlag); 1628 1629 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1630 if (isTailCall) 1631 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1632 1633 // Returns a chain and a flag for retval copy to use. 1634 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1635 InFlag = Chain.getValue(1); 1636 1637 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1638 DAG.getIntPtrConstant(0, true), InFlag); 1639 if (!Ins.empty()) 1640 InFlag = Chain.getValue(1); 1641 1642 // Handle result values, copying them out of physregs into vregs that we 1643 // return. 1644 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1645 dl, DAG, InVals); 1646} 1647 1648/// HandleByVal - Every parameter *after* a byval parameter is passed 1649/// on the stack. Remember the next parameter register to allocate, 1650/// and then confiscate the rest of the parameter registers to insure 1651/// this. 1652void 1653ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { 1654 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 1655 assert((State->getCallOrPrologue() == Prologue || 1656 State->getCallOrPrologue() == Call) && 1657 "unhandled ParmContext"); 1658 if ((!State->isFirstByValRegValid()) && 1659 (ARM::R0 <= reg) && (reg <= ARM::R3)) { 1660 State->setFirstByValReg(reg); 1661 // At a call site, a byval parameter that is split between 1662 // registers and memory needs its size truncated here. In a 1663 // function prologue, such byval parameters are reassembled in 1664 // memory, and are not truncated. 1665 if (State->getCallOrPrologue() == Call) { 1666 unsigned excess = 4 * (ARM::R4 - reg); 1667 assert(size >= excess && "expected larger existing stack allocation"); 1668 size -= excess; 1669 } 1670 } 1671 // Confiscate any remaining parameter registers to preclude their 1672 // assignment to subsequent parameters. 1673 while (State->AllocateReg(GPRArgRegs, 4)) 1674 ; 1675} 1676 1677/// MatchingStackOffset - Return true if the given stack call argument is 1678/// already available in the same position (relatively) of the caller's 1679/// incoming argument stack. 
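/// For example, an i32 that the caller received in a fixed 4-byte stack slot
/// at offset 8 and that the sibling call passes again at outgoing offset 8
/// never needs to be copied; the check below recognizes this by tracing the
/// value back to a load from a fixed frame index and comparing that object's
/// offset and size with the outgoing slot's.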
1680static 1681bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1682 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1683 const TargetInstrInfo *TII) { 1684 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1685 int FI = INT_MAX; 1686 if (Arg.getOpcode() == ISD::CopyFromReg) { 1687 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1688 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1689 return false; 1690 MachineInstr *Def = MRI->getVRegDef(VR); 1691 if (!Def) 1692 return false; 1693 if (!Flags.isByVal()) { 1694 if (!TII->isLoadFromStackSlot(Def, FI)) 1695 return false; 1696 } else { 1697 return false; 1698 } 1699 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1700 if (Flags.isByVal()) 1701 // ByVal argument is passed in as a pointer but it's now being 1702 // dereferenced. e.g. 1703 // define @foo(%struct.X* %A) { 1704 // tail call @bar(%struct.X* byval %A) 1705 // } 1706 return false; 1707 SDValue Ptr = Ld->getBasePtr(); 1708 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1709 if (!FINode) 1710 return false; 1711 FI = FINode->getIndex(); 1712 } else 1713 return false; 1714 1715 assert(FI != INT_MAX); 1716 if (!MFI->isFixedObjectIndex(FI)) 1717 return false; 1718 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1719} 1720 1721/// IsEligibleForTailCallOptimization - Check whether the call is eligible 1722/// for tail call optimization. Targets which want to do tail call 1723/// optimization should implement this function. 1724bool 1725ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1726 CallingConv::ID CalleeCC, 1727 bool isVarArg, 1728 bool isCalleeStructRet, 1729 bool isCallerStructRet, 1730 const SmallVectorImpl<ISD::OutputArg> &Outs, 1731 const SmallVectorImpl<SDValue> &OutVals, 1732 const SmallVectorImpl<ISD::InputArg> &Ins, 1733 SelectionDAG& DAG) const { 1734 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1735 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1736 bool CCMatch = CallerCC == CalleeCC; 1737 1738 // Look for obvious safe cases to perform tail call optimization that do not 1739 // require ABI changes. This is what gcc calls sibcall. 1740 1741 // Do not sibcall optimize vararg calls unless the call site is not passing 1742 // any arguments. 1743 if (isVarArg && !Outs.empty()) 1744 return false; 1745 1746 // Also avoid sibcall optimization if either caller or callee uses struct 1747 // return semantics. 1748 if (isCalleeStructRet || isCallerStructRet) 1749 return false; 1750 1751 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1752 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 1753 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 1754 // support in the assembler and linker to be used. This would need to be 1755 // fixed to fully support tail calls in Thumb1. 1756 // 1757 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1758 // LR. This means if we need to reload LR, it takes an extra instructions, 1759 // which outweighs the value of the tail call; but here we don't know yet 1760 // whether LR is going to be used. Probably the right approach is to 1761 // generate the tail call here and turn it back into CALL/RET in 1762 // emitEpilogue if LR is used. 
1763 1764 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1765 // but we need to make sure there are enough registers; the only valid 1766 // registers are the 4 used for parameters. We don't currently do this 1767 // case. 1768 if (Subtarget->isThumb1Only()) 1769 return false; 1770 1771 // If the calling conventions do not match, then we'd better make sure the 1772 // results are returned in the same way as what the caller expects. 1773 if (!CCMatch) { 1774 SmallVector<CCValAssign, 16> RVLocs1; 1775 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 1776 getTargetMachine(), RVLocs1, *DAG.getContext(), Call); 1777 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1778 1779 SmallVector<CCValAssign, 16> RVLocs2; 1780 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 1781 getTargetMachine(), RVLocs2, *DAG.getContext(), Call); 1782 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1783 1784 if (RVLocs1.size() != RVLocs2.size()) 1785 return false; 1786 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1787 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1788 return false; 1789 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1790 return false; 1791 if (RVLocs1[i].isRegLoc()) { 1792 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1793 return false; 1794 } else { 1795 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1796 return false; 1797 } 1798 } 1799 } 1800 1801 // If the callee takes no arguments then go on to check the results of the 1802 // call. 1803 if (!Outs.empty()) { 1804 // Check if stack adjustment is needed. For now, do not do this if any 1805 // argument is passed on the stack. 1806 SmallVector<CCValAssign, 16> ArgLocs; 1807 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 1808 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1809 CCInfo.AnalyzeCallOperands(Outs, 1810 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1811 if (CCInfo.getNextStackOffset()) { 1812 MachineFunction &MF = DAG.getMachineFunction(); 1813 1814 // Check if the arguments are already laid out in the right way as 1815 // the caller's fixed stack objects. 1816 MachineFrameInfo *MFI = MF.getFrameInfo(); 1817 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1818 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 1819 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1820 i != e; 1821 ++i, ++realArgIdx) { 1822 CCValAssign &VA = ArgLocs[i]; 1823 EVT RegVT = VA.getLocVT(); 1824 SDValue Arg = OutVals[realArgIdx]; 1825 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1826 if (VA.getLocInfo() == CCValAssign::Indirect) 1827 return false; 1828 if (VA.needsCustom()) { 1829 // f64 and vector types are split into multiple registers or 1830 // register/stack-slot combinations. The types will not match 1831 // the registers; give up on memory f64 refs until we figure 1832 // out what to do about this. 
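        // Note: a custom-assigned f64 occupies two consecutive ArgLocs (one
        // per i32 half) and a v2f64 occupies four, which is why the checks
        // below advance i past each piece while requiring that every piece
        // landed in a register.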
1833 if (!VA.isRegLoc()) 1834 return false; 1835 if (!ArgLocs[++i].isRegLoc()) 1836 return false; 1837 if (RegVT == MVT::v2f64) { 1838 if (!ArgLocs[++i].isRegLoc()) 1839 return false; 1840 if (!ArgLocs[++i].isRegLoc()) 1841 return false; 1842 } 1843 } else if (!VA.isRegLoc()) { 1844 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1845 MFI, MRI, TII)) 1846 return false; 1847 } 1848 } 1849 } 1850 } 1851 1852 return true; 1853} 1854 1855SDValue 1856ARMTargetLowering::LowerReturn(SDValue Chain, 1857 CallingConv::ID CallConv, bool isVarArg, 1858 const SmallVectorImpl<ISD::OutputArg> &Outs, 1859 const SmallVectorImpl<SDValue> &OutVals, 1860 DebugLoc dl, SelectionDAG &DAG) const { 1861 1862 // CCValAssign - represent the assignment of the return value to a location. 1863 SmallVector<CCValAssign, 16> RVLocs; 1864 1865 // CCState - Info about the registers and stack slots. 1866 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1867 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1868 1869 // Analyze outgoing return values. 1870 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1871 isVarArg)); 1872 1873 // If this is the first return lowered for this function, add 1874 // the regs to the liveout set for the function. 1875 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1876 for (unsigned i = 0; i != RVLocs.size(); ++i) 1877 if (RVLocs[i].isRegLoc()) 1878 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1879 } 1880 1881 SDValue Flag; 1882 1883 // Copy the result values into the output registers. 1884 for (unsigned i = 0, realRVLocIdx = 0; 1885 i != RVLocs.size(); 1886 ++i, ++realRVLocIdx) { 1887 CCValAssign &VA = RVLocs[i]; 1888 assert(VA.isRegLoc() && "Can only return in registers!"); 1889 1890 SDValue Arg = OutVals[realRVLocIdx]; 1891 1892 switch (VA.getLocInfo()) { 1893 default: llvm_unreachable("Unknown loc info!"); 1894 case CCValAssign::Full: break; 1895 case CCValAssign::BCvt: 1896 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1897 break; 1898 } 1899 1900 if (VA.needsCustom()) { 1901 if (VA.getLocVT() == MVT::v2f64) { 1902 // Extract the first half and return it in two registers. 1903 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1904 DAG.getConstant(0, MVT::i32)); 1905 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1906 DAG.getVTList(MVT::i32, MVT::i32), Half); 1907 1908 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1909 Flag = Chain.getValue(1); 1910 VA = RVLocs[++i]; // skip ahead to next loc 1911 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1912 HalfGPRs.getValue(1), Flag); 1913 Flag = Chain.getValue(1); 1914 VA = RVLocs[++i]; // skip ahead to next loc 1915 1916 // Extract the 2nd half and fall through to handle it as an f64 value. 1917 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1918 DAG.getConstant(1, MVT::i32)); 1919 } 1920 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1921 // available. 
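      // For a plain f64 return this VMOVRRD yields the two i32 halves, which
      // the CopyToReg nodes below place in a pair of return registers
      // (typically R0 and R1 under the soft-float AAPCS), consuming two
      // RVLocs.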
1922 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1923 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1924 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1925 Flag = Chain.getValue(1); 1926 VA = RVLocs[++i]; // skip ahead to next loc 1927 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1928 Flag); 1929 } else 1930 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1931 1932 // Guarantee that all emitted copies are 1933 // stuck together, avoiding something bad. 1934 Flag = Chain.getValue(1); 1935 } 1936 1937 SDValue result; 1938 if (Flag.getNode()) 1939 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1940 else // Return Void 1941 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1942 1943 return result; 1944} 1945 1946bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 1947 if (N->getNumValues() != 1) 1948 return false; 1949 if (!N->hasNUsesOfValue(1, 0)) 1950 return false; 1951 1952 SDValue TCChain = Chain; 1953 SDNode *Copy = *N->use_begin(); 1954 if (Copy->getOpcode() == ISD::CopyToReg) { 1955 // If the copy has a glue operand, we conservatively assume it isn't safe to 1956 // perform a tail call. 1957 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 1958 return false; 1959 TCChain = Copy->getOperand(0); 1960 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { 1961 SDNode *VMov = Copy; 1962 // f64 returned in a pair of GPRs. 1963 SmallPtrSet<SDNode*, 2> Copies; 1964 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 1965 UI != UE; ++UI) { 1966 if (UI->getOpcode() != ISD::CopyToReg) 1967 return false; 1968 Copies.insert(*UI); 1969 } 1970 if (Copies.size() > 2) 1971 return false; 1972 1973 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 1974 UI != UE; ++UI) { 1975 SDValue UseChain = UI->getOperand(0); 1976 if (Copies.count(UseChain.getNode())) 1977 // Second CopyToReg 1978 Copy = *UI; 1979 else 1980 // First CopyToReg 1981 TCChain = UseChain; 1982 } 1983 } else if (Copy->getOpcode() == ISD::BITCAST) { 1984 // f32 returned in a single GPR. 1985 if (!Copy->hasOneUse()) 1986 return false; 1987 Copy = *Copy->use_begin(); 1988 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) 1989 return false; 1990 Chain = Copy->getOperand(0); 1991 } else { 1992 return false; 1993 } 1994 1995 bool HasRet = false; 1996 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 1997 UI != UE; ++UI) { 1998 if (UI->getOpcode() != ARMISD::RET_FLAG) 1999 return false; 2000 HasRet = true; 2001 } 2002 2003 if (!HasRet) 2004 return false; 2005 2006 Chain = TCChain; 2007 return true; 2008} 2009 2010bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 2011 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 2012 return false; 2013 2014 if (!CI->isTailCall()) 2015 return false; 2016 2017 return !Subtarget->isThumb1Only(); 2018} 2019 2020// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2021// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 2022// one of the above mentioned nodes. It has to be wrapped because otherwise 2023// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2024// be used to form addressing mode. These wrapped nodes will be selected 2025// into MOVi. 
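// For illustration, LowerConstantPool below performs the rewrite
//   (ConstantPool <cp>)  ->  (ARMISD::Wrapper (TargetConstantPool <cp>))
// and the other routines in this group do the same for block addresses,
// global addresses and external symbols, so that selection sees target nodes
// it can fold into an address or materialize with a move.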
2026static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2027 EVT PtrVT = Op.getValueType(); 2028 // FIXME there is no actual debug info here 2029 DebugLoc dl = Op.getDebugLoc(); 2030 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2031 SDValue Res; 2032 if (CP->isMachineConstantPoolEntry()) 2033 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2034 CP->getAlignment()); 2035 else 2036 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2037 CP->getAlignment()); 2038 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2039} 2040 2041unsigned ARMTargetLowering::getJumpTableEncoding() const { 2042 return MachineJumpTableInfo::EK_Inline; 2043} 2044 2045SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2046 SelectionDAG &DAG) const { 2047 MachineFunction &MF = DAG.getMachineFunction(); 2048 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2049 unsigned ARMPCLabelIndex = 0; 2050 DebugLoc DL = Op.getDebugLoc(); 2051 EVT PtrVT = getPointerTy(); 2052 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2053 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2054 SDValue CPAddr; 2055 if (RelocM == Reloc::Static) { 2056 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2057 } else { 2058 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2059 ARMPCLabelIndex = AFI->createPICLabelUId(); 2060 ARMConstantPoolValue *CPV = 2061 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2062 ARMCP::CPBlockAddress, PCAdj); 2063 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2064 } 2065 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2066 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2067 MachinePointerInfo::getConstantPool(), 2068 false, false, false, 0); 2069 if (RelocM == Reloc::Static) 2070 return Result; 2071 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2072 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2073} 2074 2075// Lower ISD::GlobalTLSAddress using the "general dynamic" model 2076SDValue 2077ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2078 SelectionDAG &DAG) const { 2079 DebugLoc dl = GA->getDebugLoc(); 2080 EVT PtrVT = getPointerTy(); 2081 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2082 MachineFunction &MF = DAG.getMachineFunction(); 2083 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2084 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2085 ARMConstantPoolValue *CPV = 2086 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2087 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2088 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2089 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2090 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2091 MachinePointerInfo::getConstantPool(), 2092 false, false, false, 0); 2093 SDValue Chain = Argument.getValue(1); 2094 2095 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2096 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2097 2098 // call __tls_get_addr. 2099 ArgListTy Args; 2100 ArgListEntry Entry; 2101 Entry.Node = Argument; 2102 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2103 Args.push_back(Entry); 2104 // FIXME: is there useful debug info available here? 
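  // In other words (general-dynamic model): the TLSGD constant-pool entry
  // materialized above, after the PIC add, is handed to __tls_get_addr by the
  // call built below, and the call's result is the variable's address.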
2105 TargetLowering::CallLoweringInfo CLI(Chain, 2106 (Type *) Type::getInt32Ty(*DAG.getContext()), 2107 false, false, false, false, 2108 0, CallingConv::C, /*isTailCall=*/false, 2109 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 2110 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 2111 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2112 return CallResult.first; 2113} 2114 2115// Lower ISD::GlobalTLSAddress using the "initial exec" or 2116// "local exec" model. 2117SDValue 2118ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2119 SelectionDAG &DAG, 2120 TLSModel::Model model) const { 2121 const GlobalValue *GV = GA->getGlobal(); 2122 DebugLoc dl = GA->getDebugLoc(); 2123 SDValue Offset; 2124 SDValue Chain = DAG.getEntryNode(); 2125 EVT PtrVT = getPointerTy(); 2126 // Get the Thread Pointer 2127 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2128 2129 if (model == TLSModel::InitialExec) { 2130 MachineFunction &MF = DAG.getMachineFunction(); 2131 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2132 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2133 // Initial exec model. 2134 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2135 ARMConstantPoolValue *CPV = 2136 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2137 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2138 true); 2139 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2140 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2141 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2142 MachinePointerInfo::getConstantPool(), 2143 false, false, false, 0); 2144 Chain = Offset.getValue(1); 2145 2146 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2147 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2148 2149 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2150 MachinePointerInfo::getConstantPool(), 2151 false, false, false, 0); 2152 } else { 2153 // local exec model 2154 assert(model == TLSModel::LocalExec); 2155 ARMConstantPoolValue *CPV = 2156 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2157 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2158 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2159 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2160 MachinePointerInfo::getConstantPool(), 2161 false, false, false, 0); 2162 } 2163 2164 // The address of the thread local variable is the add of the thread 2165 // pointer with the offset of the variable. 
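  // In the initial-exec case the offset was loaded indirectly through a
  // GOTTPOFF slot; in the local-exec case it is a TPOFF constant taken from
  // the constant pool. Either way the final address is simply
  // ThreadPointer + Offset, formed below.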
2166 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2167} 2168 2169SDValue 2170ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2171 // TODO: implement the "local dynamic" model 2172 assert(Subtarget->isTargetELF() && 2173 "TLS not implemented for non-ELF targets"); 2174 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2175 2176 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 2177 2178 switch (model) { 2179 case TLSModel::GeneralDynamic: 2180 case TLSModel::LocalDynamic: 2181 return LowerToTLSGeneralDynamicModel(GA, DAG); 2182 case TLSModel::InitialExec: 2183 case TLSModel::LocalExec: 2184 return LowerToTLSExecModels(GA, DAG, model); 2185 } 2186 llvm_unreachable("bogus TLS model"); 2187} 2188 2189SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2190 SelectionDAG &DAG) const { 2191 EVT PtrVT = getPointerTy(); 2192 DebugLoc dl = Op.getDebugLoc(); 2193 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2194 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2195 if (RelocM == Reloc::PIC_) { 2196 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2197 ARMConstantPoolValue *CPV = 2198 ARMConstantPoolConstant::Create(GV, 2199 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2200 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2201 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2202 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2203 CPAddr, 2204 MachinePointerInfo::getConstantPool(), 2205 false, false, false, 0); 2206 SDValue Chain = Result.getValue(1); 2207 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2208 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2209 if (!UseGOTOFF) 2210 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2211 MachinePointerInfo::getGOT(), 2212 false, false, false, 0); 2213 return Result; 2214 } 2215 2216 // If we have T2 ops, we can materialize the address directly via movt/movw 2217 // pair. This is always cheaper. 2218 if (Subtarget->useMovt()) { 2219 ++NumMovwMovt; 2220 // FIXME: Once remat is capable of dealing with instructions with register 2221 // operands, expand this into two nodes. 2222 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2223 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2224 } else { 2225 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2226 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2227 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2228 MachinePointerInfo::getConstantPool(), 2229 false, false, false, 0); 2230 } 2231} 2232 2233SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2234 SelectionDAG &DAG) const { 2235 EVT PtrVT = getPointerTy(); 2236 DebugLoc dl = Op.getDebugLoc(); 2237 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2238 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2239 MachineFunction &MF = DAG.getMachineFunction(); 2240 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2241 2242 // FIXME: Enable this for static codegen when tool issues are fixed. Also 2243 // update ARMFastISel::ARMMaterializeGV. 2244 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2245 ++NumMovwMovt; 2246 // FIXME: Once remat is capable of dealing with instructions with register 2247 // operands, expand this into two nodes. 
2248 if (RelocM == Reloc::Static) 2249 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2250 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2251 2252 unsigned Wrapper = (RelocM == Reloc::PIC_) 2253 ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2254 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2255 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2256 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2257 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2258 MachinePointerInfo::getGOT(), 2259 false, false, false, 0); 2260 return Result; 2261 } 2262 2263 unsigned ARMPCLabelIndex = 0; 2264 SDValue CPAddr; 2265 if (RelocM == Reloc::Static) { 2266 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2267 } else { 2268 ARMPCLabelIndex = AFI->createPICLabelUId(); 2269 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2270 ARMConstantPoolValue *CPV = 2271 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2272 PCAdj); 2273 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2274 } 2275 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2276 2277 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2278 MachinePointerInfo::getConstantPool(), 2279 false, false, false, 0); 2280 SDValue Chain = Result.getValue(1); 2281 2282 if (RelocM == Reloc::PIC_) { 2283 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2284 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2285 } 2286 2287 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2288 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2289 false, false, false, 0); 2290 2291 return Result; 2292} 2293 2294SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2295 SelectionDAG &DAG) const { 2296 assert(Subtarget->isTargetELF() && 2297 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2298 MachineFunction &MF = DAG.getMachineFunction(); 2299 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2300 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2301 EVT PtrVT = getPointerTy(); 2302 DebugLoc dl = Op.getDebugLoc(); 2303 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2304 ARMConstantPoolValue *CPV = 2305 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2306 ARMPCLabelIndex, PCAdj); 2307 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2308 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2309 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2310 MachinePointerInfo::getConstantPool(), 2311 false, false, false, 0); 2312 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2313 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2314} 2315 2316SDValue 2317ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2318 DebugLoc dl = Op.getDebugLoc(); 2319 SDValue Val = DAG.getConstant(0, MVT::i32); 2320 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2321 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2322 Op.getOperand(1), Val); 2323} 2324 2325SDValue 2326ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2327 DebugLoc dl = Op.getDebugLoc(); 2328 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2329 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2330} 2331 2332SDValue 2333ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2334 const ARMSubtarget *Subtarget) const { 2335 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2336 DebugLoc dl = Op.getDebugLoc(); 2337 switch (IntNo) { 2338 default: return SDValue(); // Don't custom lower most intrinsics. 2339 case Intrinsic::arm_thread_pointer: { 2340 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2341 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2342 } 2343 case Intrinsic::eh_sjlj_lsda: { 2344 MachineFunction &MF = DAG.getMachineFunction(); 2345 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2346 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2347 EVT PtrVT = getPointerTy(); 2348 DebugLoc dl = Op.getDebugLoc(); 2349 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2350 SDValue CPAddr; 2351 unsigned PCAdj = (RelocM != Reloc::PIC_) 2352 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2353 ARMConstantPoolValue *CPV = 2354 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2355 ARMCP::CPLSDA, PCAdj); 2356 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2357 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2358 SDValue Result = 2359 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2360 MachinePointerInfo::getConstantPool(), 2361 false, false, false, 0); 2362 2363 if (RelocM == Reloc::PIC_) { 2364 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2365 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2366 } 2367 return Result; 2368 } 2369 case Intrinsic::arm_neon_vmulls: 2370 case Intrinsic::arm_neon_vmullu: { 2371 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2372 ? ARMISD::VMULLs : ARMISD::VMULLu; 2373 return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), 2374 Op.getOperand(1), Op.getOperand(2)); 2375 } 2376 } 2377} 2378 2379static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 2380 const ARMSubtarget *Subtarget) { 2381 DebugLoc dl = Op.getDebugLoc(); 2382 if (!Subtarget->hasDataBarrier()) { 2383 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2384 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2385 // here. 2386 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2387 "Unexpected ISD::MEMBARRIER encountered. 
Should be libcall!"); 2388 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2389 DAG.getConstant(0, MVT::i32)); 2390 } 2391 2392 SDValue Op5 = Op.getOperand(5); 2393 bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; 2394 unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2395 unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 2396 bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); 2397 2398 ARM_MB::MemBOpt DMBOpt; 2399 if (isDeviceBarrier) 2400 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; 2401 else 2402 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; 2403 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2404 DAG.getConstant(DMBOpt, MVT::i32)); 2405} 2406 2407 2408static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2409 const ARMSubtarget *Subtarget) { 2410 // FIXME: handle "fence singlethread" more efficiently. 2411 DebugLoc dl = Op.getDebugLoc(); 2412 if (!Subtarget->hasDataBarrier()) { 2413 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2414 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2415 // here. 2416 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2417 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2418 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2419 DAG.getConstant(0, MVT::i32)); 2420 } 2421 2422 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2423 DAG.getConstant(ARM_MB::ISH, MVT::i32)); 2424} 2425 2426static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2427 const ARMSubtarget *Subtarget) { 2428 // ARM pre v5TE and Thumb1 does not have preload instructions. 2429 if (!(Subtarget->isThumb2() || 2430 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2431 // Just preserve the chain. 2432 return Op.getOperand(0); 2433 2434 DebugLoc dl = Op.getDebugLoc(); 2435 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2436 if (!isRead && 2437 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2438 // ARMv7 with MP extension has PLDW. 2439 return Op.getOperand(0); 2440 2441 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2442 if (Subtarget->isThumb()) { 2443 // Invert the bits. 2444 isRead = ~isRead & 1; 2445 isData = ~isData & 1; 2446 } 2447 2448 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2449 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2450 DAG.getConstant(isData, MVT::i32)); 2451} 2452 2453static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2454 MachineFunction &MF = DAG.getMachineFunction(); 2455 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2456 2457 // vastart just stores the address of the VarArgsFrameIndex slot into the 2458 // memory location argument. 
2459 DebugLoc dl = Op.getDebugLoc(); 2460 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2461 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2462 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2463 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2464 MachinePointerInfo(SV), false, false, 0); 2465} 2466 2467SDValue 2468ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2469 SDValue &Root, SelectionDAG &DAG, 2470 DebugLoc dl) const { 2471 MachineFunction &MF = DAG.getMachineFunction(); 2472 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2473 2474 const TargetRegisterClass *RC; 2475 if (AFI->isThumb1OnlyFunction()) 2476 RC = &ARM::tGPRRegClass; 2477 else 2478 RC = &ARM::GPRRegClass; 2479 2480 // Transform the arguments stored in physical registers into virtual ones. 2481 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2482 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2483 2484 SDValue ArgValue2; 2485 if (NextVA.isMemLoc()) { 2486 MachineFrameInfo *MFI = MF.getFrameInfo(); 2487 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2488 2489 // Create load node to retrieve arguments from the stack. 2490 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2491 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2492 MachinePointerInfo::getFixedStack(FI), 2493 false, false, false, 0); 2494 } else { 2495 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2496 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2497 } 2498 2499 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2500} 2501 2502void 2503ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2504 unsigned &VARegSize, unsigned &VARegSaveSize) 2505 const { 2506 unsigned NumGPRs; 2507 if (CCInfo.isFirstByValRegValid()) 2508 NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); 2509 else { 2510 unsigned int firstUnalloced; 2511 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2512 sizeof(GPRArgRegs) / 2513 sizeof(GPRArgRegs[0])); 2514 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2515 } 2516 2517 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2518 VARegSize = NumGPRs * 4; 2519 VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2520} 2521 2522// The remaining GPRs hold either the beginning of variable-argument 2523// data, or the beginning of an aggregate passed by value (usuall 2524// byval). Either way, we allocate stack slots adjacent to the data 2525// provided by our caller, and store the unallocated registers there. 2526// If this is a variadic function, the va_list pointer will begin with 2527// these values; otherwise, this reassembles a (byval) structure that 2528// was split between registers and memory. 
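// Worked example (assuming an 8-byte stack alignment): in a variadic function
// whose fixed arguments occupy only r0, three registers remain, so
// computeRegArea above yields VARegSize = 3 * 4 = 12 bytes and
// VARegSaveSize = (12 + 7) & ~7 = 16 bytes; VarArgStyleRegisters below then
// creates a fixed stack object of that rounded size and stores r1-r3 into it.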
2529void 2530ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 2531 DebugLoc dl, SDValue &Chain, 2532 unsigned ArgOffset) const { 2533 MachineFunction &MF = DAG.getMachineFunction(); 2534 MachineFrameInfo *MFI = MF.getFrameInfo(); 2535 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2536 unsigned firstRegToSaveIndex; 2537 if (CCInfo.isFirstByValRegValid()) 2538 firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; 2539 else { 2540 firstRegToSaveIndex = CCInfo.getFirstUnallocated 2541 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2542 } 2543 2544 unsigned VARegSize, VARegSaveSize; 2545 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2546 if (VARegSaveSize) { 2547 // If this function is vararg, store any remaining integer argument regs 2548 // to their spots on the stack so that they may be loaded by deferencing 2549 // the result of va_next. 2550 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2551 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, 2552 ArgOffset + VARegSaveSize 2553 - VARegSize, 2554 false)); 2555 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2556 getPointerTy()); 2557 2558 SmallVector<SDValue, 4> MemOps; 2559 for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { 2560 const TargetRegisterClass *RC; 2561 if (AFI->isThumb1OnlyFunction()) 2562 RC = &ARM::tGPRRegClass; 2563 else 2564 RC = &ARM::GPRRegClass; 2565 2566 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); 2567 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2568 SDValue Store = 2569 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2570 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), 2571 false, false, 0); 2572 MemOps.push_back(Store); 2573 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2574 DAG.getConstant(4, getPointerTy())); 2575 } 2576 if (!MemOps.empty()) 2577 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2578 &MemOps[0], MemOps.size()); 2579 } else 2580 // This will point to the next argument passed via stack. 2581 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2582} 2583 2584SDValue 2585ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2586 CallingConv::ID CallConv, bool isVarArg, 2587 const SmallVectorImpl<ISD::InputArg> 2588 &Ins, 2589 DebugLoc dl, SelectionDAG &DAG, 2590 SmallVectorImpl<SDValue> &InVals) 2591 const { 2592 MachineFunction &MF = DAG.getMachineFunction(); 2593 MachineFrameInfo *MFI = MF.getFrameInfo(); 2594 2595 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2596 2597 // Assign locations to all of the incoming arguments. 2598 SmallVector<CCValAssign, 16> ArgLocs; 2599 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2600 getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); 2601 CCInfo.AnalyzeFormalArguments(Ins, 2602 CCAssignFnForNode(CallConv, /* Return*/ false, 2603 isVarArg)); 2604 2605 SmallVector<SDValue, 16> ArgValues; 2606 int lastInsIndex = -1; 2607 2608 SDValue ArgValue; 2609 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2610 CCValAssign &VA = ArgLocs[i]; 2611 2612 // Arguments stored in registers. 2613 if (VA.isRegLoc()) { 2614 EVT RegVT = VA.getLocVT(); 2615 2616 if (VA.needsCustom()) { 2617 // f64 and vector types are split up into multiple registers or 2618 // combinations of registers and stack slots. 
2619 if (VA.getLocVT() == MVT::v2f64) { 2620 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2621 Chain, DAG, dl); 2622 VA = ArgLocs[++i]; // skip ahead to next loc 2623 SDValue ArgValue2; 2624 if (VA.isMemLoc()) { 2625 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2626 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2627 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2628 MachinePointerInfo::getFixedStack(FI), 2629 false, false, false, 0); 2630 } else { 2631 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2632 Chain, DAG, dl); 2633 } 2634 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2635 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2636 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2637 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2638 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2639 } else 2640 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2641 2642 } else { 2643 const TargetRegisterClass *RC; 2644 2645 if (RegVT == MVT::f32) 2646 RC = &ARM::SPRRegClass; 2647 else if (RegVT == MVT::f64) 2648 RC = &ARM::DPRRegClass; 2649 else if (RegVT == MVT::v2f64) 2650 RC = &ARM::QPRRegClass; 2651 else if (RegVT == MVT::i32) 2652 RC = AFI->isThumb1OnlyFunction() ? 2653 (const TargetRegisterClass*)&ARM::tGPRRegClass : 2654 (const TargetRegisterClass*)&ARM::GPRRegClass; 2655 else 2656 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2657 2658 // Transform the arguments in physical registers into virtual ones. 2659 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2660 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2661 } 2662 2663 // If this is an 8 or 16-bit value, it is really passed promoted 2664 // to 32 bits. Insert an assert[sz]ext to capture this, then 2665 // truncate to the right size. 2666 switch (VA.getLocInfo()) { 2667 default: llvm_unreachable("Unknown loc info!"); 2668 case CCValAssign::Full: break; 2669 case CCValAssign::BCvt: 2670 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2671 break; 2672 case CCValAssign::SExt: 2673 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2674 DAG.getValueType(VA.getValVT())); 2675 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2676 break; 2677 case CCValAssign::ZExt: 2678 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2679 DAG.getValueType(VA.getValVT())); 2680 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2681 break; 2682 } 2683 2684 InVals.push_back(ArgValue); 2685 2686 } else { // VA.isRegLoc() 2687 2688 // sanity check 2689 assert(VA.isMemLoc()); 2690 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2691 2692 int index = ArgLocs[i].getValNo(); 2693 2694 // Some Ins[] entries become multiple ArgLoc[] entries. 2695 // Process them only once. 2696 if (index != lastInsIndex) 2697 { 2698 ISD::ArgFlagsTy Flags = Ins[index].Flags; 2699 // FIXME: For now, all byval parameter objects are marked mutable. 2700 // This can be changed with more analysis. 2701 // In case of tail call optimization mark all arguments mutable. 2702 // Since they could be overwritten by lowering of arguments in case of 2703 // a tail call. 2704 if (Flags.isByVal()) { 2705 unsigned VARegSize, VARegSaveSize; 2706 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2707 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); 2708 unsigned Bytes = Flags.getByValSize() - VARegSize; 2709 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. 
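          // Example: a 20-byte byval whose first two words arrived in r2 and
          // r3 gives VARegSize = 8 above, so the fixed object created below
          // covers only the remaining 12 bytes that live in the caller's
          // argument area.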
2710 int FI = MFI->CreateFixedObject(Bytes, 2711 VA.getLocMemOffset(), false); 2712 InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); 2713 } else { 2714 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 2715 VA.getLocMemOffset(), true); 2716 2717 // Create load nodes to retrieve arguments from the stack. 2718 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2719 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2720 MachinePointerInfo::getFixedStack(FI), 2721 false, false, false, 0)); 2722 } 2723 lastInsIndex = index; 2724 } 2725 } 2726 } 2727 2728 // varargs 2729 if (isVarArg) 2730 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); 2731 2732 return Chain; 2733} 2734 2735/// isFloatingPointZero - Return true if this is +0.0. 2736static bool isFloatingPointZero(SDValue Op) { 2737 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2738 return CFP->getValueAPF().isPosZero(); 2739 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2740 // Maybe this has already been legalized into the constant pool? 2741 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2742 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2743 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2744 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2745 return CFP->getValueAPF().isPosZero(); 2746 } 2747 } 2748 return false; 2749} 2750 2751/// Returns appropriate ARM CMP (cmp) and corresponding condition code for 2752/// the given operands. 2753SDValue 2754ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2755 SDValue &ARMcc, SelectionDAG &DAG, 2756 DebugLoc dl) const { 2757 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2758 unsigned C = RHSC->getZExtValue(); 2759 if (!isLegalICmpImmediate(C)) { 2760 // Constant does not fit, try adjusting it by one? 2761 switch (CC) { 2762 default: break; 2763 case ISD::SETLT: 2764 case ISD::SETGE: 2765 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 2766 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 2767 RHS = DAG.getConstant(C-1, MVT::i32); 2768 } 2769 break; 2770 case ISD::SETULT: 2771 case ISD::SETUGE: 2772 if (C != 0 && isLegalICmpImmediate(C-1)) { 2773 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2774 RHS = DAG.getConstant(C-1, MVT::i32); 2775 } 2776 break; 2777 case ISD::SETLE: 2778 case ISD::SETGT: 2779 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 2780 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2781 RHS = DAG.getConstant(C+1, MVT::i32); 2782 } 2783 break; 2784 case ISD::SETULE: 2785 case ISD::SETUGT: 2786 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 2787 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2788 RHS = DAG.getConstant(C+1, MVT::i32); 2789 } 2790 break; 2791 } 2792 } 2793 } 2794 2795 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2796 ARMISD::NodeType CompareType; 2797 switch (CondCode) { 2798 default: 2799 CompareType = ARMISD::CMP; 2800 break; 2801 case ARMCC::EQ: 2802 case ARMCC::NE: 2803 // Uses only Z Flag 2804 CompareType = ARMISD::CMPZ; 2805 break; 2806 } 2807 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2808 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 2809} 2810 2811/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
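// For illustration: getARMCmp above rewrites comparisons whose RHS constant
// is not a legal ARM immediate into an equivalent test against a neighbouring
// value that is - e.g. "x < 257" (257 is not encodable) becomes "x <= 256" -
// while getVFPCmp below emits the VFP compare followed by FMSTAT to transfer
// the floating-point status flags into CPSR.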
2812SDValue 2813ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2814 DebugLoc dl) const { 2815 SDValue Cmp; 2816 if (!isFloatingPointZero(RHS)) 2817 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 2818 else 2819 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 2820 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 2821} 2822 2823/// duplicateCmp - Glue values can have only one use, so this function 2824/// duplicates a comparison node. 2825SDValue 2826ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 2827 unsigned Opc = Cmp.getOpcode(); 2828 DebugLoc DL = Cmp.getDebugLoc(); 2829 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 2830 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2831 2832 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 2833 Cmp = Cmp.getOperand(0); 2834 Opc = Cmp.getOpcode(); 2835 if (Opc == ARMISD::CMPFP) 2836 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2837 else { 2838 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 2839 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 2840 } 2841 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 2842} 2843 2844SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2845 SDValue Cond = Op.getOperand(0); 2846 SDValue SelectTrue = Op.getOperand(1); 2847 SDValue SelectFalse = Op.getOperand(2); 2848 DebugLoc dl = Op.getDebugLoc(); 2849 2850 // Convert: 2851 // 2852 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2853 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2854 // 2855 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2856 const ConstantSDNode *CMOVTrue = 2857 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2858 const ConstantSDNode *CMOVFalse = 2859 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2860 2861 if (CMOVTrue && CMOVFalse) { 2862 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2863 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2864 2865 SDValue True; 2866 SDValue False; 2867 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2868 True = SelectTrue; 2869 False = SelectFalse; 2870 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2871 True = SelectFalse; 2872 False = SelectTrue; 2873 } 2874 2875 if (True.getNode() && False.getNode()) { 2876 EVT VT = Op.getValueType(); 2877 SDValue ARMcc = Cond.getOperand(2); 2878 SDValue CCR = Cond.getOperand(3); 2879 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 2880 assert(True.getValueType() == VT); 2881 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2882 } 2883 } 2884 } 2885 2886 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the 2887 // undefined bits before doing a full-word comparison with zero. 
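  // That is, (select cond, t, f) becomes
  // (selectcc (and cond, 1), 0, t, f, setne).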
2888 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 2889 DAG.getConstant(1, Cond.getValueType())); 2890 2891 return DAG.getSelectCC(dl, Cond, 2892 DAG.getConstant(0, Cond.getValueType()), 2893 SelectTrue, SelectFalse, ISD::SETNE); 2894} 2895 2896SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2897 EVT VT = Op.getValueType(); 2898 SDValue LHS = Op.getOperand(0); 2899 SDValue RHS = Op.getOperand(1); 2900 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2901 SDValue TrueVal = Op.getOperand(2); 2902 SDValue FalseVal = Op.getOperand(3); 2903 DebugLoc dl = Op.getDebugLoc(); 2904 2905 if (LHS.getValueType() == MVT::i32) { 2906 SDValue ARMcc; 2907 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2908 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2909 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2910 } 2911 2912 ARMCC::CondCodes CondCode, CondCode2; 2913 FPCCToARMCC(CC, CondCode, CondCode2); 2914 2915 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2916 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2917 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2918 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2919 ARMcc, CCR, Cmp); 2920 if (CondCode2 != ARMCC::AL) { 2921 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2922 // FIXME: Needs another CMP because flag can have but one use. 2923 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2924 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2925 Result, TrueVal, ARMcc2, CCR, Cmp2); 2926 } 2927 return Result; 2928} 2929 2930/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2931/// to morph to an integer compare sequence. 2932static bool canChangeToInt(SDValue Op, bool &SeenZero, 2933 const ARMSubtarget *Subtarget) { 2934 SDNode *N = Op.getNode(); 2935 if (!N->hasOneUse()) 2936 // Otherwise it requires moving the value from fp to integer registers. 2937 return false; 2938 if (!N->getNumValues()) 2939 return false; 2940 EVT VT = Op.getValueType(); 2941 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2942 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 2943 // vmrs are very slow, e.g. cortex-a8. 
2944 return false; 2945 2946 if (isFloatingPointZero(Op)) { 2947 SeenZero = true; 2948 return true; 2949 } 2950 return ISD::isNormalLoad(N); 2951} 2952 2953static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2954 if (isFloatingPointZero(Op)) 2955 return DAG.getConstant(0, MVT::i32); 2956 2957 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2958 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2959 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 2960 Ld->isVolatile(), Ld->isNonTemporal(), 2961 Ld->isInvariant(), Ld->getAlignment()); 2962 2963 llvm_unreachable("Unknown VFP cmp argument!"); 2964} 2965 2966static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2967 SDValue &RetVal1, SDValue &RetVal2) { 2968 if (isFloatingPointZero(Op)) { 2969 RetVal1 = DAG.getConstant(0, MVT::i32); 2970 RetVal2 = DAG.getConstant(0, MVT::i32); 2971 return; 2972 } 2973 2974 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2975 SDValue Ptr = Ld->getBasePtr(); 2976 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2977 Ld->getChain(), Ptr, 2978 Ld->getPointerInfo(), 2979 Ld->isVolatile(), Ld->isNonTemporal(), 2980 Ld->isInvariant(), Ld->getAlignment()); 2981 2982 EVT PtrType = Ptr.getValueType(); 2983 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2984 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2985 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2986 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2987 Ld->getChain(), NewPtr, 2988 Ld->getPointerInfo().getWithOffset(4), 2989 Ld->isVolatile(), Ld->isNonTemporal(), 2990 Ld->isInvariant(), NewAlign); 2991 return; 2992 } 2993 2994 llvm_unreachable("Unknown VFP cmp argument!"); 2995} 2996 2997/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2998/// f32 and even f64 comparisons to integer ones. 2999SDValue 3000ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 3001 SDValue Chain = Op.getOperand(0); 3002 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3003 SDValue LHS = Op.getOperand(2); 3004 SDValue RHS = Op.getOperand(3); 3005 SDValue Dest = Op.getOperand(4); 3006 DebugLoc dl = Op.getDebugLoc(); 3007 3008 bool LHSSeenZero = false; 3009 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 3010 bool RHSSeenZero = false; 3011 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 3012 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 3013 // If unsafe fp math optimization is enabled and there are no other uses of 3014 // the CMP operands, and the condition code is EQ or NE, we can optimize it 3015 // to an integer comparison. 
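    // Under unsafe FP math the ordered/unordered distinction can be dropped,
    // and masking off the sign bit below makes -0.0 compare equal to +0.0 in
    // the integer domain.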
3016 if (CC == ISD::SETOEQ) 3017 CC = ISD::SETEQ; 3018 else if (CC == ISD::SETUNE) 3019 CC = ISD::SETNE; 3020 3021 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 3022 SDValue ARMcc; 3023 if (LHS.getValueType() == MVT::f32) { 3024 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3025 bitcastf32Toi32(LHS, DAG), Mask); 3026 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3027 bitcastf32Toi32(RHS, DAG), Mask); 3028 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3029 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3030 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3031 Chain, Dest, ARMcc, CCR, Cmp); 3032 } 3033 3034 SDValue LHS1, LHS2; 3035 SDValue RHS1, RHS2; 3036 expandf64Toi32(LHS, DAG, LHS1, LHS2); 3037 expandf64Toi32(RHS, DAG, RHS1, RHS2); 3038 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 3039 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 3040 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3041 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3042 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3043 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 3044 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 3045 } 3046 3047 return SDValue(); 3048} 3049 3050SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3051 SDValue Chain = Op.getOperand(0); 3052 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3053 SDValue LHS = Op.getOperand(2); 3054 SDValue RHS = Op.getOperand(3); 3055 SDValue Dest = Op.getOperand(4); 3056 DebugLoc dl = Op.getDebugLoc(); 3057 3058 if (LHS.getValueType() == MVT::i32) { 3059 SDValue ARMcc; 3060 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3061 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3062 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3063 Chain, Dest, ARMcc, CCR, Cmp); 3064 } 3065 3066 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 3067 3068 if (getTargetMachine().Options.UnsafeFPMath && 3069 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 3070 CC == ISD::SETNE || CC == ISD::SETUNE)) { 3071 SDValue Result = OptimizeVFPBrcond(Op, DAG); 3072 if (Result.getNode()) 3073 return Result; 3074 } 3075 3076 ARMCC::CondCodes CondCode, CondCode2; 3077 FPCCToARMCC(CC, CondCode, CondCode2); 3078 3079 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3080 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3081 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3082 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3083 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 3084 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3085 if (CondCode2 != ARMCC::AL) { 3086 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 3087 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 3088 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3089 } 3090 return Res; 3091} 3092 3093SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 3094 SDValue Chain = Op.getOperand(0); 3095 SDValue Table = Op.getOperand(1); 3096 SDValue Index = Op.getOperand(2); 3097 DebugLoc dl = Op.getDebugLoc(); 3098 3099 EVT PTy = getPointerTy(); 3100 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3101 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 3102 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 3103 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3104 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 3105 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, 
PTy)); 3106 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3107 if (Subtarget->isThumb2()) { 3108 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3109 // which does another jump to the destination. This also makes it easier 3110 // to translate it to TBB / TBH later. 3111 // FIXME: This might not work if the function is extremely large. 3112 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3113 Addr, Op.getOperand(2), JTI, UId); 3114 } 3115 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3116 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3117 MachinePointerInfo::getJumpTable(), 3118 false, false, false, 0); 3119 Chain = Addr.getValue(1); 3120 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3121 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3122 } else { 3123 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3124 MachinePointerInfo::getJumpTable(), 3125 false, false, false, 0); 3126 Chain = Addr.getValue(1); 3127 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3128 } 3129} 3130 3131static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3132 EVT VT = Op.getValueType(); 3133 DebugLoc dl = Op.getDebugLoc(); 3134 3135 if (Op.getValueType().getVectorElementType() == MVT::i32) { 3136 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3137 return Op; 3138 return DAG.UnrollVectorOp(Op.getNode()); 3139 } 3140 3141 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 3142 "Invalid type for custom lowering!"); 3143 if (VT != MVT::v4i16) 3144 return DAG.UnrollVectorOp(Op.getNode()); 3145 3146 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 3147 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 3148} 3149 3150static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3151 EVT VT = Op.getValueType(); 3152 if (VT.isVector()) 3153 return LowerVectorFP_TO_INT(Op, DAG); 3154 3155 DebugLoc dl = Op.getDebugLoc(); 3156 unsigned Opc; 3157 3158 switch (Op.getOpcode()) { 3159 default: llvm_unreachable("Invalid opcode!"); 3160 case ISD::FP_TO_SINT: 3161 Opc = ARMISD::FTOSI; 3162 break; 3163 case ISD::FP_TO_UINT: 3164 Opc = ARMISD::FTOUI; 3165 break; 3166 } 3167 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3168 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3169} 3170 3171static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3172 EVT VT = Op.getValueType(); 3173 DebugLoc dl = Op.getDebugLoc(); 3174 3175 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3176 if (VT.getVectorElementType() == MVT::f32) 3177 return Op; 3178 return DAG.UnrollVectorOp(Op.getNode()); 3179 } 3180 3181 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3182 "Invalid type for custom lowering!"); 3183 if (VT != MVT::v4f32) 3184 return DAG.UnrollVectorOp(Op.getNode()); 3185 3186 unsigned CastOpc; 3187 unsigned Opc; 3188 switch (Op.getOpcode()) { 3189 default: llvm_unreachable("Invalid opcode!"); 3190 case ISD::SINT_TO_FP: 3191 CastOpc = ISD::SIGN_EXTEND; 3192 Opc = ISD::SINT_TO_FP; 3193 break; 3194 case ISD::UINT_TO_FP: 3195 CastOpc = ISD::ZERO_EXTEND; 3196 Opc = ISD::UINT_TO_FP; 3197 break; 3198 } 3199 3200 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3201 return DAG.getNode(Opc, dl, VT, Op); 3202} 3203 3204static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3205 EVT VT = Op.getValueType(); 3206 if (VT.isVector()) 3207 return LowerVectorINT_TO_FP(Op, DAG); 3208 3209 DebugLoc dl = 
Op.getDebugLoc(); 3210 unsigned Opc; 3211 3212 switch (Op.getOpcode()) { 3213 default: llvm_unreachable("Invalid opcode!"); 3214 case ISD::SINT_TO_FP: 3215 Opc = ARMISD::SITOF; 3216 break; 3217 case ISD::UINT_TO_FP: 3218 Opc = ARMISD::UITOF; 3219 break; 3220 } 3221 3222 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3223 return DAG.getNode(Opc, dl, VT, Op); 3224} 3225 3226SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3227 // Implement fcopysign with a fabs and a conditional fneg. 3228 SDValue Tmp0 = Op.getOperand(0); 3229 SDValue Tmp1 = Op.getOperand(1); 3230 DebugLoc dl = Op.getDebugLoc(); 3231 EVT VT = Op.getValueType(); 3232 EVT SrcVT = Tmp1.getValueType(); 3233 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3234 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3235 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3236 3237 if (UseNEON) { 3238 // Use VBSL to copy the sign bit. 3239 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3240 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3241 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3242 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3243 if (VT == MVT::f64) 3244 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3245 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3246 DAG.getConstant(32, MVT::i32)); 3247 else /*if (VT == MVT::f32)*/ 3248 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3249 if (SrcVT == MVT::f32) { 3250 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3251 if (VT == MVT::f64) 3252 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3253 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3254 DAG.getConstant(32, MVT::i32)); 3255 } else if (VT == MVT::f32) 3256 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3257 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3258 DAG.getConstant(32, MVT::i32)); 3259 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3260 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3261 3262 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3263 MVT::i32); 3264 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3265 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3266 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3267 3268 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3269 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3270 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3271 if (VT == MVT::f32) { 3272 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3273 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3274 DAG.getConstant(0, MVT::i32)); 3275 } else { 3276 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3277 } 3278 3279 return Res; 3280 } 3281 3282 // Bitcast operand 1 to i32. 3283 if (SrcVT == MVT::f64) 3284 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3285 &Tmp1, 1).getValue(1); 3286 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3287 3288 // Or in the signbit with integer operations. 3289 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3290 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3291 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3292 if (VT == MVT::f32) { 3293 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3294 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3295 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3296 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3297 } 3298 3299 // f64: Or the high part with signbit and then combine two parts. 
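  // VMOVRRD yields the two i32 halves of the f64; the sign bit is bit 31 of
  // the high half.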
3300 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3301 &Tmp0, 1); 3302 SDValue Lo = Tmp0.getValue(0); 3303 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3304 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3305 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3306} 3307 3308SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3309 MachineFunction &MF = DAG.getMachineFunction(); 3310 MachineFrameInfo *MFI = MF.getFrameInfo(); 3311 MFI->setReturnAddressIsTaken(true); 3312 3313 EVT VT = Op.getValueType(); 3314 DebugLoc dl = Op.getDebugLoc(); 3315 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3316 if (Depth) { 3317 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3318 SDValue Offset = DAG.getConstant(4, MVT::i32); 3319 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3320 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3321 MachinePointerInfo(), false, false, false, 0); 3322 } 3323 3324 // Return LR, which contains the return address. Mark it an implicit live-in. 3325 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3326 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3327} 3328 3329SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3330 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3331 MFI->setFrameAddressIsTaken(true); 3332 3333 EVT VT = Op.getValueType(); 3334 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 3335 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3336 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3337 ? ARM::R7 : ARM::R11; 3338 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3339 while (Depth--) 3340 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3341 MachinePointerInfo(), 3342 false, false, false, 0); 3343 return FrameAddr; 3344} 3345 3346/// ExpandBITCAST - If the target supports VFP, this function is called to 3347/// expand a bit convert where either the source or destination type is i64 to 3348/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3349/// operand type is illegal (e.g., v2f32 for a target that doesn't support 3350/// vectors), since the legalizer won't know what to do with that. 3351static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3352 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3353 DebugLoc dl = N->getDebugLoc(); 3354 SDValue Op = N->getOperand(0); 3355 3356 // This function is only supposed to be called for i64 types, either as the 3357 // source or destination of the bit convert. 3358 EVT SrcVT = Op.getValueType(); 3359 EVT DstVT = N->getValueType(0); 3360 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3361 "ExpandBITCAST called for non-i64 type"); 3362 3363 // Turn i64->f64 into VMOVDRR. 3364 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3365 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3366 DAG.getConstant(0, MVT::i32)); 3367 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3368 DAG.getConstant(1, MVT::i32)); 3369 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3370 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3371 } 3372 3373 // Turn f64->i64 into VMOVRRD. 
3374 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3375 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3376 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3377 // Merge the pieces into a single i64 value. 3378 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3379 } 3380 3381 return SDValue(); 3382} 3383 3384/// getZeroVector - Returns a vector of specified type with all zero elements. 3385/// Zero vectors are used to represent vector negation and in those cases 3386/// will be implemented with the NEON VNEG instruction. However, VNEG does 3387/// not support i64 elements, so sometimes the zero vectors will need to be 3388/// explicitly constructed. Regardless, use a canonical VMOV to create the 3389/// zero vector. 3390static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 3391 assert(VT.isVector() && "Expected a vector type"); 3392 // The canonical modified immediate encoding of a zero vector is....0! 3393 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3394 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 3395 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 3396 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3397} 3398 3399/// LowerShiftRightParts - Lower SRA_PARTS, which returns two 3400/// i32 values and take a 2 x i32 value to shift plus a shift amount. 3401SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 3402 SelectionDAG &DAG) const { 3403 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3404 EVT VT = Op.getValueType(); 3405 unsigned VTBits = VT.getSizeInBits(); 3406 DebugLoc dl = Op.getDebugLoc(); 3407 SDValue ShOpLo = Op.getOperand(0); 3408 SDValue ShOpHi = Op.getOperand(1); 3409 SDValue ShAmt = Op.getOperand(2); 3410 SDValue ARMcc; 3411 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 3412 3413 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 3414 3415 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3416 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3417 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 3418 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3419 DAG.getConstant(VTBits, MVT::i32)); 3420 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 3421 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3422 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 3423 3424 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3425 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3426 ARMcc, DAG, dl); 3427 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 3428 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 3429 CCR, Cmp); 3430 3431 SDValue Ops[2] = { Lo, Hi }; 3432 return DAG.getMergeValues(Ops, 2, dl); 3433} 3434 3435/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 3436/// i32 values and take a 2 x i32 value to shift plus a shift amount. 
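// The high result is selected with a CMOV on (ShAmt - 32):
//   ShAmt <  32: Hi = (ShOpHi << ShAmt) | (ShOpLo >> (32 - ShAmt))
//   ShAmt >= 32: Hi = ShOpLo << (ShAmt - 32)
// while Lo is simply ShOpLo << ShAmt.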
3437SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 3438 SelectionDAG &DAG) const { 3439 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3440 EVT VT = Op.getValueType(); 3441 unsigned VTBits = VT.getSizeInBits(); 3442 DebugLoc dl = Op.getDebugLoc(); 3443 SDValue ShOpLo = Op.getOperand(0); 3444 SDValue ShOpHi = Op.getOperand(1); 3445 SDValue ShAmt = Op.getOperand(2); 3446 SDValue ARMcc; 3447 3448 assert(Op.getOpcode() == ISD::SHL_PARTS); 3449 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3450 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3451 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 3452 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3453 DAG.getConstant(VTBits, MVT::i32)); 3454 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 3455 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 3456 3457 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3458 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3459 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3460 ARMcc, DAG, dl); 3461 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 3462 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 3463 CCR, Cmp); 3464 3465 SDValue Ops[2] = { Lo, Hi }; 3466 return DAG.getMergeValues(Ops, 2, dl); 3467} 3468 3469SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3470 SelectionDAG &DAG) const { 3471 // The rounding mode is in bits 23:22 of the FPSCR. 3472 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 3473 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 3474 // so that the shift + and get folded into a bitfield extract. 3475 DebugLoc dl = Op.getDebugLoc(); 3476 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 3477 DAG.getConstant(Intrinsic::arm_get_fpscr, 3478 MVT::i32)); 3479 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 3480 DAG.getConstant(1U << 22, MVT::i32)); 3481 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 3482 DAG.getConstant(22, MVT::i32)); 3483 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 3484 DAG.getConstant(3, MVT::i32)); 3485} 3486 3487static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 3488 const ARMSubtarget *ST) { 3489 EVT VT = N->getValueType(0); 3490 DebugLoc dl = N->getDebugLoc(); 3491 3492 if (!ST->hasV6T2Ops()) 3493 return SDValue(); 3494 3495 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 3496 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 3497} 3498 3499static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 3500 const ARMSubtarget *ST) { 3501 EVT VT = N->getValueType(0); 3502 DebugLoc dl = N->getDebugLoc(); 3503 3504 if (!VT.isVector()) 3505 return SDValue(); 3506 3507 // Lower vector shifts on NEON to use VSHL. 3508 assert(ST->hasNEON() && "unexpected vector shift"); 3509 3510 // Left shifts translate directly to the vshiftu intrinsic. 3511 if (N->getOpcode() == ISD::SHL) 3512 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3513 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 3514 N->getOperand(0), N->getOperand(1)); 3515 3516 assert((N->getOpcode() == ISD::SRA || 3517 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 3518 3519 // NEON uses the same intrinsics for both left and right shifts. For 3520 // right shifts, the shift amounts are negative, so negate the vector of 3521 // shift amounts. 
3522 EVT ShiftVT = N->getOperand(1).getValueType(); 3523 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 3524 getZeroVector(ShiftVT, DAG, dl), 3525 N->getOperand(1)); 3526 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 3527 Intrinsic::arm_neon_vshifts : 3528 Intrinsic::arm_neon_vshiftu); 3529 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3530 DAG.getConstant(vshiftInt, MVT::i32), 3531 N->getOperand(0), NegatedCount); 3532} 3533 3534static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 3535 const ARMSubtarget *ST) { 3536 EVT VT = N->getValueType(0); 3537 DebugLoc dl = N->getDebugLoc(); 3538 3539 // We can get here for a node like i32 = ISD::SHL i32, i64 3540 if (VT != MVT::i64) 3541 return SDValue(); 3542 3543 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 3544 "Unknown shift to lower!"); 3545 3546 // We only lower SRA, SRL of 1 here, all others use generic lowering. 3547 if (!isa<ConstantSDNode>(N->getOperand(1)) || 3548 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 3549 return SDValue(); 3550 3551 // If we are in thumb mode, we don't have RRX. 3552 if (ST->isThumb1Only()) return SDValue(); 3553 3554 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 3555 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3556 DAG.getConstant(0, MVT::i32)); 3557 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3558 DAG.getConstant(1, MVT::i32)); 3559 3560 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 3561 // captures the result into a carry flag. 3562 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 3563 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 3564 3565 // The low part is an ARMISD::RRX operand, which shifts the carry in. 3566 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 3567 3568 // Merge the pieces into a single i64 value. 3569 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 3570} 3571 3572static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 3573 SDValue TmpOp0, TmpOp1; 3574 bool Invert = false; 3575 bool Swap = false; 3576 unsigned Opc = 0; 3577 3578 SDValue Op0 = Op.getOperand(0); 3579 SDValue Op1 = Op.getOperand(1); 3580 SDValue CC = Op.getOperand(2); 3581 EVT VT = Op.getValueType(); 3582 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3583 DebugLoc dl = Op.getDebugLoc(); 3584 3585 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 3586 switch (SetCCOpcode) { 3587 default: llvm_unreachable("Illegal FP comparison"); 3588 case ISD::SETUNE: 3589 case ISD::SETNE: Invert = true; // Fallthrough 3590 case ISD::SETOEQ: 3591 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3592 case ISD::SETOLT: 3593 case ISD::SETLT: Swap = true; // Fallthrough 3594 case ISD::SETOGT: 3595 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3596 case ISD::SETOLE: 3597 case ISD::SETLE: Swap = true; // Fallthrough 3598 case ISD::SETOGE: 3599 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3600 case ISD::SETUGE: Swap = true; // Fallthrough 3601 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 3602 case ISD::SETUGT: Swap = true; // Fallthrough 3603 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 3604 case ISD::SETUEQ: Invert = true; // Fallthrough 3605 case ISD::SETONE: 3606 // Expand this to (OLT | OGT). 
3607 TmpOp0 = Op0; 3608 TmpOp1 = Op1; 3609 Opc = ISD::OR; 3610 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3611 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 3612 break; 3613 case ISD::SETUO: Invert = true; // Fallthrough 3614 case ISD::SETO: 3615 // Expand this to (OLT | OGE). 3616 TmpOp0 = Op0; 3617 TmpOp1 = Op1; 3618 Opc = ISD::OR; 3619 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3620 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 3621 break; 3622 } 3623 } else { 3624 // Integer comparisons. 3625 switch (SetCCOpcode) { 3626 default: llvm_unreachable("Illegal integer comparison"); 3627 case ISD::SETNE: Invert = true; 3628 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3629 case ISD::SETLT: Swap = true; 3630 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3631 case ISD::SETLE: Swap = true; 3632 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3633 case ISD::SETULT: Swap = true; 3634 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 3635 case ISD::SETULE: Swap = true; 3636 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 3637 } 3638 3639 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 3640 if (Opc == ARMISD::VCEQ) { 3641 3642 SDValue AndOp; 3643 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3644 AndOp = Op0; 3645 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 3646 AndOp = Op1; 3647 3648 // Ignore bitconvert. 3649 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 3650 AndOp = AndOp.getOperand(0); 3651 3652 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 3653 Opc = ARMISD::VTST; 3654 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 3655 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 3656 Invert = !Invert; 3657 } 3658 } 3659 } 3660 3661 if (Swap) 3662 std::swap(Op0, Op1); 3663 3664 // If one of the operands is a constant vector zero, attempt to fold the 3665 // comparison to a specialized compare-against-zero form. 3666 SDValue SingleOp; 3667 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3668 SingleOp = Op0; 3669 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 3670 if (Opc == ARMISD::VCGE) 3671 Opc = ARMISD::VCLEZ; 3672 else if (Opc == ARMISD::VCGT) 3673 Opc = ARMISD::VCLTZ; 3674 SingleOp = Op1; 3675 } 3676 3677 SDValue Result; 3678 if (SingleOp.getNode()) { 3679 switch (Opc) { 3680 case ARMISD::VCEQ: 3681 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 3682 case ARMISD::VCGE: 3683 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 3684 case ARMISD::VCLEZ: 3685 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 3686 case ARMISD::VCGT: 3687 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 3688 case ARMISD::VCLTZ: 3689 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 3690 default: 3691 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3692 } 3693 } else { 3694 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3695 } 3696 3697 if (Invert) 3698 Result = DAG.getNOT(dl, Result, VT); 3699 3700 return Result; 3701} 3702 3703/// isNEONModifiedImm - Check if the specified splat value corresponds to a 3704/// valid vector constant for a NEON instruction with a "modified immediate" 3705/// operand (e.g., VMOV). If so, return the encoded value. 
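// For example, a 32-bit splat of 0x0000AB00 is encodable as Op=x, Cmode=001x
// with Imm = 0xAB (the second case in the 32-bit switch arm below).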
3706static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 3707 unsigned SplatBitSize, SelectionDAG &DAG, 3708 EVT &VT, bool is128Bits, NEONModImmType type) { 3709 unsigned OpCmode, Imm; 3710 3711 // SplatBitSize is set to the smallest size that splats the vector, so a 3712 // zero vector will always have SplatBitSize == 8. However, NEON modified 3713 // immediate instructions others than VMOV do not support the 8-bit encoding 3714 // of a zero vector, and the default encoding of zero is supposed to be the 3715 // 32-bit version. 3716 if (SplatBits == 0) 3717 SplatBitSize = 32; 3718 3719 switch (SplatBitSize) { 3720 case 8: 3721 if (type != VMOVModImm) 3722 return SDValue(); 3723 // Any 1-byte value is OK. Op=0, Cmode=1110. 3724 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 3725 OpCmode = 0xe; 3726 Imm = SplatBits; 3727 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 3728 break; 3729 3730 case 16: 3731 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 3732 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 3733 if ((SplatBits & ~0xff) == 0) { 3734 // Value = 0x00nn: Op=x, Cmode=100x. 3735 OpCmode = 0x8; 3736 Imm = SplatBits; 3737 break; 3738 } 3739 if ((SplatBits & ~0xff00) == 0) { 3740 // Value = 0xnn00: Op=x, Cmode=101x. 3741 OpCmode = 0xa; 3742 Imm = SplatBits >> 8; 3743 break; 3744 } 3745 return SDValue(); 3746 3747 case 32: 3748 // NEON's 32-bit VMOV supports splat values where: 3749 // * only one byte is nonzero, or 3750 // * the least significant byte is 0xff and the second byte is nonzero, or 3751 // * the least significant 2 bytes are 0xff and the third is nonzero. 3752 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3753 if ((SplatBits & ~0xff) == 0) { 3754 // Value = 0x000000nn: Op=x, Cmode=000x. 3755 OpCmode = 0; 3756 Imm = SplatBits; 3757 break; 3758 } 3759 if ((SplatBits & ~0xff00) == 0) { 3760 // Value = 0x0000nn00: Op=x, Cmode=001x. 3761 OpCmode = 0x2; 3762 Imm = SplatBits >> 8; 3763 break; 3764 } 3765 if ((SplatBits & ~0xff0000) == 0) { 3766 // Value = 0x00nn0000: Op=x, Cmode=010x. 3767 OpCmode = 0x4; 3768 Imm = SplatBits >> 16; 3769 break; 3770 } 3771 if ((SplatBits & ~0xff000000) == 0) { 3772 // Value = 0xnn000000: Op=x, Cmode=011x. 3773 OpCmode = 0x6; 3774 Imm = SplatBits >> 24; 3775 break; 3776 } 3777 3778 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 3779 if (type == OtherModImm) return SDValue(); 3780 3781 if ((SplatBits & ~0xffff) == 0 && 3782 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3783 // Value = 0x0000nnff: Op=x, Cmode=1100. 3784 OpCmode = 0xc; 3785 Imm = SplatBits >> 8; 3786 SplatBits |= 0xff; 3787 break; 3788 } 3789 3790 if ((SplatBits & ~0xffffff) == 0 && 3791 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3792 // Value = 0x00nnffff: Op=x, Cmode=1101. 3793 OpCmode = 0xd; 3794 Imm = SplatBits >> 16; 3795 SplatBits |= 0xffff; 3796 break; 3797 } 3798 3799 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3800 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3801 // VMOV.I32. A (very) minor optimization would be to replicate the value 3802 // and fall through here to test for a valid 64-bit splat. But, then the 3803 // caller would also need to check and handle the change in size. 3804 return SDValue(); 3805 3806 case 64: { 3807 if (type != VMOVModImm) 3808 return SDValue(); 3809 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
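    // e.g., the splat 0x00ff00ff00ff00ff is encoded with Imm = 0b01010101,
    // one immediate bit per all-ones byte, least significant byte first.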
3810 uint64_t BitMask = 0xff; 3811 uint64_t Val = 0; 3812 unsigned ImmMask = 1; 3813 Imm = 0; 3814 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3815 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3816 Val |= BitMask; 3817 Imm |= ImmMask; 3818 } else if ((SplatBits & BitMask) != 0) { 3819 return SDValue(); 3820 } 3821 BitMask <<= 8; 3822 ImmMask <<= 1; 3823 } 3824 // Op=1, Cmode=1110. 3825 OpCmode = 0x1e; 3826 SplatBits = Val; 3827 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3828 break; 3829 } 3830 3831 default: 3832 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3833 } 3834 3835 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3836 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3837} 3838 3839SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 3840 const ARMSubtarget *ST) const { 3841 if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) 3842 return SDValue(); 3843 3844 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 3845 assert(Op.getValueType() == MVT::f32 && 3846 "ConstantFP custom lowering should only occur for f32."); 3847 3848 // Try splatting with a VMOV.f32... 3849 APFloat FPVal = CFP->getValueAPF(); 3850 int ImmVal = ARM_AM::getFP32Imm(FPVal); 3851 if (ImmVal != -1) { 3852 DebugLoc DL = Op.getDebugLoc(); 3853 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); 3854 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 3855 NewVal); 3856 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 3857 DAG.getConstant(0, MVT::i32)); 3858 } 3859 3860 // If that fails, try a VMOV.i32 3861 EVT VMovVT; 3862 unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); 3863 SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, 3864 VMOVModImm); 3865 if (NewVal != SDValue()) { 3866 DebugLoc DL = Op.getDebugLoc(); 3867 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 3868 NewVal); 3869 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 3870 VecConstant); 3871 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 3872 DAG.getConstant(0, MVT::i32)); 3873 } 3874 3875 // Finally, try a VMVN.i32 3876 NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, 3877 VMVNModImm); 3878 if (NewVal != SDValue()) { 3879 DebugLoc DL = Op.getDebugLoc(); 3880 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 3881 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 3882 VecConstant); 3883 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 3884 DAG.getConstant(0, MVT::i32)); 3885 } 3886 3887 return SDValue(); 3888} 3889 3890 3891static bool isVEXTMask(ArrayRef<int> M, EVT VT, 3892 bool &ReverseVEXT, unsigned &Imm) { 3893 unsigned NumElts = VT.getVectorNumElements(); 3894 ReverseVEXT = false; 3895 3896 // Assume that the first shuffle index is not UNDEF. Fail if it is. 3897 if (M[0] < 0) 3898 return false; 3899 3900 Imm = M[0]; 3901 3902 // If this is a VEXT shuffle, the immediate value is the index of the first 3903 // element. The other shuffle indices must be the successive elements after 3904 // the first one. 3905 unsigned ExpectedElt = Imm; 3906 for (unsigned i = 1; i < NumElts; ++i) { 3907 // Increment the expected index. If it wraps around, it may still be 3908 // a VEXT but the source vectors must be swapped. 
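    // e.g., mask <6, 7, 0, 1> on a 4-element vector wraps at the third
    // element and matches VEXT #2 with the two source vectors swapped.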
3909 ExpectedElt += 1; 3910 if (ExpectedElt == NumElts * 2) { 3911 ExpectedElt = 0; 3912 ReverseVEXT = true; 3913 } 3914 3915 if (M[i] < 0) continue; // ignore UNDEF indices 3916 if (ExpectedElt != static_cast<unsigned>(M[i])) 3917 return false; 3918 } 3919 3920 // Adjust the index value if the source operands will be swapped. 3921 if (ReverseVEXT) 3922 Imm -= NumElts; 3923 3924 return true; 3925} 3926 3927/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3928/// instruction with the specified blocksize. (The order of the elements 3929/// within each block of the vector is reversed.) 3930static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 3931 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3932 "Only possible block sizes for VREV are: 16, 32, 64"); 3933 3934 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3935 if (EltSz == 64) 3936 return false; 3937 3938 unsigned NumElts = VT.getVectorNumElements(); 3939 unsigned BlockElts = M[0] + 1; 3940 // If the first shuffle index is UNDEF, be optimistic. 3941 if (M[0] < 0) 3942 BlockElts = BlockSize / EltSz; 3943 3944 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3945 return false; 3946 3947 for (unsigned i = 0; i < NumElts; ++i) { 3948 if (M[i] < 0) continue; // ignore UNDEF indices 3949 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3950 return false; 3951 } 3952 3953 return true; 3954} 3955 3956static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 3957 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 3958 // range, then 0 is placed into the resulting vector. So pretty much any mask 3959 // of 8 elements can work here. 3960 return VT == MVT::v8i8 && M.size() == 8; 3961} 3962 3963static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 3964 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3965 if (EltSz == 64) 3966 return false; 3967 3968 unsigned NumElts = VT.getVectorNumElements(); 3969 WhichResult = (M[0] == 0 ? 0 : 1); 3970 for (unsigned i = 0; i < NumElts; i += 2) { 3971 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3972 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 3973 return false; 3974 } 3975 return true; 3976} 3977 3978/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3979/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3980/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3981static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 3982 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3983 if (EltSz == 64) 3984 return false; 3985 3986 unsigned NumElts = VT.getVectorNumElements(); 3987 WhichResult = (M[0] == 0 ? 0 : 1); 3988 for (unsigned i = 0; i < NumElts; i += 2) { 3989 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3990 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 3991 return false; 3992 } 3993 return true; 3994} 3995 3996static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 3997 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3998 if (EltSz == 64) 3999 return false; 4000 4001 unsigned NumElts = VT.getVectorNumElements(); 4002 WhichResult = (M[0] == 0 ? 0 : 1); 4003 for (unsigned i = 0; i != NumElts; ++i) { 4004 if (M[i] < 0) continue; // ignore UNDEF indices 4005 if ((unsigned) M[i] != 2 * i + WhichResult) 4006 return false; 4007 } 4008 4009 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
4010 if (VT.is64BitVector() && EltSz == 32) 4011 return false; 4012 4013 return true; 4014} 4015 4016/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 4017/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4018/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 4019static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4020 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4021 if (EltSz == 64) 4022 return false; 4023 4024 unsigned Half = VT.getVectorNumElements() / 2; 4025 WhichResult = (M[0] == 0 ? 0 : 1); 4026 for (unsigned j = 0; j != 2; ++j) { 4027 unsigned Idx = WhichResult; 4028 for (unsigned i = 0; i != Half; ++i) { 4029 int MIdx = M[i + j * Half]; 4030 if (MIdx >= 0 && (unsigned) MIdx != Idx) 4031 return false; 4032 Idx += 2; 4033 } 4034 } 4035 4036 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4037 if (VT.is64BitVector() && EltSz == 32) 4038 return false; 4039 4040 return true; 4041} 4042 4043static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4044 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4045 if (EltSz == 64) 4046 return false; 4047 4048 unsigned NumElts = VT.getVectorNumElements(); 4049 WhichResult = (M[0] == 0 ? 0 : 1); 4050 unsigned Idx = WhichResult * NumElts / 2; 4051 for (unsigned i = 0; i != NumElts; i += 2) { 4052 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4053 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 4054 return false; 4055 Idx += 1; 4056 } 4057 4058 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4059 if (VT.is64BitVector() && EltSz == 32) 4060 return false; 4061 4062 return true; 4063} 4064 4065/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 4066/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4067/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 4068static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4069 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4070 if (EltSz == 64) 4071 return false; 4072 4073 unsigned NumElts = VT.getVectorNumElements(); 4074 WhichResult = (M[0] == 0 ? 0 : 1); 4075 unsigned Idx = WhichResult * NumElts / 2; 4076 for (unsigned i = 0; i != NumElts; i += 2) { 4077 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4078 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 4079 return false; 4080 Idx += 1; 4081 } 4082 4083 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4084 if (VT.is64BitVector() && EltSz == 32) 4085 return false; 4086 4087 return true; 4088} 4089 4090// If N is an integer constant that can be moved into a register in one 4091// instruction, return an SDValue of such a constant (will become a MOV 4092// instruction). Otherwise return null. 4093static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 4094 const ARMSubtarget *ST, DebugLoc dl) { 4095 uint64_t Val; 4096 if (!isa<ConstantSDNode>(N)) 4097 return SDValue(); 4098 Val = cast<ConstantSDNode>(N)->getZExtValue(); 4099 4100 if (ST->isThumb1Only()) { 4101 if (Val <= 255 || ~Val <= 255) 4102 return DAG.getConstant(Val, MVT::i32); 4103 } else { 4104 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 4105 return DAG.getConstant(Val, MVT::i32); 4106 } 4107 return SDValue(); 4108} 4109 4110// If this is a case we can't handle, return null and let the default 4111// expansion code take care of it. 
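// The lowering below tries, roughly in order: an immediate VMOV/VMVN (or
// VMOV.f32) for constant splats, VDUP for vectors built from a single value,
// ReconstructShuffle, and an ARMISD::BUILD_VECTOR for 32-bit or wider
// elements; everything else falls back to the default expansion.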
4112SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 4113 const ARMSubtarget *ST) const { 4114 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 4115 DebugLoc dl = Op.getDebugLoc(); 4116 EVT VT = Op.getValueType(); 4117 4118 APInt SplatBits, SplatUndef; 4119 unsigned SplatBitSize; 4120 bool HasAnyUndefs; 4121 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 4122 if (SplatBitSize <= 64) { 4123 // Check if an immediate VMOV works. 4124 EVT VmovVT; 4125 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 4126 SplatUndef.getZExtValue(), SplatBitSize, 4127 DAG, VmovVT, VT.is128BitVector(), 4128 VMOVModImm); 4129 if (Val.getNode()) { 4130 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 4131 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4132 } 4133 4134 // Try an immediate VMVN. 4135 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 4136 Val = isNEONModifiedImm(NegatedImm, 4137 SplatUndef.getZExtValue(), SplatBitSize, 4138 DAG, VmovVT, VT.is128BitVector(), 4139 VMVNModImm); 4140 if (Val.getNode()) { 4141 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 4142 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4143 } 4144 4145 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 4146 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 4147 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 4148 if (ImmVal != -1) { 4149 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 4150 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 4151 } 4152 } 4153 } 4154 } 4155 4156 // Scan through the operands to see if only one value is used. 4157 unsigned NumElts = VT.getVectorNumElements(); 4158 bool isOnlyLowElement = true; 4159 bool usesOnlyOneValue = true; 4160 bool isConstant = true; 4161 SDValue Value; 4162 for (unsigned i = 0; i < NumElts; ++i) { 4163 SDValue V = Op.getOperand(i); 4164 if (V.getOpcode() == ISD::UNDEF) 4165 continue; 4166 if (i > 0) 4167 isOnlyLowElement = false; 4168 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 4169 isConstant = false; 4170 4171 if (!Value.getNode()) 4172 Value = V; 4173 else if (V != Value) 4174 usesOnlyOneValue = false; 4175 } 4176 4177 if (!Value.getNode()) 4178 return DAG.getUNDEF(VT); 4179 4180 if (isOnlyLowElement) 4181 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 4182 4183 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4184 4185 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 4186 // i32 and try again. 4187 if (usesOnlyOneValue && EltSize <= 32) { 4188 if (!isConstant) 4189 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 4190 if (VT.getVectorElementType().isFloatingPoint()) { 4191 SmallVector<SDValue, 8> Ops; 4192 for (unsigned i = 0; i < NumElts; ++i) 4193 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 4194 Op.getOperand(i))); 4195 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 4196 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 4197 Val = LowerBUILD_VECTOR(Val, DAG, ST); 4198 if (Val.getNode()) 4199 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4200 } 4201 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 4202 if (Val.getNode()) 4203 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 4204 } 4205 4206 // If all elements are constants and the case above didn't get hit, fall back 4207 // to the default expansion, which will generate a load from the constant 4208 // pool. 
4209 if (isConstant) 4210 return SDValue(); 4211 4212 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 4213 if (NumElts >= 4) { 4214 SDValue shuffle = ReconstructShuffle(Op, DAG); 4215 if (shuffle != SDValue()) 4216 return shuffle; 4217 } 4218 4219 // Vectors with 32- or 64-bit elements can be built by directly assigning 4220 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 4221 // will be legalized. 4222 if (EltSize >= 32) { 4223 // Do the expansion with floating-point types, since that is what the VFP 4224 // registers are defined to use, and since i64 is not legal. 4225 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4226 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4227 SmallVector<SDValue, 8> Ops; 4228 for (unsigned i = 0; i < NumElts; ++i) 4229 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 4230 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4231 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4232 } 4233 4234 return SDValue(); 4235} 4236 4237// Gather data to see if the operation can be modelled as a 4238// shuffle in combination with VEXTs. 4239SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 4240 SelectionDAG &DAG) const { 4241 DebugLoc dl = Op.getDebugLoc(); 4242 EVT VT = Op.getValueType(); 4243 unsigned NumElts = VT.getVectorNumElements(); 4244 4245 SmallVector<SDValue, 2> SourceVecs; 4246 SmallVector<unsigned, 2> MinElts; 4247 SmallVector<unsigned, 2> MaxElts; 4248 4249 for (unsigned i = 0; i < NumElts; ++i) { 4250 SDValue V = Op.getOperand(i); 4251 if (V.getOpcode() == ISD::UNDEF) 4252 continue; 4253 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 4254 // A shuffle can only come from building a vector from various 4255 // elements of other vectors. 4256 return SDValue(); 4257 } else if (V.getOperand(0).getValueType().getVectorElementType() != 4258 VT.getVectorElementType()) { 4259 // This code doesn't know how to handle shuffles where the vector 4260 // element types do not match (this happens because type legalization 4261 // promotes the return type of EXTRACT_VECTOR_ELT). 4262 // FIXME: It might be appropriate to extend this code to handle 4263 // mismatched types. 4264 return SDValue(); 4265 } 4266 4267 // Record this extraction against the appropriate vector if possible... 4268 SDValue SourceVec = V.getOperand(0); 4269 // If the element number isn't a constant, we can't effectively 4270 // analyze what's going on. 4271 if (!isa<ConstantSDNode>(V.getOperand(1))) 4272 return SDValue(); 4273 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 4274 bool FoundSource = false; 4275 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 4276 if (SourceVecs[j] == SourceVec) { 4277 if (MinElts[j] > EltNo) 4278 MinElts[j] = EltNo; 4279 if (MaxElts[j] < EltNo) 4280 MaxElts[j] = EltNo; 4281 FoundSource = true; 4282 break; 4283 } 4284 } 4285 4286 // Or record a new source if not... 4287 if (!FoundSource) { 4288 SourceVecs.push_back(SourceVec); 4289 MinElts.push_back(EltNo); 4290 MaxElts.push_back(EltNo); 4291 } 4292 } 4293 4294 // Currently only do something sane when at most two source vectors 4295 // involved. 4296 if (SourceVecs.size() > 2) 4297 return SDValue(); 4298 4299 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 4300 int VEXTOffsets[2] = {0, 0}; 4301 4302 // This loop extracts the usage patterns of the source vectors 4303 // and prepares appropriate SDValues for a shuffle if possible. 
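  // Each source ends up either used as-is (when it already has the result
  // type), narrowed to one half with EXTRACT_SUBVECTOR, or narrowed with an
  // explicit VEXT when the referenced elements straddle both halves.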
4304 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 4305 if (SourceVecs[i].getValueType() == VT) { 4306 // No VEXT necessary 4307 ShuffleSrcs[i] = SourceVecs[i]; 4308 VEXTOffsets[i] = 0; 4309 continue; 4310 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 4311 // It probably isn't worth padding out a smaller vector just to 4312 // break it down again in a shuffle. 4313 return SDValue(); 4314 } 4315 4316 // Since only 64-bit and 128-bit vectors are legal on ARM and 4317 // we've eliminated the other cases... 4318 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 4319 "unexpected vector sizes in ReconstructShuffle"); 4320 4321 if (MaxElts[i] - MinElts[i] >= NumElts) { 4322 // Span too large for a VEXT to cope 4323 return SDValue(); 4324 } 4325 4326 if (MinElts[i] >= NumElts) { 4327 // The extraction can just take the second half 4328 VEXTOffsets[i] = NumElts; 4329 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4330 SourceVecs[i], 4331 DAG.getIntPtrConstant(NumElts)); 4332 } else if (MaxElts[i] < NumElts) { 4333 // The extraction can just take the first half 4334 VEXTOffsets[i] = 0; 4335 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4336 SourceVecs[i], 4337 DAG.getIntPtrConstant(0)); 4338 } else { 4339 // An actual VEXT is needed 4340 VEXTOffsets[i] = MinElts[i]; 4341 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4342 SourceVecs[i], 4343 DAG.getIntPtrConstant(0)); 4344 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4345 SourceVecs[i], 4346 DAG.getIntPtrConstant(NumElts)); 4347 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 4348 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 4349 } 4350 } 4351 4352 SmallVector<int, 8> Mask; 4353 4354 for (unsigned i = 0; i < NumElts; ++i) { 4355 SDValue Entry = Op.getOperand(i); 4356 if (Entry.getOpcode() == ISD::UNDEF) { 4357 Mask.push_back(-1); 4358 continue; 4359 } 4360 4361 SDValue ExtractVec = Entry.getOperand(0); 4362 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 4363 .getOperand(1))->getSExtValue(); 4364 if (ExtractVec == SourceVecs[0]) { 4365 Mask.push_back(ExtractElt - VEXTOffsets[0]); 4366 } else { 4367 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 4368 } 4369 } 4370 4371 // Final check before we try to produce nonsense... 4372 if (isShuffleMaskLegal(Mask, VT)) 4373 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 4374 &Mask[0]); 4375 4376 return SDValue(); 4377} 4378 4379/// isShuffleMaskLegal - Targets can use this to indicate that they only 4380/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4381/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4382/// are assumed to be legal. 4383bool 4384ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 4385 EVT VT) const { 4386 if (VT.getVectorNumElements() == 4 && 4387 (VT.is128BitVector() || VT.is64BitVector())) { 4388 unsigned PFIndexes[4]; 4389 for (unsigned i = 0; i != 4; ++i) { 4390 if (M[i] < 0) 4391 PFIndexes[i] = 8; 4392 else 4393 PFIndexes[i] = M[i]; 4394 } 4395 4396 // Compute the index in the perfect shuffle table. 
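    // Each index is in [0, 8] (8 encodes an undef lane), so the four indices
    // form a base-9 number; the top two bits of the table entry give the
    // cost, and masks costing at most four ops are accepted here.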
4397 unsigned PFTableIndex = 4398 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4399 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4400 unsigned Cost = (PFEntry >> 30); 4401 4402 if (Cost <= 4) 4403 return true; 4404 } 4405 4406 bool ReverseVEXT; 4407 unsigned Imm, WhichResult; 4408 4409 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4410 return (EltSize >= 32 || 4411 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 4412 isVREVMask(M, VT, 64) || 4413 isVREVMask(M, VT, 32) || 4414 isVREVMask(M, VT, 16) || 4415 isVEXTMask(M, VT, ReverseVEXT, Imm) || 4416 isVTBLMask(M, VT) || 4417 isVTRNMask(M, VT, WhichResult) || 4418 isVUZPMask(M, VT, WhichResult) || 4419 isVZIPMask(M, VT, WhichResult) || 4420 isVTRN_v_undef_Mask(M, VT, WhichResult) || 4421 isVUZP_v_undef_Mask(M, VT, WhichResult) || 4422 isVZIP_v_undef_Mask(M, VT, WhichResult)); 4423} 4424 4425/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4426/// the specified operations to build the shuffle. 4427static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4428 SDValue RHS, SelectionDAG &DAG, 4429 DebugLoc dl) { 4430 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4431 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4432 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4433 4434 enum { 4435 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4436 OP_VREV, 4437 OP_VDUP0, 4438 OP_VDUP1, 4439 OP_VDUP2, 4440 OP_VDUP3, 4441 OP_VEXT1, 4442 OP_VEXT2, 4443 OP_VEXT3, 4444 OP_VUZPL, // VUZP, left result 4445 OP_VUZPR, // VUZP, right result 4446 OP_VZIPL, // VZIP, left result 4447 OP_VZIPR, // VZIP, right result 4448 OP_VTRNL, // VTRN, left result 4449 OP_VTRNR // VTRN, right result 4450 }; 4451 4452 if (OpNum == OP_COPY) { 4453 if (LHSID == (1*9+2)*9+3) return LHS; 4454 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4455 return RHS; 4456 } 4457 4458 SDValue OpLHS, OpRHS; 4459 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4460 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4461 EVT VT = OpLHS.getValueType(); 4462 4463 switch (OpNum) { 4464 default: llvm_unreachable("Unknown shuffle opcode!"); 4465 case OP_VREV: 4466 // VREV divides the vector in half and swaps within the half. 
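// In this four-lane context the element width just selects the VREV flavour
// whose reversal region is two lanes wide, so <a, b, c, d> becomes
// <b, a, d, c> whether that is VREV64 on 32-bit elements, VREV32 on i16
// elements, or VREV16 on i8 elements.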
4467 if (VT.getVectorElementType() == MVT::i32 || 4468 VT.getVectorElementType() == MVT::f32) 4469 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 4470 // vrev <4 x i16> -> VREV32 4471 if (VT.getVectorElementType() == MVT::i16) 4472 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 4473 // vrev <4 x i8> -> VREV16 4474 assert(VT.getVectorElementType() == MVT::i8); 4475 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 4476 case OP_VDUP0: 4477 case OP_VDUP1: 4478 case OP_VDUP2: 4479 case OP_VDUP3: 4480 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4481 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 4482 case OP_VEXT1: 4483 case OP_VEXT2: 4484 case OP_VEXT3: 4485 return DAG.getNode(ARMISD::VEXT, dl, VT, 4486 OpLHS, OpRHS, 4487 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 4488 case OP_VUZPL: 4489 case OP_VUZPR: 4490 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4491 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 4492 case OP_VZIPL: 4493 case OP_VZIPR: 4494 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4495 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 4496 case OP_VTRNL: 4497 case OP_VTRNR: 4498 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4499 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 4500 } 4501} 4502 4503static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 4504 ArrayRef<int> ShuffleMask, 4505 SelectionDAG &DAG) { 4506 // Check to see if we can use the VTBL instruction. 4507 SDValue V1 = Op.getOperand(0); 4508 SDValue V2 = Op.getOperand(1); 4509 DebugLoc DL = Op.getDebugLoc(); 4510 4511 SmallVector<SDValue, 8> VTBLMask; 4512 for (ArrayRef<int>::iterator 4513 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 4514 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); 4515 4516 if (V2.getNode()->getOpcode() == ISD::UNDEF) 4517 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 4518 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 4519 &VTBLMask[0], 8)); 4520 4521 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 4522 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 4523 &VTBLMask[0], 8)); 4524} 4525 4526static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 4527 SDValue V1 = Op.getOperand(0); 4528 SDValue V2 = Op.getOperand(1); 4529 DebugLoc dl = Op.getDebugLoc(); 4530 EVT VT = Op.getValueType(); 4531 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 4532 4533 // Convert shuffles that are directly supported on NEON to target-specific 4534 // DAG nodes, instead of keeping them as shuffles and matching them again 4535 // during code selection. This is more efficient and avoids the possibility 4536 // of inconsistencies between legalization and selection. 4537 // FIXME: floating-point vectors should be canonicalized to integer vectors 4538 // of the same time so that they get CSEd properly. 4539 ArrayRef<int> ShuffleMask = SVN->getMask(); 4540 4541 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4542 if (EltSize <= 32) { 4543 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 4544 int Lane = SVN->getSplatIndex(); 4545 // If this is undef splat, generate it via "just" vdup, if possible. 4546 if (Lane == -1) Lane = 0; 4547 4548 // Test if V1 is a SCALAR_TO_VECTOR. 4549 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 4550 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 4551 } 4552 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 4553 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 4554 // reaches it). 
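// For example, (build_vector x, undef, undef, undef) with a splat index of 0
// can be lowered to a plain VDUP of the scalar x (when x is not a constant)
// rather than a VDUPLANE of the whole vector.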
4555 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 4556 !isa<ConstantSDNode>(V1.getOperand(0))) { 4557 bool IsScalarToVector = true; 4558 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 4559 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 4560 IsScalarToVector = false; 4561 break; 4562 } 4563 if (IsScalarToVector) 4564 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 4565 } 4566 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 4567 DAG.getConstant(Lane, MVT::i32)); 4568 } 4569 4570 bool ReverseVEXT; 4571 unsigned Imm; 4572 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 4573 if (ReverseVEXT) 4574 std::swap(V1, V2); 4575 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 4576 DAG.getConstant(Imm, MVT::i32)); 4577 } 4578 4579 if (isVREVMask(ShuffleMask, VT, 64)) 4580 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 4581 if (isVREVMask(ShuffleMask, VT, 32)) 4582 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 4583 if (isVREVMask(ShuffleMask, VT, 16)) 4584 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 4585 4586 // Check for Neon shuffles that modify both input vectors in place. 4587 // If both results are used, i.e., if there are two shuffles with the same 4588 // source operands and with masks corresponding to both results of one of 4589 // these operations, DAG memoization will ensure that a single node is 4590 // used for both shuffles. 4591 unsigned WhichResult; 4592 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 4593 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4594 V1, V2).getValue(WhichResult); 4595 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 4596 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4597 V1, V2).getValue(WhichResult); 4598 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 4599 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4600 V1, V2).getValue(WhichResult); 4601 4602 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4603 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4604 V1, V1).getValue(WhichResult); 4605 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4606 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4607 V1, V1).getValue(WhichResult); 4608 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4609 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4610 V1, V1).getValue(WhichResult); 4611 } 4612 4613 // If the shuffle is not directly supported and it has 4 elements, use 4614 // the PerfectShuffle-generated table to synthesize it from other shuffles. 4615 unsigned NumElts = VT.getVectorNumElements(); 4616 if (NumElts == 4) { 4617 unsigned PFIndexes[4]; 4618 for (unsigned i = 0; i != 4; ++i) { 4619 if (ShuffleMask[i] < 0) 4620 PFIndexes[i] = 8; 4621 else 4622 PFIndexes[i] = ShuffleMask[i]; 4623 } 4624 4625 // Compute the index in the perfect shuffle table. 4626 unsigned PFTableIndex = 4627 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4628 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4629 unsigned Cost = (PFEntry >> 30); 4630 4631 if (Cost <= 4) 4632 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 4633 } 4634 4635 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 4636 if (EltSize >= 32) { 4637 // Do the expansion with floating-point types, since that is what the VFP 4638 // registers are defined to use, and since i64 is not legal. 
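// For a v2i64 shuffle, for instance, this builds a v2f64 ARMISD::BUILD_VECTOR
// out of f64 EXTRACT_VECTOR_ELTs of the bitcast inputs and then bitcasts the
// result back to v2i64.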
4639 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4640 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4641 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 4642 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 4643 SmallVector<SDValue, 8> Ops; 4644 for (unsigned i = 0; i < NumElts; ++i) { 4645 if (ShuffleMask[i] < 0) 4646 Ops.push_back(DAG.getUNDEF(EltVT)); 4647 else 4648 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 4649 ShuffleMask[i] < (int)NumElts ? V1 : V2, 4650 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 4651 MVT::i32))); 4652 } 4653 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4654 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4655 } 4656 4657 if (VT == MVT::v8i8) { 4658 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 4659 if (NewOp.getNode()) 4660 return NewOp; 4661 } 4662 4663 return SDValue(); 4664} 4665 4666static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4667 // INSERT_VECTOR_ELT is legal only for immediate indexes. 4668 SDValue Lane = Op.getOperand(2); 4669 if (!isa<ConstantSDNode>(Lane)) 4670 return SDValue(); 4671 4672 return Op; 4673} 4674 4675static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4676 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 4677 SDValue Lane = Op.getOperand(1); 4678 if (!isa<ConstantSDNode>(Lane)) 4679 return SDValue(); 4680 4681 SDValue Vec = Op.getOperand(0); 4682 if (Op.getValueType() == MVT::i32 && 4683 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 4684 DebugLoc dl = Op.getDebugLoc(); 4685 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 4686 } 4687 4688 return Op; 4689} 4690 4691static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 4692 // The only time a CONCAT_VECTORS operation can have legal types is when 4693 // two 64-bit vectors are concatenated to a 128-bit vector. 4694 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 4695 "unexpected CONCAT_VECTORS"); 4696 DebugLoc dl = Op.getDebugLoc(); 4697 SDValue Val = DAG.getUNDEF(MVT::v2f64); 4698 SDValue Op0 = Op.getOperand(0); 4699 SDValue Op1 = Op.getOperand(1); 4700 if (Op0.getOpcode() != ISD::UNDEF) 4701 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4702 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 4703 DAG.getIntPtrConstant(0)); 4704 if (Op1.getOpcode() != ISD::UNDEF) 4705 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4706 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 4707 DAG.getIntPtrConstant(1)); 4708 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 4709} 4710 4711/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 4712/// element has been zero/sign-extended, depending on the isSigned parameter, 4713/// from an integer type half its size. 4714static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 4715 bool isSigned) { 4716 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 4717 EVT VT = N->getValueType(0); 4718 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 4719 SDNode *BVN = N->getOperand(0).getNode(); 4720 if (BVN->getValueType(0) != MVT::v4i32 || 4721 BVN->getOpcode() != ISD::BUILD_VECTOR) 4722 return false; 4723 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 
1 : 0; 4724 unsigned HiElt = 1 - LoElt; 4725 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 4726 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 4727 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 4728 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 4729 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 4730 return false; 4731 if (isSigned) { 4732 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 4733 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 4734 return true; 4735 } else { 4736 if (Hi0->isNullValue() && Hi1->isNullValue()) 4737 return true; 4738 } 4739 return false; 4740 } 4741 4742 if (N->getOpcode() != ISD::BUILD_VECTOR) 4743 return false; 4744 4745 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 4746 SDNode *Elt = N->getOperand(i).getNode(); 4747 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 4748 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4749 unsigned HalfSize = EltSize / 2; 4750 if (isSigned) { 4751 if (!isIntN(HalfSize, C->getSExtValue())) 4752 return false; 4753 } else { 4754 if (!isUIntN(HalfSize, C->getZExtValue())) 4755 return false; 4756 } 4757 continue; 4758 } 4759 return false; 4760 } 4761 4762 return true; 4763} 4764 4765/// isSignExtended - Check if a node is a vector value that is sign-extended 4766/// or a constant BUILD_VECTOR with sign-extended elements. 4767static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 4768 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 4769 return true; 4770 if (isExtendedBUILD_VECTOR(N, DAG, true)) 4771 return true; 4772 return false; 4773} 4774 4775/// isZeroExtended - Check if a node is a vector value that is zero-extended 4776/// or a constant BUILD_VECTOR with zero-extended elements. 4777static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 4778 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 4779 return true; 4780 if (isExtendedBUILD_VECTOR(N, DAG, false)) 4781 return true; 4782 return false; 4783} 4784 4785/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending 4786/// load, or BUILD_VECTOR with extended elements, return the unextended value. 4787static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { 4788 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 4789 return N->getOperand(0); 4790 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 4791 return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), 4792 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 4793 LD->isNonTemporal(), LD->isInvariant(), 4794 LD->getAlignment()); 4795 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 4796 // have been legalized as a BITCAST from v4i32. 4797 if (N->getOpcode() == ISD::BITCAST) { 4798 SDNode *BVN = N->getOperand(0).getNode(); 4799 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 4800 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 4801 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4802 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 4803 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 4804 } 4805 // Construct a new BUILD_VECTOR with elements truncated to half the size. 
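// For example, a v4i32 BUILD_VECTOR whose constants all fit in 16 bits is
// rebuilt here as a v4i16 BUILD_VECTOR; the operands stay i32 constants
// (implicitly truncated) because smaller integer element types are not legal.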
4806 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 4807 EVT VT = N->getValueType(0); 4808 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 4809 unsigned NumElts = VT.getVectorNumElements(); 4810 MVT TruncVT = MVT::getIntegerVT(EltSize); 4811 SmallVector<SDValue, 8> Ops; 4812 for (unsigned i = 0; i != NumElts; ++i) { 4813 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 4814 const APInt &CInt = C->getAPIntValue(); 4815 // Element types smaller than 32 bits are not legal, so use i32 elements. 4816 // The values are implicitly truncated so sext vs. zext doesn't matter. 4817 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); 4818 } 4819 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 4820 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 4821} 4822 4823static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 4824 unsigned Opcode = N->getOpcode(); 4825 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4826 SDNode *N0 = N->getOperand(0).getNode(); 4827 SDNode *N1 = N->getOperand(1).getNode(); 4828 return N0->hasOneUse() && N1->hasOneUse() && 4829 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 4830 } 4831 return false; 4832} 4833 4834static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 4835 unsigned Opcode = N->getOpcode(); 4836 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4837 SDNode *N0 = N->getOperand(0).getNode(); 4838 SDNode *N1 = N->getOperand(1).getNode(); 4839 return N0->hasOneUse() && N1->hasOneUse() && 4840 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 4841 } 4842 return false; 4843} 4844 4845static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 4846 // Multiplications are only custom-lowered for 128-bit vectors so that 4847 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 4848 EVT VT = Op.getValueType(); 4849 assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); 4850 SDNode *N0 = Op.getOperand(0).getNode(); 4851 SDNode *N1 = Op.getOperand(1).getNode(); 4852 unsigned NewOpc = 0; 4853 bool isMLA = false; 4854 bool isN0SExt = isSignExtended(N0, DAG); 4855 bool isN1SExt = isSignExtended(N1, DAG); 4856 if (isN0SExt && isN1SExt) 4857 NewOpc = ARMISD::VMULLs; 4858 else { 4859 bool isN0ZExt = isZeroExtended(N0, DAG); 4860 bool isN1ZExt = isZeroExtended(N1, DAG); 4861 if (isN0ZExt && isN1ZExt) 4862 NewOpc = ARMISD::VMULLu; 4863 else if (isN1SExt || isN1ZExt) { 4864 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 4865 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 4866 if (isN1SExt && isAddSubSExt(N0, DAG)) { 4867 NewOpc = ARMISD::VMULLs; 4868 isMLA = true; 4869 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 4870 NewOpc = ARMISD::VMULLu; 4871 isMLA = true; 4872 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 4873 std::swap(N0, N1); 4874 NewOpc = ARMISD::VMULLu; 4875 isMLA = true; 4876 } 4877 } 4878 4879 if (!NewOpc) { 4880 if (VT == MVT::v2i64) 4881 // Fall through to expand this. It is not legal. 4882 return SDValue(); 4883 else 4884 // Other vector multiplications are legal. 4885 return Op; 4886 } 4887 } 4888 4889 // Legalize to a VMULL instruction. 
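// For example, (mul (sext v4i16:a), (sext v4i16:b)) with a v4i32 result
// becomes (VMULLs a, b), multiplying the narrow operands and widening in one
// instruction; the isMLA path below instead distributes the multiply over an
// add/sub of extended values so that each product can still use VMULL.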
4890 DebugLoc DL = Op.getDebugLoc(); 4891 SDValue Op0; 4892 SDValue Op1 = SkipExtension(N1, DAG); 4893 if (!isMLA) { 4894 Op0 = SkipExtension(N0, DAG); 4895 assert(Op0.getValueType().is64BitVector() && 4896 Op1.getValueType().is64BitVector() && 4897 "unexpected types for extended operands to VMULL"); 4898 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 4899 } 4900 4901 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 4902 // isel lowering to take advantage of no-stall back to back vmul + vmla. 4903 // vmull q0, d4, d6 4904 // vmlal q0, d5, d6 4905 // is faster than 4906 // vaddl q0, d4, d5 4907 // vmovl q1, d6 4908 // vmul q0, q0, q1 4909 SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); 4910 SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); 4911 EVT Op1VT = Op1.getValueType(); 4912 return DAG.getNode(N0->getOpcode(), DL, VT, 4913 DAG.getNode(NewOpc, DL, VT, 4914 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 4915 DAG.getNode(NewOpc, DL, VT, 4916 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 4917} 4918 4919static SDValue 4920LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { 4921 // Convert to float 4922 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 4923 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 4924 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 4925 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 4926 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 4927 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 4928 // Get reciprocal estimate. 4929 // float4 recip = vrecpeq_f32(yf); 4930 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4931 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 4932 // Because char has a smaller range than uchar, we can actually get away 4933 // without any newton steps. This requires that we use a weird bias 4934 // of 0xb000, however (again, this has been exhaustively tested). 4935 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 4936 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 4937 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 4938 Y = DAG.getConstant(0xb000, MVT::i32); 4939 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 4940 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 4941 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 4942 // Convert back to short. 4943 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 4944 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 4945 return X; 4946} 4947 4948static SDValue 4949LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { 4950 SDValue N2; 4951 // Convert to float. 4952 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 4953 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 4954 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 4955 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 4956 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4957 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4958 4959 // Use reciprocal estimate and one refinement step. 
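// VRECPS computes (2.0f - a * b), so the multiply below performs one
// Newton-Raphson iteration, recip' = recip * (2.0f - yf * recip), roughly
// doubling the number of correct bits in the initial VRECPE estimate.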
4960 // float4 recip = vrecpeq_f32(yf); 4961 // recip *= vrecpsq_f32(yf, recip); 4962 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4963 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 4964 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4965 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4966 N1, N2); 4967 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4968 // Because short has a smaller range than ushort, we can actually get away 4969 // with only a single newton step. This requires that we use a weird bias 4970 // of 89, however (again, this has been exhaustively tested). 4971 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 4972 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4973 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4974 N1 = DAG.getConstant(0x89, MVT::i32); 4975 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4976 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4977 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4978 // Convert back to integer and return. 4979 // return vmovn_s32(vcvt_s32_f32(result)); 4980 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4981 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4982 return N0; 4983} 4984 4985static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 4986 EVT VT = Op.getValueType(); 4987 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4988 "unexpected type for custom-lowering ISD::SDIV"); 4989 4990 DebugLoc dl = Op.getDebugLoc(); 4991 SDValue N0 = Op.getOperand(0); 4992 SDValue N1 = Op.getOperand(1); 4993 SDValue N2, N3; 4994 4995 if (VT == MVT::v8i8) { 4996 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 4997 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 4998 4999 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5000 DAG.getIntPtrConstant(4)); 5001 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5002 DAG.getIntPtrConstant(4)); 5003 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5004 DAG.getIntPtrConstant(0)); 5005 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5006 DAG.getIntPtrConstant(0)); 5007 5008 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 5009 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 5010 5011 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 5012 N0 = LowerCONCAT_VECTORS(N0, DAG); 5013 5014 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 5015 return N0; 5016 } 5017 return LowerSDIV_v4i16(N0, N1, dl, DAG); 5018} 5019 5020static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 5021 EVT VT = Op.getValueType(); 5022 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 5023 "unexpected type for custom-lowering ISD::UDIV"); 5024 5025 DebugLoc dl = Op.getDebugLoc(); 5026 SDValue N0 = Op.getOperand(0); 5027 SDValue N1 = Op.getOperand(1); 5028 SDValue N2, N3; 5029 5030 if (VT == MVT::v8i8) { 5031 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 5032 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 5033 5034 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5035 DAG.getIntPtrConstant(4)); 5036 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5037 DAG.getIntPtrConstant(4)); 5038 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5039 DAG.getIntPtrConstant(0)); 5040 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5041 DAG.getIntPtrConstant(0)); 5042 5043 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 5044 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 5045 5046 N0 = 
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 5047 N0 = LowerCONCAT_VECTORS(N0, DAG); 5048 5049 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 5050 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 5051 N0); 5052 return N0; 5053 } 5054 5055 // v4i16 udiv ... Convert to float. 5056 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 5057 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 5058 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 5059 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 5060 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 5061 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 5062 5063 // Use reciprocal estimate and two refinement steps. 5064 // float4 recip = vrecpeq_f32(yf); 5065 // recip *= vrecpsq_f32(yf, recip); 5066 // recip *= vrecpsq_f32(yf, recip); 5067 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5068 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1); 5069 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5070 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5071 BN1, N2); 5072 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5073 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5074 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5075 BN1, N2); 5076 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5077 // Simply multiplying by the reciprocal estimate can leave us a few ulps 5078 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 5079 // and that it will never cause us to return an answer too large). 5080 // float4 result = as_float4(as_int4(xf*recip) + 2); 5081 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 5082 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 5083 N1 = DAG.getConstant(2, MVT::i32); 5084 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 5085 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 5086 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 5087 // Convert back to integer and return. 5088 // return vmovn_u32(vcvt_s32_f32(result)); 5089 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 5090 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 5091 return N0; 5092} 5093 5094static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { 5095 EVT VT = Op.getNode()->getValueType(0); 5096 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 5097 5098 unsigned Opc; 5099 bool ExtraOp = false; 5100 switch (Op.getOpcode()) { 5101 default: llvm_unreachable("Invalid code"); 5102 case ISD::ADDC: Opc = ARMISD::ADDC; break; 5103 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; 5104 case ISD::SUBC: Opc = ARMISD::SUBC; break; 5105 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; 5106 } 5107 5108 if (!ExtraOp) 5109 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 5110 Op.getOperand(1)); 5111 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 5112 Op.getOperand(1), Op.getOperand(2)); 5113} 5114 5115static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { 5116 // Monotonic load/store is legal for all targets 5117 if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) 5118 return Op; 5119 5120 // Acquire/Release load/store is not legal for targets without a 5121 // dmb or equivalent available.
5122 return SDValue(); 5123} 5124 5125 5126static void 5127ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, 5128 SelectionDAG &DAG, unsigned NewOp) { 5129 DebugLoc dl = Node->getDebugLoc(); 5130 assert (Node->getValueType(0) == MVT::i64 && 5131 "Only know how to expand i64 atomics"); 5132 5133 SmallVector<SDValue, 6> Ops; 5134 Ops.push_back(Node->getOperand(0)); // Chain 5135 Ops.push_back(Node->getOperand(1)); // Ptr 5136 // Low part of Val1 5137 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5138 Node->getOperand(2), DAG.getIntPtrConstant(0))); 5139 // High part of Val1 5140 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5141 Node->getOperand(2), DAG.getIntPtrConstant(1))); 5142 if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { 5143 // Low part of Val2 5144 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5145 Node->getOperand(3), DAG.getIntPtrConstant(0))); 5146 // High part of Val2 5147 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5148 Node->getOperand(3), DAG.getIntPtrConstant(1))); 5149 } 5150 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 5151 SDValue Result = 5152 DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, 5153 cast<MemSDNode>(Node)->getMemOperand()); 5154 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; 5155 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); 5156 Results.push_back(Result.getValue(2)); 5157} 5158 5159SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5160 switch (Op.getOpcode()) { 5161 default: llvm_unreachable("Don't know how to custom lower this!"); 5162 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5163 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5164 case ISD::GlobalAddress: 5165 return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : 5166 LowerGlobalAddressELF(Op, DAG); 5167 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5168 case ISD::SELECT: return LowerSELECT(Op, DAG); 5169 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5170 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 5171 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 5172 case ISD::VASTART: return LowerVASTART(Op, DAG); 5173 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 5174 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 5175 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 5176 case ISD::SINT_TO_FP: 5177 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5178 case ISD::FP_TO_SINT: 5179 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 5180 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5181 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5182 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5183 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 5184 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 5185 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 5186 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 5187 Subtarget); 5188 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 5189 case ISD::SHL: 5190 case ISD::SRL: 5191 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 5192 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 5193 case ISD::SRL_PARTS: 5194 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 5195 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 5196 case ISD::SETCC: return LowerVSETCC(Op, DAG); 5197 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); 5198 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 5199 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5200 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5201 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5202 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 5203 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5204 case ISD::MUL: return LowerMUL(Op, DAG); 5205 case ISD::SDIV: return LowerSDIV(Op, DAG); 5206 case ISD::UDIV: return LowerUDIV(Op, DAG); 5207 case ISD::ADDC: 5208 case ISD::ADDE: 5209 case ISD::SUBC: 5210 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 5211 case ISD::ATOMIC_LOAD: 5212 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 5213 } 5214} 5215 5216/// ReplaceNodeResults - Replace the results of node with an illegal result 5217/// type with new values built out of custom code. 
5218void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 5219 SmallVectorImpl<SDValue>&Results, 5220 SelectionDAG &DAG) const { 5221 SDValue Res; 5222 switch (N->getOpcode()) { 5223 default: 5224 llvm_unreachable("Don't know how to custom expand this!"); 5225 case ISD::BITCAST: 5226 Res = ExpandBITCAST(N, DAG); 5227 break; 5228 case ISD::SRL: 5229 case ISD::SRA: 5230 Res = Expand64BitShift(N, DAG, Subtarget); 5231 break; 5232 case ISD::ATOMIC_LOAD_ADD: 5233 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); 5234 return; 5235 case ISD::ATOMIC_LOAD_AND: 5236 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); 5237 return; 5238 case ISD::ATOMIC_LOAD_NAND: 5239 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); 5240 return; 5241 case ISD::ATOMIC_LOAD_OR: 5242 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); 5243 return; 5244 case ISD::ATOMIC_LOAD_SUB: 5245 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); 5246 return; 5247 case ISD::ATOMIC_LOAD_XOR: 5248 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); 5249 return; 5250 case ISD::ATOMIC_SWAP: 5251 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); 5252 return; 5253 case ISD::ATOMIC_CMP_SWAP: 5254 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); 5255 return; 5256 } 5257 if (Res.getNode()) 5258 Results.push_back(Res); 5259} 5260 5261//===----------------------------------------------------------------------===// 5262// ARM Scheduler Hooks 5263//===----------------------------------------------------------------------===// 5264 5265MachineBasicBlock * 5266ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 5267 MachineBasicBlock *BB, 5268 unsigned Size) const { 5269 unsigned dest = MI->getOperand(0).getReg(); 5270 unsigned ptr = MI->getOperand(1).getReg(); 5271 unsigned oldval = MI->getOperand(2).getReg(); 5272 unsigned newval = MI->getOperand(3).getReg(); 5273 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5274 DebugLoc dl = MI->getDebugLoc(); 5275 bool isThumb2 = Subtarget->isThumb2(); 5276 5277 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5278 unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 5279 (const TargetRegisterClass*)&ARM::rGPRRegClass : 5280 (const TargetRegisterClass*)&ARM::GPRRegClass); 5281 5282 if (isThumb2) { 5283 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 5284 MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); 5285 MRI.constrainRegClass(newval, &ARM::rGPRRegClass); 5286 } 5287 5288 unsigned ldrOpc, strOpc; 5289 switch (Size) { 5290 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5291 case 1: 5292 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5293 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5294 break; 5295 case 2: 5296 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5297 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5298 break; 5299 case 4: 5300 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5301 strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; 5302 break; 5303 } 5304 5305 MachineFunction *MF = BB->getParent(); 5306 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5307 MachineFunction::iterator It = BB; 5308 ++It; // insert the new blocks after the current block 5309 5310 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5311 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5312 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5313 MF->insert(It, loop1MBB); 5314 MF->insert(It, loop2MBB); 5315 MF->insert(It, exitMBB); 5316 5317 // Transfer the remainder of BB and its successor edges to exitMBB. 5318 exitMBB->splice(exitMBB->begin(), BB, 5319 llvm::next(MachineBasicBlock::iterator(MI)), 5320 BB->end()); 5321 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5322 5323 // thisMBB: 5324 // ... 5325 // fallthrough --> loop1MBB 5326 BB->addSuccessor(loop1MBB); 5327 5328 // loop1MBB: 5329 // ldrex dest, [ptr] 5330 // cmp dest, oldval 5331 // bne exitMBB 5332 BB = loop1MBB; 5333 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5334 if (ldrOpc == ARM::t2LDREX) 5335 MIB.addImm(0); 5336 AddDefaultPred(MIB); 5337 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5338 .addReg(dest).addReg(oldval)); 5339 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5340 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5341 BB->addSuccessor(loop2MBB); 5342 BB->addSuccessor(exitMBB); 5343 5344 // loop2MBB: 5345 // strex scratch, newval, [ptr] 5346 // cmp scratch, #0 5347 // bne loop1MBB 5348 BB = loop2MBB; 5349 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 5350 if (strOpc == ARM::t2STREX) 5351 MIB.addImm(0); 5352 AddDefaultPred(MIB); 5353 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5354 .addReg(scratch).addImm(0)); 5355 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5356 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5357 BB->addSuccessor(loop1MBB); 5358 BB->addSuccessor(exitMBB); 5359 5360 // exitMBB: 5361 // ... 5362 BB = exitMBB; 5363 5364 MI->eraseFromParent(); // The instruction is gone now. 5365 5366 return BB; 5367} 5368 5369MachineBasicBlock * 5370ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5371 unsigned Size, unsigned BinOpcode) const { 5372 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5373 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5374 5375 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5376 MachineFunction *MF = BB->getParent(); 5377 MachineFunction::iterator It = BB; 5378 ++It; 5379 5380 unsigned dest = MI->getOperand(0).getReg(); 5381 unsigned ptr = MI->getOperand(1).getReg(); 5382 unsigned incr = MI->getOperand(2).getReg(); 5383 DebugLoc dl = MI->getDebugLoc(); 5384 bool isThumb2 = Subtarget->isThumb2(); 5385 5386 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5387 if (isThumb2) { 5388 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 5389 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 5390 } 5391 5392 unsigned ldrOpc, strOpc; 5393 switch (Size) { 5394 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5395 case 1: 5396 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5397 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5398 break; 5399 case 2: 5400 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5401 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5402 break; 5403 case 4: 5404 ldrOpc = isThumb2 ? 
ARM::t2LDREX : ARM::LDREX; 5405 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5406 break; 5407 } 5408 5409 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5410 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5411 MF->insert(It, loopMBB); 5412 MF->insert(It, exitMBB); 5413 5414 // Transfer the remainder of BB and its successor edges to exitMBB. 5415 exitMBB->splice(exitMBB->begin(), BB, 5416 llvm::next(MachineBasicBlock::iterator(MI)), 5417 BB->end()); 5418 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5419 5420 const TargetRegisterClass *TRC = isThumb2 ? 5421 (const TargetRegisterClass*)&ARM::rGPRRegClass : 5422 (const TargetRegisterClass*)&ARM::GPRRegClass; 5423 unsigned scratch = MRI.createVirtualRegister(TRC); 5424 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 5425 5426 // thisMBB: 5427 // ... 5428 // fallthrough --> loopMBB 5429 BB->addSuccessor(loopMBB); 5430 5431 // loopMBB: 5432 // ldrex dest, ptr 5433 // <binop> scratch2, dest, incr 5434 // strex scratch, scratch2, ptr 5435 // cmp scratch, #0 5436 // bne- loopMBB 5437 // fallthrough --> exitMBB 5438 BB = loopMBB; 5439 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5440 if (ldrOpc == ARM::t2LDREX) 5441 MIB.addImm(0); 5442 AddDefaultPred(MIB); 5443 if (BinOpcode) { 5444 // operand order needs to go the other way for NAND 5445 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 5446 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5447 addReg(incr).addReg(dest)).addReg(0); 5448 else 5449 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5450 addReg(dest).addReg(incr)).addReg(0); 5451 } 5452 5453 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5454 if (strOpc == ARM::t2STREX) 5455 MIB.addImm(0); 5456 AddDefaultPred(MIB); 5457 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5458 .addReg(scratch).addImm(0)); 5459 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5460 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5461 5462 BB->addSuccessor(loopMBB); 5463 BB->addSuccessor(exitMBB); 5464 5465 // exitMBB: 5466 // ... 5467 BB = exitMBB; 5468 5469 MI->eraseFromParent(); // The instruction is gone now. 5470 5471 return BB; 5472} 5473 5474MachineBasicBlock * 5475ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 5476 MachineBasicBlock *BB, 5477 unsigned Size, 5478 bool signExtend, 5479 ARMCC::CondCodes Cond) const { 5480 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5481 5482 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5483 MachineFunction *MF = BB->getParent(); 5484 MachineFunction::iterator It = BB; 5485 ++It; 5486 5487 unsigned dest = MI->getOperand(0).getReg(); 5488 unsigned ptr = MI->getOperand(1).getReg(); 5489 unsigned incr = MI->getOperand(2).getReg(); 5490 unsigned oldval = dest; 5491 DebugLoc dl = MI->getDebugLoc(); 5492 bool isThumb2 = Subtarget->isThumb2(); 5493 5494 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5495 if (isThumb2) { 5496 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 5497 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 5498 } 5499 5500 unsigned ldrOpc, strOpc, extendOpc; 5501 switch (Size) { 5502 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5503 case 1: 5504 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5505 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5506 extendOpc = isThumb2 ? 
ARM::t2SXTB : ARM::SXTB; 5507 break; 5508 case 2: 5509 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5510 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5511 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 5512 break; 5513 case 4: 5514 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5515 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5516 extendOpc = 0; 5517 break; 5518 } 5519 5520 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5521 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5522 MF->insert(It, loopMBB); 5523 MF->insert(It, exitMBB); 5524 5525 // Transfer the remainder of BB and its successor edges to exitMBB. 5526 exitMBB->splice(exitMBB->begin(), BB, 5527 llvm::next(MachineBasicBlock::iterator(MI)), 5528 BB->end()); 5529 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5530 5531 const TargetRegisterClass *TRC = isThumb2 ? 5532 (const TargetRegisterClass*)&ARM::rGPRRegClass : 5533 (const TargetRegisterClass*)&ARM::GPRRegClass; 5534 unsigned scratch = MRI.createVirtualRegister(TRC); 5535 unsigned scratch2 = MRI.createVirtualRegister(TRC); 5536 5537 // thisMBB: 5538 // ... 5539 // fallthrough --> loopMBB 5540 BB->addSuccessor(loopMBB); 5541 5542 // loopMBB: 5543 // ldrex dest, ptr 5544 // (sign extend dest, if required) 5545 // cmp dest, incr 5546 // cmov.cond scratch2, dest, incr 5547 // strex scratch, scratch2, ptr 5548 // cmp scratch, #0 5549 // bne- loopMBB 5550 // fallthrough --> exitMBB 5551 BB = loopMBB; 5552 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5553 if (ldrOpc == ARM::t2LDREX) 5554 MIB.addImm(0); 5555 AddDefaultPred(MIB); 5556 5557 // Sign extend the value, if necessary. 5558 if (signExtend && extendOpc) { 5559 oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); 5560 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 5561 .addReg(dest) 5562 .addImm(0)); 5563 } 5564 5565 // Build compare and cmov instructions. 5566 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5567 .addReg(oldval).addReg(incr)); 5568 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 5569 .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); 5570 5571 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5572 if (strOpc == ARM::t2STREX) 5573 MIB.addImm(0); 5574 AddDefaultPred(MIB); 5575 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5576 .addReg(scratch).addImm(0)); 5577 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5578 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5579 5580 BB->addSuccessor(loopMBB); 5581 BB->addSuccessor(exitMBB); 5582 5583 // exitMBB: 5584 // ... 5585 BB = exitMBB; 5586 5587 MI->eraseFromParent(); // The instruction is gone now. 5588 5589 return BB; 5590} 5591 5592MachineBasicBlock * 5593ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 5594 unsigned Op1, unsigned Op2, 5595 bool NeedsCarry, bool IsCmpxchg) const { 5596 // This also handles ATOMIC_SWAP, indicated by Op1==0. 
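// The i64 value lives in a lo/hi register pair, so Op1 and Op2 are the 32-bit
// opcodes applied to the low and high words respectively (e.g. an add uses an
// ADD on the low word and an ADC on the high word when NeedsCarry is set),
// all wrapped in the ldrexd/strexd retry loop built below.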
5597 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5598 5599 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5600 MachineFunction *MF = BB->getParent(); 5601 MachineFunction::iterator It = BB; 5602 ++It; 5603 5604 unsigned destlo = MI->getOperand(0).getReg(); 5605 unsigned desthi = MI->getOperand(1).getReg(); 5606 unsigned ptr = MI->getOperand(2).getReg(); 5607 unsigned vallo = MI->getOperand(3).getReg(); 5608 unsigned valhi = MI->getOperand(4).getReg(); 5609 DebugLoc dl = MI->getDebugLoc(); 5610 bool isThumb2 = Subtarget->isThumb2(); 5611 5612 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5613 if (isThumb2) { 5614 MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); 5615 MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); 5616 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 5617 } 5618 5619 unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; 5620 unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; 5621 5622 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5623 MachineBasicBlock *contBB = 0, *cont2BB = 0; 5624 if (IsCmpxchg) { 5625 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 5626 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 5627 } 5628 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5629 MF->insert(It, loopMBB); 5630 if (IsCmpxchg) { 5631 MF->insert(It, contBB); 5632 MF->insert(It, cont2BB); 5633 } 5634 MF->insert(It, exitMBB); 5635 5636 // Transfer the remainder of BB and its successor edges to exitMBB. 5637 exitMBB->splice(exitMBB->begin(), BB, 5638 llvm::next(MachineBasicBlock::iterator(MI)), 5639 BB->end()); 5640 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5641 5642 const TargetRegisterClass *TRC = isThumb2 ? 5643 (const TargetRegisterClass*)&ARM::tGPRRegClass : 5644 (const TargetRegisterClass*)&ARM::GPRRegClass; 5645 unsigned storesuccess = MRI.createVirtualRegister(TRC); 5646 5647 // thisMBB: 5648 // ... 5649 // fallthrough --> loopMBB 5650 BB->addSuccessor(loopMBB); 5651 5652 // loopMBB: 5653 // ldrexd r2, r3, ptr 5654 // <binopa> r0, r2, incr 5655 // <binopb> r1, r3, incr 5656 // strexd storesuccess, r0, r1, ptr 5657 // cmp storesuccess, #0 5658 // bne- loopMBB 5659 // fallthrough --> exitMBB 5660 // 5661 // Note that the registers are explicitly specified because there is not any 5662 // way to force the register allocator to allocate a register pair. 5663 // 5664 // FIXME: The hardcoded registers are not necessary for Thumb2, but we 5665 // need to properly enforce the restriction that the two output registers 5666 // for ldrexd must be different. 5667 BB = loopMBB; 5668 // Load 5669 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 5670 .addReg(ARM::R2, RegState::Define) 5671 .addReg(ARM::R3, RegState::Define).addReg(ptr)); 5672 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 5673 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); 5674 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); 5675 5676 if (IsCmpxchg) { 5677 // Add early exit 5678 for (unsigned i = 0; i < 2; i++) { 5679 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 5680 ARM::CMPrr)) 5681 .addReg(i == 0 ? destlo : desthi) 5682 .addReg(i == 0 ? vallo : valhi)); 5683 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5684 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5685 BB->addSuccessor(exitMBB); 5686 BB->addSuccessor(i == 0 ? contBB : cont2BB); 5687 BB = (i == 0 ? 
contBB : cont2BB); 5688 } 5689 5690 // Copy to physregs for strexd 5691 unsigned setlo = MI->getOperand(5).getReg(); 5692 unsigned sethi = MI->getOperand(6).getReg(); 5693 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); 5694 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); 5695 } else if (Op1) { 5696 // Perform binary operation 5697 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) 5698 .addReg(destlo).addReg(vallo)) 5699 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 5700 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) 5701 .addReg(desthi).addReg(valhi)).addReg(0); 5702 } else { 5703 // Copy to physregs for strexd 5704 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); 5705 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); 5706 } 5707 5708 // Store 5709 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 5710 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); 5711 // Cmp+jump 5712 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5713 .addReg(storesuccess).addImm(0)); 5714 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5715 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5716 5717 BB->addSuccessor(loopMBB); 5718 BB->addSuccessor(exitMBB); 5719 5720 // exitMBB: 5721 // ... 5722 BB = exitMBB; 5723 5724 MI->eraseFromParent(); // The instruction is gone now. 5725 5726 return BB; 5727} 5728 5729/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 5730/// registers the function context. 5731void ARMTargetLowering:: 5732SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 5733 MachineBasicBlock *DispatchBB, int FI) const { 5734 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5735 DebugLoc dl = MI->getDebugLoc(); 5736 MachineFunction *MF = MBB->getParent(); 5737 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5738 MachineConstantPool *MCP = MF->getConstantPool(); 5739 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 5740 const Function *F = MF->getFunction(); 5741 5742 bool isThumb = Subtarget->isThumb(); 5743 bool isThumb2 = Subtarget->isThumb2(); 5744 5745 unsigned PCLabelId = AFI->createPICLabelUId(); 5746 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; 5747 ARMConstantPoolValue *CPV = 5748 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 5749 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 5750 5751 const TargetRegisterClass *TRC = isThumb ? 5752 (const TargetRegisterClass*)&ARM::tGPRRegClass : 5753 (const TargetRegisterClass*)&ARM::GPRRegClass; 5754 5755 // Grab constant pool and fixed stack memory operands. 5756 MachineMemOperand *CPMMO = 5757 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 5758 MachineMemOperand::MOLoad, 4, 4); 5759 5760 MachineMemOperand *FIMMOSt = 5761 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 5762 MachineMemOperand::MOStore, 4, 4); 5763 5764 // Load the address of the dispatch MBB into the jump buffer. 5765 if (isThumb2) { 5766 // Incoming value: jbuf 5767 // ldr.n r5, LCPI1_1 5768 // orr r5, r5, #1 5769 // add r5, pc 5770 // str r5, [$jbuf, #+4] ; &jbuf[1] 5771 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5772 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 5773 .addConstantPoolIndex(CPI) 5774 .addMemOperand(CPMMO)); 5775 // Set the low bit because of thumb mode. 
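// Branches to an address with bit 0 set execute in Thumb state, so the
// dispatch address stored into the jump buffer must carry that bit for the
// setjmp/longjmp resume to stay in Thumb state.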
5776 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5777 AddDefaultCC( 5778 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 5779 .addReg(NewVReg1, RegState::Kill) 5780 .addImm(0x01))); 5781 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5782 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 5783 .addReg(NewVReg2, RegState::Kill) 5784 .addImm(PCLabelId); 5785 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 5786 .addReg(NewVReg3, RegState::Kill) 5787 .addFrameIndex(FI) 5788 .addImm(36) // &jbuf[1] :: pc 5789 .addMemOperand(FIMMOSt)); 5790 } else if (isThumb) { 5791 // Incoming value: jbuf 5792 // ldr.n r1, LCPI1_4 5793 // add r1, pc 5794 // mov r2, #1 5795 // orrs r1, r2 5796 // add r2, $jbuf, #+4 ; &jbuf[1] 5797 // str r1, [r2] 5798 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5799 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 5800 .addConstantPoolIndex(CPI) 5801 .addMemOperand(CPMMO)); 5802 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5803 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 5804 .addReg(NewVReg1, RegState::Kill) 5805 .addImm(PCLabelId); 5806 // Set the low bit because of thumb mode. 5807 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5808 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 5809 .addReg(ARM::CPSR, RegState::Define) 5810 .addImm(1)); 5811 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5812 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 5813 .addReg(ARM::CPSR, RegState::Define) 5814 .addReg(NewVReg2, RegState::Kill) 5815 .addReg(NewVReg3, RegState::Kill)); 5816 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 5817 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 5818 .addFrameIndex(FI) 5819 .addImm(36)); // &jbuf[1] :: pc 5820 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 5821 .addReg(NewVReg4, RegState::Kill) 5822 .addReg(NewVReg5, RegState::Kill) 5823 .addImm(0) 5824 .addMemOperand(FIMMOSt)); 5825 } else { 5826 // Incoming value: jbuf 5827 // ldr r1, LCPI1_1 5828 // add r1, pc, r1 5829 // str r1, [$jbuf, #+4] ; &jbuf[1] 5830 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5831 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 5832 .addConstantPoolIndex(CPI) 5833 .addImm(0) 5834 .addMemOperand(CPMMO)); 5835 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5836 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 5837 .addReg(NewVReg1, RegState::Kill) 5838 .addImm(PCLabelId)); 5839 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 5840 .addReg(NewVReg2, RegState::Kill) 5841 .addFrameIndex(FI) 5842 .addImm(36) // &jbuf[1] :: pc 5843 .addMemOperand(FIMMOSt)); 5844 } 5845} 5846 5847MachineBasicBlock *ARMTargetLowering:: 5848EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 5849 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5850 DebugLoc dl = MI->getDebugLoc(); 5851 MachineFunction *MF = MBB->getParent(); 5852 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5853 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 5854 MachineFrameInfo *MFI = MF->getFrameInfo(); 5855 int FI = MFI->getFunctionContextIndex(); 5856 5857 const TargetRegisterClass *TRC = Subtarget->isThumb() ? 
5858 (const TargetRegisterClass*)&ARM::tGPRRegClass : 5859 (const TargetRegisterClass*)&ARM::GPRnopcRegClass; 5860 5861 // Get a mapping of the call site numbers to all of the landing pads they're 5862 // associated with. 5863 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; 5864 unsigned MaxCSNum = 0; 5865 MachineModuleInfo &MMI = MF->getMMI(); 5866 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; 5867 ++BB) { 5868 if (!BB->isLandingPad()) continue; 5869 5870 // FIXME: We should assert that the EH_LABEL is the first MI in the landing 5871 // pad. 5872 for (MachineBasicBlock::iterator 5873 II = BB->begin(), IE = BB->end(); II != IE; ++II) { 5874 if (!II->isEHLabel()) continue; 5875 5876 MCSymbol *Sym = II->getOperand(0).getMCSymbol(); 5877 if (!MMI.hasCallSiteLandingPad(Sym)) continue; 5878 5879 SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); 5880 for (SmallVectorImpl<unsigned>::iterator 5881 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); 5882 CSI != CSE; ++CSI) { 5883 CallSiteNumToLPad[*CSI].push_back(BB); 5884 MaxCSNum = std::max(MaxCSNum, *CSI); 5885 } 5886 break; 5887 } 5888 } 5889 5890 // Get an ordered list of the machine basic blocks for the jump table. 5891 std::vector<MachineBasicBlock*> LPadList; 5892 SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs; 5893 LPadList.reserve(CallSiteNumToLPad.size()); 5894 for (unsigned I = 1; I <= MaxCSNum; ++I) { 5895 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; 5896 for (SmallVectorImpl<MachineBasicBlock*>::iterator 5897 II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { 5898 LPadList.push_back(*II); 5899 InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); 5900 } 5901 } 5902 5903 assert(!LPadList.empty() && 5904 "No landing pad destinations for the dispatch jump table!"); 5905 5906 // Create the jump table and associated information. 5907 MachineJumpTableInfo *JTI = 5908 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); 5909 unsigned MJTI = JTI->createJumpTableIndex(LPadList); 5910 unsigned UId = AFI->createJumpTableUId(); 5911 5912 // Create the MBBs for the dispatch code. 5913 5914 // Shove the dispatch's address into the return slot in the function context. 5915 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); 5916 DispatchBB->setIsLandingPad(); 5917 5918 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); 5919 BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP)); 5920 DispatchBB->addSuccessor(TrapBB); 5921 5922 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); 5923 DispatchBB->addSuccessor(DispContBB); 5924 5925 // Insert and MBBs. 5926 MF->insert(MF->end(), DispatchBB); 5927 MF->insert(MF->end(), DispContBB); 5928 MF->insert(MF->end(), TrapBB); 5929 5930 // Insert code into the entry block that creates and registers the function 5931 // context. 
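  // The address of DispatchBB is what ends up in the pc slot of the jump
  // buffer inside that context (the offset-36 stores above), so resuming
  // through the context lands in the dispatch code built below.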
5932 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); 5933 5934 MachineMemOperand *FIMMOLd = 5935 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 5936 MachineMemOperand::MOLoad | 5937 MachineMemOperand::MOVolatile, 4, 4); 5938 5939 if (AFI->isThumb1OnlyFunction()) 5940 BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); 5941 else if (!Subtarget->hasVFP2()) 5942 BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); 5943 else 5944 BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); 5945 5946 unsigned NumLPads = LPadList.size(); 5947 if (Subtarget->isThumb2()) { 5948 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5949 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 5950 .addFrameIndex(FI) 5951 .addImm(4) 5952 .addMemOperand(FIMMOLd)); 5953 5954 if (NumLPads < 256) { 5955 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 5956 .addReg(NewVReg1) 5957 .addImm(LPadList.size())); 5958 } else { 5959 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5960 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 5961 .addImm(NumLPads & 0xFFFF)); 5962 5963 unsigned VReg2 = VReg1; 5964 if ((NumLPads & 0xFFFF0000) != 0) { 5965 VReg2 = MRI->createVirtualRegister(TRC); 5966 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 5967 .addReg(VReg1) 5968 .addImm(NumLPads >> 16)); 5969 } 5970 5971 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 5972 .addReg(NewVReg1) 5973 .addReg(VReg2)); 5974 } 5975 5976 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 5977 .addMBB(TrapBB) 5978 .addImm(ARMCC::HI) 5979 .addReg(ARM::CPSR); 5980 5981 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5982 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 5983 .addJumpTableIndex(MJTI) 5984 .addImm(UId)); 5985 5986 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5987 AddDefaultCC( 5988 AddDefaultPred( 5989 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 5990 .addReg(NewVReg3, RegState::Kill) 5991 .addReg(NewVReg1) 5992 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 5993 5994 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 5995 .addReg(NewVReg4, RegState::Kill) 5996 .addReg(NewVReg1) 5997 .addJumpTableIndex(MJTI) 5998 .addImm(UId); 5999 } else if (Subtarget->isThumb()) { 6000 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6001 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 6002 .addFrameIndex(FI) 6003 .addImm(1) 6004 .addMemOperand(FIMMOLd)); 6005 6006 if (NumLPads < 256) { 6007 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 6008 .addReg(NewVReg1) 6009 .addImm(NumLPads)); 6010 } else { 6011 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6012 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6013 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 6014 6015 // MachineConstantPool wants an explicit alignment. 
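  // Use the target's preferred i32 alignment and, if none is recorded,
  // fall back to the type's allocation size.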
6016 unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); 6017 if (Align == 0) 6018 Align = getTargetData()->getTypeAllocSize(C->getType()); 6019 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6020 6021 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6022 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 6023 .addReg(VReg1, RegState::Define) 6024 .addConstantPoolIndex(Idx)); 6025 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 6026 .addReg(NewVReg1) 6027 .addReg(VReg1)); 6028 } 6029 6030 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 6031 .addMBB(TrapBB) 6032 .addImm(ARMCC::HI) 6033 .addReg(ARM::CPSR); 6034 6035 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6036 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 6037 .addReg(ARM::CPSR, RegState::Define) 6038 .addReg(NewVReg1) 6039 .addImm(2)); 6040 6041 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6042 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 6043 .addJumpTableIndex(MJTI) 6044 .addImm(UId)); 6045 6046 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6047 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 6048 .addReg(ARM::CPSR, RegState::Define) 6049 .addReg(NewVReg2, RegState::Kill) 6050 .addReg(NewVReg3)); 6051 6052 MachineMemOperand *JTMMOLd = 6053 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 6054 MachineMemOperand::MOLoad, 4, 4); 6055 6056 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6057 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 6058 .addReg(NewVReg4, RegState::Kill) 6059 .addImm(0) 6060 .addMemOperand(JTMMOLd)); 6061 6062 unsigned NewVReg6 = MRI->createVirtualRegister(TRC); 6063 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 6064 .addReg(ARM::CPSR, RegState::Define) 6065 .addReg(NewVReg5, RegState::Kill) 6066 .addReg(NewVReg3)); 6067 6068 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 6069 .addReg(NewVReg6, RegState::Kill) 6070 .addJumpTableIndex(MJTI) 6071 .addImm(UId); 6072 } else { 6073 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6074 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 6075 .addFrameIndex(FI) 6076 .addImm(4) 6077 .addMemOperand(FIMMOLd)); 6078 6079 if (NumLPads < 256) { 6080 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 6081 .addReg(NewVReg1) 6082 .addImm(NumLPads)); 6083 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 6084 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6085 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 6086 .addImm(NumLPads & 0xFFFF)); 6087 6088 unsigned VReg2 = VReg1; 6089 if ((NumLPads & 0xFFFF0000) != 0) { 6090 VReg2 = MRI->createVirtualRegister(TRC); 6091 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 6092 .addReg(VReg1) 6093 .addImm(NumLPads >> 16)); 6094 } 6095 6096 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6097 .addReg(NewVReg1) 6098 .addReg(VReg2)); 6099 } else { 6100 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6101 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6102 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 6103 6104 // MachineConstantPool wants an explicit alignment. 
6105 unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); 6106 if (Align == 0) 6107 Align = getTargetData()->getTypeAllocSize(C->getType()); 6108 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6109 6110 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6111 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 6112 .addReg(VReg1, RegState::Define) 6113 .addConstantPoolIndex(Idx) 6114 .addImm(0)); 6115 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6116 .addReg(NewVReg1) 6117 .addReg(VReg1, RegState::Kill)); 6118 } 6119 6120 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 6121 .addMBB(TrapBB) 6122 .addImm(ARMCC::HI) 6123 .addReg(ARM::CPSR); 6124 6125 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6126 AddDefaultCC( 6127 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 6128 .addReg(NewVReg1) 6129 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 6130 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6131 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 6132 .addJumpTableIndex(MJTI) 6133 .addImm(UId)); 6134 6135 MachineMemOperand *JTMMOLd = 6136 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 6137 MachineMemOperand::MOLoad, 4, 4); 6138 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6139 AddDefaultPred( 6140 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 6141 .addReg(NewVReg3, RegState::Kill) 6142 .addReg(NewVReg4) 6143 .addImm(0) 6144 .addMemOperand(JTMMOLd)); 6145 6146 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 6147 .addReg(NewVReg5, RegState::Kill) 6148 .addReg(NewVReg4) 6149 .addJumpTableIndex(MJTI) 6150 .addImm(UId); 6151 } 6152 6153 // Add the jump table entries as successors to the MBB. 6154 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; 6155 for (std::vector<MachineBasicBlock*>::iterator 6156 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 6157 MachineBasicBlock *CurMBB = *I; 6158 if (SeenMBBs.insert(CurMBB)) 6159 DispContBB->addSuccessor(CurMBB); 6160 } 6161 6162 // N.B. the order the invoke BBs are processed in doesn't matter here. 6163 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); 6164 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 6165 const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); 6166 SmallVector<MachineBasicBlock*, 64> MBBLPads; 6167 for (SmallPtrSet<MachineBasicBlock*, 64>::iterator 6168 I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { 6169 MachineBasicBlock *BB = *I; 6170 6171 // Remove the landing pad successor from the invoke block and replace it 6172 // with the new dispatch block. 6173 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 6174 BB->succ_end()); 6175 while (!Successors.empty()) { 6176 MachineBasicBlock *SMBB = Successors.pop_back_val(); 6177 if (SMBB->isLandingPad()) { 6178 BB->removeSuccessor(SMBB); 6179 MBBLPads.push_back(SMBB); 6180 } 6181 } 6182 6183 BB->addSuccessor(DispatchBB); 6184 6185 // Find the invoke call and mark all of the callee-saved registers as 6186 // 'implicit defined' so that they're spilled. This prevents code from 6187 // moving instructions to before the EH block, where they will never be 6188 // executed. 
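  // (On the exceptional return into the dispatch block the callee-saved
  // registers cannot be relied on to hold their values from before the call,
  // so anything live across the invoke must not be kept in them; treating the
  // registers as clobbered by the call forces such values onto the stack.)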
6189 for (MachineBasicBlock::reverse_iterator 6190 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { 6191 if (!II->isCall()) continue; 6192 6193 DenseMap<unsigned, bool> DefRegs; 6194 for (MachineInstr::mop_iterator 6195 OI = II->operands_begin(), OE = II->operands_end(); 6196 OI != OE; ++OI) { 6197 if (!OI->isReg()) continue; 6198 DefRegs[OI->getReg()] = true; 6199 } 6200 6201 MachineInstrBuilder MIB(&*II); 6202 6203 for (unsigned i = 0; SavedRegs[i] != 0; ++i) { 6204 unsigned Reg = SavedRegs[i]; 6205 if (Subtarget->isThumb2() && 6206 !ARM::tGPRRegClass.contains(Reg) && 6207 !ARM::hGPRRegClass.contains(Reg)) 6208 continue; 6209 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg)) 6210 continue; 6211 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg)) 6212 continue; 6213 if (!DefRegs[Reg]) 6214 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); 6215 } 6216 6217 break; 6218 } 6219 } 6220 6221 // Mark all former landing pads as non-landing pads. The dispatch is the only 6222 // landing pad now. 6223 for (SmallVectorImpl<MachineBasicBlock*>::iterator 6224 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) 6225 (*I)->setIsLandingPad(false); 6226 6227 // The instruction is gone now. 6228 MI->eraseFromParent(); 6229 6230 return MBB; 6231} 6232 6233static 6234MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 6235 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 6236 E = MBB->succ_end(); I != E; ++I) 6237 if (*I != Succ) 6238 return *I; 6239 llvm_unreachable("Expecting a BB with two successors!"); 6240} 6241 6242MachineBasicBlock *ARMTargetLowering:: 6243EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { 6244 // This pseudo instruction has 3 operands: dst, src, size 6245 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). 6246 // Otherwise, we will generate unrolled scalar copies. 6247 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6248 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6249 MachineFunction::iterator It = BB; 6250 ++It; 6251 6252 unsigned dest = MI->getOperand(0).getReg(); 6253 unsigned src = MI->getOperand(1).getReg(); 6254 unsigned SizeVal = MI->getOperand(2).getImm(); 6255 unsigned Align = MI->getOperand(3).getImm(); 6256 DebugLoc dl = MI->getDebugLoc(); 6257 6258 bool isThumb2 = Subtarget->isThumb2(); 6259 MachineFunction *MF = BB->getParent(); 6260 MachineRegisterInfo &MRI = MF->getRegInfo(); 6261 unsigned ldrOpc, strOpc, UnitSize = 0; 6262 6263 const TargetRegisterClass *TRC = isThumb2 ? 6264 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6265 (const TargetRegisterClass*)&ARM::GPRRegClass; 6266 const TargetRegisterClass *TRC_Vec = 0; 6267 6268 if (Align & 1) { 6269 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 6270 strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; 6271 UnitSize = 1; 6272 } else if (Align & 2) { 6273 ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; 6274 strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; 6275 UnitSize = 2; 6276 } else { 6277 // Check whether we can use NEON instructions. 
6278 if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && 6279 Subtarget->hasNEON()) { 6280 if ((Align % 16 == 0) && SizeVal >= 16) { 6281 ldrOpc = ARM::VLD1q32wb_fixed; 6282 strOpc = ARM::VST1q32wb_fixed; 6283 UnitSize = 16; 6284 TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; 6285 } 6286 else if ((Align % 8 == 0) && SizeVal >= 8) { 6287 ldrOpc = ARM::VLD1d32wb_fixed; 6288 strOpc = ARM::VST1d32wb_fixed; 6289 UnitSize = 8; 6290 TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; 6291 } 6292 } 6293 // Can't use NEON instructions. 6294 if (UnitSize == 0) { 6295 ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; 6296 strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; 6297 UnitSize = 4; 6298 } 6299 } 6300 6301 unsigned BytesLeft = SizeVal % UnitSize; 6302 unsigned LoopSize = SizeVal - BytesLeft; 6303 6304 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { 6305 // Use LDR and STR to copy. 6306 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) 6307 // [destOut] = STR_POST(scratch, destIn, UnitSize) 6308 unsigned srcIn = src; 6309 unsigned destIn = dest; 6310 for (unsigned i = 0; i < LoopSize; i+=UnitSize) { 6311 unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); 6312 unsigned srcOut = MRI.createVirtualRegister(TRC); 6313 unsigned destOut = MRI.createVirtualRegister(TRC); 6314 if (UnitSize >= 8) { 6315 AddDefaultPred(BuildMI(*BB, MI, dl, 6316 TII->get(ldrOpc), scratch) 6317 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); 6318 6319 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6320 .addReg(destIn).addImm(0).addReg(scratch)); 6321 } else if (isThumb2) { 6322 AddDefaultPred(BuildMI(*BB, MI, dl, 6323 TII->get(ldrOpc), scratch) 6324 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); 6325 6326 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6327 .addReg(scratch).addReg(destIn) 6328 .addImm(UnitSize)); 6329 } else { 6330 AddDefaultPred(BuildMI(*BB, MI, dl, 6331 TII->get(ldrOpc), scratch) 6332 .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) 6333 .addImm(UnitSize)); 6334 6335 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6336 .addReg(scratch).addReg(destIn) 6337 .addReg(0).addImm(UnitSize)); 6338 } 6339 srcIn = srcOut; 6340 destIn = destOut; 6341 } 6342 6343 // Handle the leftover bytes with LDRB and STRB. 6344 // [scratch, srcOut] = LDRB_POST(srcIn, 1) 6345 // [destOut] = STRB_POST(scratch, destIn, 1) 6346 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 6347 strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; 6348 for (unsigned i = 0; i < BytesLeft; i++) { 6349 unsigned scratch = MRI.createVirtualRegister(TRC); 6350 unsigned srcOut = MRI.createVirtualRegister(TRC); 6351 unsigned destOut = MRI.createVirtualRegister(TRC); 6352 if (isThumb2) { 6353 AddDefaultPred(BuildMI(*BB, MI, dl, 6354 TII->get(ldrOpc),scratch) 6355 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); 6356 6357 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6358 .addReg(scratch).addReg(destIn) 6359 .addReg(0).addImm(1)); 6360 } else { 6361 AddDefaultPred(BuildMI(*BB, MI, dl, 6362 TII->get(ldrOpc),scratch) 6363 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); 6364 6365 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 6366 .addReg(scratch).addReg(destIn) 6367 .addReg(0).addImm(1)); 6368 } 6369 srcIn = srcOut; 6370 destIn = destOut; 6371 } 6372 MI->eraseFromParent(); // The instruction is gone now. 
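    // All SizeVal bytes have now been copied with straight-line code; e.g. a
    // 10-byte word-aligned copy becomes two LDR/STR post-increment pairs
    // followed by two LDRB/STRB post-increment pairs for the tail.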
6373 return BB; 6374 } 6375 6376 // Expand the pseudo op to a loop. 6377 // thisMBB: 6378 // ... 6379 // movw varEnd, # --> with thumb2 6380 // movt varEnd, # 6381 // ldrcp varEnd, idx --> without thumb2 6382 // fallthrough --> loopMBB 6383 // loopMBB: 6384 // PHI varPhi, varEnd, varLoop 6385 // PHI srcPhi, src, srcLoop 6386 // PHI destPhi, dst, destLoop 6387 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 6388 // [destLoop] = STR_POST(scratch, destPhi, UnitSize) 6389 // subs varLoop, varPhi, #UnitSize 6390 // bne loopMBB 6391 // fallthrough --> exitMBB 6392 // exitMBB: 6393 // epilogue to handle left-over bytes 6394 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 6395 // [destOut] = STRB_POST(scratch, destLoop, 1) 6396 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6397 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6398 MF->insert(It, loopMBB); 6399 MF->insert(It, exitMBB); 6400 6401 // Transfer the remainder of BB and its successor edges to exitMBB. 6402 exitMBB->splice(exitMBB->begin(), BB, 6403 llvm::next(MachineBasicBlock::iterator(MI)), 6404 BB->end()); 6405 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6406 6407 // Load an immediate to varEnd. 6408 unsigned varEnd = MRI.createVirtualRegister(TRC); 6409 if (isThumb2) { 6410 unsigned VReg1 = varEnd; 6411 if ((LoopSize & 0xFFFF0000) != 0) 6412 VReg1 = MRI.createVirtualRegister(TRC); 6413 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) 6414 .addImm(LoopSize & 0xFFFF)); 6415 6416 if ((LoopSize & 0xFFFF0000) != 0) 6417 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) 6418 .addReg(VReg1) 6419 .addImm(LoopSize >> 16)); 6420 } else { 6421 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6422 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6423 const Constant *C = ConstantInt::get(Int32Ty, LoopSize); 6424 6425 // MachineConstantPool wants an explicit alignment. 6426 unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); 6427 if (Align == 0) 6428 Align = getTargetData()->getTypeAllocSize(C->getType()); 6429 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6430 6431 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) 6432 .addReg(varEnd, RegState::Define) 6433 .addConstantPoolIndex(Idx) 6434 .addImm(0)); 6435 } 6436 BB->addSuccessor(loopMBB); 6437 6438 // Generate the loop body: 6439 // varPhi = PHI(varLoop, varEnd) 6440 // srcPhi = PHI(srcLoop, src) 6441 // destPhi = PHI(destLoop, dst) 6442 MachineBasicBlock *entryBB = BB; 6443 BB = loopMBB; 6444 unsigned varLoop = MRI.createVirtualRegister(TRC); 6445 unsigned varPhi = MRI.createVirtualRegister(TRC); 6446 unsigned srcLoop = MRI.createVirtualRegister(TRC); 6447 unsigned srcPhi = MRI.createVirtualRegister(TRC); 6448 unsigned destLoop = MRI.createVirtualRegister(TRC); 6449 unsigned destPhi = MRI.createVirtualRegister(TRC); 6450 6451 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) 6452 .addReg(varLoop).addMBB(loopMBB) 6453 .addReg(varEnd).addMBB(entryBB); 6454 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) 6455 .addReg(srcLoop).addMBB(loopMBB) 6456 .addReg(src).addMBB(entryBB); 6457 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) 6458 .addReg(destLoop).addMBB(loopMBB) 6459 .addReg(dest).addMBB(entryBB); 6460 6461 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 6462 // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) 6463 unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? 
TRC_Vec:TRC); 6464 if (UnitSize >= 8) { 6465 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) 6466 .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); 6467 6468 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) 6469 .addReg(destPhi).addImm(0).addReg(scratch)); 6470 } else if (isThumb2) { 6471 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) 6472 .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); 6473 6474 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) 6475 .addReg(scratch).addReg(destPhi) 6476 .addImm(UnitSize)); 6477 } else { 6478 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) 6479 .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0) 6480 .addImm(UnitSize)); 6481 6482 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) 6483 .addReg(scratch).addReg(destPhi) 6484 .addReg(0).addImm(UnitSize)); 6485 } 6486 6487 // Decrement loop variable by UnitSize. 6488 MachineInstrBuilder MIB = BuildMI(BB, dl, 6489 TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); 6490 AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); 6491 MIB->getOperand(5).setReg(ARM::CPSR); 6492 MIB->getOperand(5).setIsDef(true); 6493 6494 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6495 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6496 6497 // loopMBB can loop back to loopMBB or fall through to exitMBB. 6498 BB->addSuccessor(loopMBB); 6499 BB->addSuccessor(exitMBB); 6500 6501 // Add epilogue to handle BytesLeft. 6502 BB = exitMBB; 6503 MachineInstr *StartOfExit = exitMBB->begin(); 6504 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 6505 strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; 6506 6507 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 6508 // [destOut] = STRB_POST(scratch, destLoop, 1) 6509 unsigned srcIn = srcLoop; 6510 unsigned destIn = destLoop; 6511 for (unsigned i = 0; i < BytesLeft; i++) { 6512 unsigned scratch = MRI.createVirtualRegister(TRC); 6513 unsigned srcOut = MRI.createVirtualRegister(TRC); 6514 unsigned destOut = MRI.createVirtualRegister(TRC); 6515 if (isThumb2) { 6516 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, 6517 TII->get(ldrOpc),scratch) 6518 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); 6519 6520 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) 6521 .addReg(scratch).addReg(destIn) 6522 .addImm(1)); 6523 } else { 6524 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, 6525 TII->get(ldrOpc),scratch) 6526 .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); 6527 6528 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) 6529 .addReg(scratch).addReg(destIn) 6530 .addReg(0).addImm(1)); 6531 } 6532 srcIn = srcOut; 6533 destIn = destOut; 6534 } 6535 6536 MI->eraseFromParent(); // The instruction is gone now. 6537 return BB; 6538} 6539 6540MachineBasicBlock * 6541ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 6542 MachineBasicBlock *BB) const { 6543 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6544 DebugLoc dl = MI->getDebugLoc(); 6545 bool isThumb2 = Subtarget->isThumb2(); 6546 switch (MI->getOpcode()) { 6547 default: { 6548 MI->dump(); 6549 llvm_unreachable("Unexpected instr type to insert"); 6550 } 6551 // The Thumb2 pre-indexed stores have the same MI operands, they just 6552 // define them differently in the .td files from the isel patterns, so 6553 // they need pseudos. 
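  // For the Thumb2 cases this is just a descriptor swap (e.g. t2STR_preidx
  // becomes t2STR_PRE) with the operand list left untouched; the ARM-mode
  // cases further down also re-encode the immediate offset.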
6554 case ARM::t2STR_preidx: 6555 MI->setDesc(TII->get(ARM::t2STR_PRE)); 6556 return BB; 6557 case ARM::t2STRB_preidx: 6558 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 6559 return BB; 6560 case ARM::t2STRH_preidx: 6561 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 6562 return BB; 6563 6564 case ARM::STRi_preidx: 6565 case ARM::STRBi_preidx: { 6566 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 6567 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 6568 // Decode the offset. 6569 unsigned Offset = MI->getOperand(4).getImm(); 6570 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 6571 Offset = ARM_AM::getAM2Offset(Offset); 6572 if (isSub) 6573 Offset = -Offset; 6574 6575 MachineMemOperand *MMO = *MI->memoperands_begin(); 6576 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 6577 .addOperand(MI->getOperand(0)) // Rn_wb 6578 .addOperand(MI->getOperand(1)) // Rt 6579 .addOperand(MI->getOperand(2)) // Rn 6580 .addImm(Offset) // offset (skip GPR==zero_reg) 6581 .addOperand(MI->getOperand(5)) // pred 6582 .addOperand(MI->getOperand(6)) 6583 .addMemOperand(MMO); 6584 MI->eraseFromParent(); 6585 return BB; 6586 } 6587 case ARM::STRr_preidx: 6588 case ARM::STRBr_preidx: 6589 case ARM::STRH_preidx: { 6590 unsigned NewOpc; 6591 switch (MI->getOpcode()) { 6592 default: llvm_unreachable("unexpected opcode!"); 6593 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 6594 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 6595 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 6596 } 6597 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 6598 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 6599 MIB.addOperand(MI->getOperand(i)); 6600 MI->eraseFromParent(); 6601 return BB; 6602 } 6603 case ARM::ATOMIC_LOAD_ADD_I8: 6604 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6605 case ARM::ATOMIC_LOAD_ADD_I16: 6606 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6607 case ARM::ATOMIC_LOAD_ADD_I32: 6608 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6609 6610 case ARM::ATOMIC_LOAD_AND_I8: 6611 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6612 case ARM::ATOMIC_LOAD_AND_I16: 6613 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6614 case ARM::ATOMIC_LOAD_AND_I32: 6615 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6616 6617 case ARM::ATOMIC_LOAD_OR_I8: 6618 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6619 case ARM::ATOMIC_LOAD_OR_I16: 6620 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6621 case ARM::ATOMIC_LOAD_OR_I32: 6622 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6623 6624 case ARM::ATOMIC_LOAD_XOR_I8: 6625 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6626 case ARM::ATOMIC_LOAD_XOR_I16: 6627 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6628 case ARM::ATOMIC_LOAD_XOR_I32: 6629 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6630 6631 case ARM::ATOMIC_LOAD_NAND_I8: 6632 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6633 case ARM::ATOMIC_LOAD_NAND_I16: 6634 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6635 case ARM::ATOMIC_LOAD_NAND_I32: 6636 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6637 6638 case ARM::ATOMIC_LOAD_SUB_I8: 6639 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 6640 case ARM::ATOMIC_LOAD_SUB_I16: 6641 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 6642 case ARM::ATOMIC_LOAD_SUB_I32: 6643 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 6644 6645 case ARM::ATOMIC_LOAD_MIN_I8: 6646 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 6647 case ARM::ATOMIC_LOAD_MIN_I16: 6648 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 6649 case ARM::ATOMIC_LOAD_MIN_I32: 6650 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 6651 6652 case ARM::ATOMIC_LOAD_MAX_I8: 6653 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 6654 case ARM::ATOMIC_LOAD_MAX_I16: 6655 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 6656 case ARM::ATOMIC_LOAD_MAX_I32: 6657 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 6658 6659 case ARM::ATOMIC_LOAD_UMIN_I8: 6660 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 6661 case ARM::ATOMIC_LOAD_UMIN_I16: 6662 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 6663 case ARM::ATOMIC_LOAD_UMIN_I32: 6664 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 6665 6666 case ARM::ATOMIC_LOAD_UMAX_I8: 6667 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 6668 case ARM::ATOMIC_LOAD_UMAX_I16: 6669 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 6670 case ARM::ATOMIC_LOAD_UMAX_I32: 6671 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 6672 6673 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 6674 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 6675 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 6676 6677 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 6678 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 6679 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 6680 6681 6682 case ARM::ATOMADD6432: 6683 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 6684 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 6685 /*NeedsCarry*/ true); 6686 case ARM::ATOMSUB6432: 6687 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 6688 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 6689 /*NeedsCarry*/ true); 6690 case ARM::ATOMOR6432: 6691 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 6692 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6693 case ARM::ATOMXOR6432: 6694 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 6695 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6696 case ARM::ATOMAND6432: 6697 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 6698 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6699 case ARM::ATOMSWAP6432: 6700 return EmitAtomicBinary64(MI, BB, 0, 0, false); 6701 case ARM::ATOMCMPXCHG6432: 6702 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 6703 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 6704 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 6705 6706 case ARM::tMOVCCr_pseudo: { 6707 // To "insert" a SELECT_CC instruction, we actually have to insert the 6708 // diamond control-flow pattern. The incoming instruction knows the 6709 // destination vreg to set, the condition code register to branch on, the 6710 // true/false values to select between, and a branch opcode to use. 6711 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6712 MachineFunction::iterator It = BB; 6713 ++It; 6714 6715 // thisMBB: 6716 // ... 6717 // TrueVal = ... 
6718 // cmpTY ccX, r1, r2 6719 // bCC copy1MBB 6720 // fallthrough --> copy0MBB 6721 MachineBasicBlock *thisMBB = BB; 6722 MachineFunction *F = BB->getParent(); 6723 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 6724 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 6725 F->insert(It, copy0MBB); 6726 F->insert(It, sinkMBB); 6727 6728 // Transfer the remainder of BB and its successor edges to sinkMBB. 6729 sinkMBB->splice(sinkMBB->begin(), BB, 6730 llvm::next(MachineBasicBlock::iterator(MI)), 6731 BB->end()); 6732 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 6733 6734 BB->addSuccessor(copy0MBB); 6735 BB->addSuccessor(sinkMBB); 6736 6737 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 6738 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 6739 6740 // copy0MBB: 6741 // %FalseValue = ... 6742 // # fallthrough to sinkMBB 6743 BB = copy0MBB; 6744 6745 // Update machine-CFG edges 6746 BB->addSuccessor(sinkMBB); 6747 6748 // sinkMBB: 6749 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 6750 // ... 6751 BB = sinkMBB; 6752 BuildMI(*BB, BB->begin(), dl, 6753 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 6754 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 6755 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 6756 6757 MI->eraseFromParent(); // The pseudo instruction is gone now. 6758 return BB; 6759 } 6760 6761 case ARM::BCCi64: 6762 case ARM::BCCZi64: { 6763 // If there is an unconditional branch to the other successor, remove it. 6764 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 6765 6766 // Compare both parts that make up the double comparison separately for 6767 // equality. 6768 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 6769 6770 unsigned LHS1 = MI->getOperand(1).getReg(); 6771 unsigned LHS2 = MI->getOperand(2).getReg(); 6772 if (RHSisZero) { 6773 AddDefaultPred(BuildMI(BB, dl, 6774 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6775 .addReg(LHS1).addImm(0)); 6776 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6777 .addReg(LHS2).addImm(0) 6778 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 6779 } else { 6780 unsigned RHS1 = MI->getOperand(3).getReg(); 6781 unsigned RHS2 = MI->getOperand(4).getReg(); 6782 AddDefaultPred(BuildMI(BB, dl, 6783 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6784 .addReg(LHS1).addReg(RHS1)); 6785 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6786 .addReg(LHS2).addReg(RHS2) 6787 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 6788 } 6789 6790 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 6791 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 6792 if (MI->getOperand(0).getImm() == ARMCC::NE) 6793 std::swap(destMBB, exitMBB); 6794 6795 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6796 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 6797 if (isThumb2) 6798 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 6799 else 6800 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 6801 6802 MI->eraseFromParent(); // The pseudo instruction is gone now. 6803 return BB; 6804 } 6805 6806 case ARM::Int_eh_sjlj_setjmp: 6807 case ARM::Int_eh_sjlj_setjmp_nofp: 6808 case ARM::tInt_eh_sjlj_setjmp: 6809 case ARM::t2Int_eh_sjlj_setjmp: 6810 case ARM::t2Int_eh_sjlj_setjmp_nofp: 6811 EmitSjLjDispatchBlock(MI, BB); 6812 return BB; 6813 6814 case ARM::ABS: 6815 case ARM::t2ABS: { 6816 // To insert an ABS instruction, we have to insert the 6817 // diamond control-flow pattern. 
The incoming instruction knows the 6818 // source vreg to test against 0, the destination vreg to set, 6819 // the condition code register to branch on, the 6820 // true/false values to select between, and a branch opcode to use. 6821 // It transforms 6822 // V1 = ABS V0 6823 // into 6824 // V2 = MOVS V0 6825 // BCC (branch to SinkBB if V0 >= 0) 6826 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 6827 // SinkBB: V1 = PHI(V2, V3) 6828 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6829 MachineFunction::iterator BBI = BB; 6830 ++BBI; 6831 MachineFunction *Fn = BB->getParent(); 6832 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 6833 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 6834 Fn->insert(BBI, RSBBB); 6835 Fn->insert(BBI, SinkBB); 6836 6837 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 6838 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 6839 bool isThumb2 = Subtarget->isThumb2(); 6840 MachineRegisterInfo &MRI = Fn->getRegInfo(); 6841 // In Thumb mode S must not be specified if source register is the SP or 6842 // PC and if destination register is the SP, so restrict register class 6843 unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 6844 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6845 (const TargetRegisterClass*)&ARM::GPRRegClass); 6846 6847 // Transfer the remainder of BB and its successor edges to sinkMBB. 6848 SinkBB->splice(SinkBB->begin(), BB, 6849 llvm::next(MachineBasicBlock::iterator(MI)), 6850 BB->end()); 6851 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 6852 6853 BB->addSuccessor(RSBBB); 6854 BB->addSuccessor(SinkBB); 6855 6856 // fall through to SinkMBB 6857 RSBBB->addSuccessor(SinkBB); 6858 6859 // insert a cmp at the end of BB 6860 AddDefaultPred(BuildMI(BB, dl, 6861 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6862 .addReg(ABSSrcReg).addImm(0)); 6863 6864 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 6865 BuildMI(BB, dl, 6866 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 6867 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 6868 6869 // insert rsbri in RSBBB 6870 // Note: BCC and rsbri will be converted into predicated rsbmi 6871 // by if-conversion pass 6872 BuildMI(*RSBBB, RSBBB->begin(), dl, 6873 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 6874 .addReg(ABSSrcReg, RegState::Kill) 6875 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 6876 6877 // insert PHI in SinkBB, 6878 // reuse ABSDstReg to not change uses of ABS instruction 6879 BuildMI(*SinkBB, SinkBB->begin(), dl, 6880 TII->get(ARM::PHI), ABSDstReg) 6881 .addReg(NewRsbDstReg).addMBB(RSBBB) 6882 .addReg(ABSSrcReg).addMBB(BB); 6883 6884 // remove ABS instruction 6885 MI->eraseFromParent(); 6886 6887 // return last added BB 6888 return SinkBB; 6889 } 6890 case ARM::COPY_STRUCT_BYVAL_I32: 6891 ++NumLoopByVals; 6892 return EmitStructByval(MI, BB); 6893 } 6894} 6895 6896void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 6897 SDNode *Node) const { 6898 if (!MI->hasPostISelHook()) { 6899 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 6900 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 6901 return; 6902 } 6903 6904 const MCInstrDesc *MCID = &MI->getDesc(); 6905 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 6906 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional 6907 // operand is still set to noreg. 
If needed, set the optional operand's 6908 // register to CPSR, and remove the redundant implicit def. 6909 // 6910 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 6911 6912 // Rename pseudo opcodes. 6913 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 6914 if (NewOpc) { 6915 const ARMBaseInstrInfo *TII = 6916 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 6917 MCID = &TII->get(NewOpc); 6918 6919 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 6920 "converted opcode should be the same except for cc_out"); 6921 6922 MI->setDesc(*MCID); 6923 6924 // Add the optional cc_out operand 6925 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 6926 } 6927 unsigned ccOutIdx = MCID->getNumOperands() - 1; 6928 6929 // Any ARM instruction that sets the 's' bit should specify an optional 6930 // "cc_out" operand in the last operand position. 6931 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 6932 assert(!NewOpc && "Optional cc_out operand required"); 6933 return; 6934 } 6935 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 6936 // since we already have an optional CPSR def. 6937 bool definesCPSR = false; 6938 bool deadCPSR = false; 6939 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 6940 i != e; ++i) { 6941 const MachineOperand &MO = MI->getOperand(i); 6942 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 6943 definesCPSR = true; 6944 if (MO.isDead()) 6945 deadCPSR = true; 6946 MI->RemoveOperand(i); 6947 break; 6948 } 6949 } 6950 if (!definesCPSR) { 6951 assert(!NewOpc && "Optional cc_out operand required"); 6952 return; 6953 } 6954 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 6955 if (deadCPSR) { 6956 assert(!MI->getOperand(ccOutIdx).getReg() && 6957 "expect uninitialized optional cc_out operand"); 6958 return; 6959 } 6960 6961 // If this instruction was defined with an optional CPSR def and its dag node 6962 // had a live implicit CPSR def, then activate the optional CPSR def. 6963 MachineOperand &MO = MI->getOperand(ccOutIdx); 6964 MO.setReg(ARM::CPSR); 6965 MO.setIsDef(true); 6966} 6967 6968//===----------------------------------------------------------------------===// 6969// ARM Optimization Hooks 6970//===----------------------------------------------------------------------===// 6971 6972// Helper function that checks if N is a null or all ones constant. 6973static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { 6974 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); 6975 if (!C) 6976 return false; 6977 return AllOnes ? C->isAllOnesValue() : C->isNullValue(); 6978} 6979 6980// Return true if N is conditionally 0 or all ones. 6981// Detects these expressions where cc is an i1 value: 6982// 6983// (select cc 0, y) [AllOnes=0] 6984// (select cc y, 0) [AllOnes=0] 6985// (zext cc) [AllOnes=0] 6986// (sext cc) [AllOnes=0/1] 6987// (select cc -1, y) [AllOnes=1] 6988// (select cc y, -1) [AllOnes=1] 6989// 6990// Invert is set when N is the null/all ones constant when CC is false. 6991// OtherOp is set to the alternative value of N. 
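// For example, with AllOnes=0, (select cc, 0, y) yields Invert=false and
// OtherOp=y, while (select cc, y, 0) yields Invert=true and OtherOp=y.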
6992static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, 6993 SDValue &CC, bool &Invert, 6994 SDValue &OtherOp, 6995 SelectionDAG &DAG) { 6996 switch (N->getOpcode()) { 6997 default: return false; 6998 case ISD::SELECT: { 6999 CC = N->getOperand(0); 7000 SDValue N1 = N->getOperand(1); 7001 SDValue N2 = N->getOperand(2); 7002 if (isZeroOrAllOnes(N1, AllOnes)) { 7003 Invert = false; 7004 OtherOp = N2; 7005 return true; 7006 } 7007 if (isZeroOrAllOnes(N2, AllOnes)) { 7008 Invert = true; 7009 OtherOp = N1; 7010 return true; 7011 } 7012 return false; 7013 } 7014 case ISD::ZERO_EXTEND: 7015 // (zext cc) can never be the all ones value. 7016 if (AllOnes) 7017 return false; 7018 // Fall through. 7019 case ISD::SIGN_EXTEND: { 7020 EVT VT = N->getValueType(0); 7021 CC = N->getOperand(0); 7022 if (CC.getValueType() != MVT::i1) 7023 return false; 7024 Invert = !AllOnes; 7025 if (AllOnes) 7026 // When looking for an AllOnes constant, N is an sext, and the 'other' 7027 // value is 0. 7028 OtherOp = DAG.getConstant(0, VT); 7029 else if (N->getOpcode() == ISD::ZERO_EXTEND) 7030 // When looking for a 0 constant, N can be zext or sext. 7031 OtherOp = DAG.getConstant(1, VT); 7032 else 7033 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); 7034 return true; 7035 } 7036 } 7037} 7038 7039// Combine a constant select operand into its use: 7040// 7041// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 7042// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 7043// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] 7044// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 7045// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 7046// 7047// The transform is rejected if the select doesn't have a constant operand that 7048// is null, or all ones when AllOnes is set. 7049// 7050// Also recognize sext/zext from i1: 7051// 7052// (add (zext cc), x) -> (select cc (add x, 1), x) 7053// (add (sext cc), x) -> (select cc (add x, -1), x) 7054// 7055// These transformations eventually create predicated instructions. 7056// 7057// @param N The node to transform. 7058// @param Slct The N operand that is a select. 7059// @param OtherOp The other N operand (x above). 7060// @param DCI Context. 7061// @param AllOnes Require the select constant to be all ones instead of null. 7062// @returns The new node, or SDValue() on failure. 7063static 7064SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 7065 TargetLowering::DAGCombinerInfo &DCI, 7066 bool AllOnes = false) { 7067 SelectionDAG &DAG = DCI.DAG; 7068 EVT VT = N->getValueType(0); 7069 SDValue NonConstantVal; 7070 SDValue CCOp; 7071 bool SwapSelectOps; 7072 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, 7073 NonConstantVal, DAG)) 7074 return SDValue(); 7075 7076 // Slct is now know to be the desired identity constant when CC is true. 7077 SDValue TrueVal = OtherOp; 7078 SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, 7079 OtherOp, NonConstantVal); 7080 // Unless SwapSelectOps says CC should be false. 7081 if (SwapSelectOps) 7082 std::swap(TrueVal, FalseVal); 7083 7084 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 7085 CCOp, TrueVal, FalseVal); 7086} 7087 7088// Attempt combineSelectAndUse on each operand of a commutative operator N. 
7089static 7090SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, 7091 TargetLowering::DAGCombinerInfo &DCI) { 7092 SDValue N0 = N->getOperand(0); 7093 SDValue N1 = N->getOperand(1); 7094 if (N0.getNode()->hasOneUse()) { 7095 SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); 7096 if (Result.getNode()) 7097 return Result; 7098 } 7099 if (N1.getNode()->hasOneUse()) { 7100 SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); 7101 if (Result.getNode()) 7102 return Result; 7103 } 7104 return SDValue(); 7105} 7106 7107// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction 7108// (only after legalization). 7109static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, 7110 TargetLowering::DAGCombinerInfo &DCI, 7111 const ARMSubtarget *Subtarget) { 7112 7113 // Only perform optimization if after legalize, and if NEON is available. We 7114 // also expected both operands to be BUILD_VECTORs. 7115 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() 7116 || N0.getOpcode() != ISD::BUILD_VECTOR 7117 || N1.getOpcode() != ISD::BUILD_VECTOR) 7118 return SDValue(); 7119 7120 // Check output type since VPADDL operand elements can only be 8, 16, or 32. 7121 EVT VT = N->getValueType(0); 7122 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) 7123 return SDValue(); 7124 7125 // Check that the vector operands are of the right form. 7126 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR 7127 // operands, where N is the size of the formed vector. 7128 // Each EXTRACT_VECTOR should have the same input vector and odd or even 7129 // index such that we have a pair wise add pattern. 7130 7131 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing. 7132 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 7133 return SDValue(); 7134 SDValue Vec = N0->getOperand(0)->getOperand(0); 7135 SDNode *V = Vec.getNode(); 7136 unsigned nextIndex = 0; 7137 7138 // For each operands to the ADD which are BUILD_VECTORs, 7139 // check to see if each of their operands are an EXTRACT_VECTOR with 7140 // the same vector and appropriate index. 7141 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { 7142 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT 7143 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 7144 7145 SDValue ExtVec0 = N0->getOperand(i); 7146 SDValue ExtVec1 = N1->getOperand(i); 7147 7148 // First operand is the vector, verify its the same. 7149 if (V != ExtVec0->getOperand(0).getNode() || 7150 V != ExtVec1->getOperand(0).getNode()) 7151 return SDValue(); 7152 7153 // Second is the constant, verify its correct. 7154 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); 7155 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); 7156 7157 // For the constant, we want to see all the even or all the odd. 7158 if (!C0 || !C1 || C0->getZExtValue() != nextIndex 7159 || C1->getZExtValue() != nextIndex+1) 7160 return SDValue(); 7161 7162 // Increment index. 7163 nextIndex+=2; 7164 } else 7165 return SDValue(); 7166 } 7167 7168 // Create VPADDL node. 7169 SelectionDAG &DAG = DCI.DAG; 7170 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7171 7172 // Build operand list. 7173 SmallVector<SDValue, 8> Ops; 7174 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, 7175 TLI.getPointerTy())); 7176 7177 // Input is the vector. 7178 Ops.push_back(Vec); 7179 7180 // Get widened type and narrowed type. 
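  // For example (assuming the extracts all read a single v16i8 source
  // vector), an 8-lane v8i8 pairwise add becomes arm_neon_vpaddls on that
  // v16i8 vector, giving v8i16, which is then truncated back to v8i8.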
7181 MVT widenType; 7182 unsigned numElem = VT.getVectorNumElements(); 7183 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { 7184 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; 7185 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; 7186 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; 7187 default: 7188 llvm_unreachable("Invalid vector element type for padd optimization."); 7189 } 7190 7191 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 7192 widenType, &Ops[0], Ops.size()); 7193 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); 7194} 7195 7196/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 7197/// operands N0 and N1. This is a helper for PerformADDCombine that is 7198/// called with the default operands, and if that fails, with commuted 7199/// operands. 7200static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 7201 TargetLowering::DAGCombinerInfo &DCI, 7202 const ARMSubtarget *Subtarget){ 7203 7204 // Attempt to create vpaddl for this add. 7205 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 7206 if (Result.getNode()) 7207 return Result; 7208 7209 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 7210 if (N0.getNode()->hasOneUse()) { 7211 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 7212 if (Result.getNode()) return Result; 7213 } 7214 return SDValue(); 7215} 7216 7217/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 7218/// 7219static SDValue PerformADDCombine(SDNode *N, 7220 TargetLowering::DAGCombinerInfo &DCI, 7221 const ARMSubtarget *Subtarget) { 7222 SDValue N0 = N->getOperand(0); 7223 SDValue N1 = N->getOperand(1); 7224 7225 // First try with the default operand order. 7226 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 7227 if (Result.getNode()) 7228 return Result; 7229 7230 // If that didn't work, try again with the operands commuted. 7231 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 7232} 7233 7234/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 7235/// 7236static SDValue PerformSUBCombine(SDNode *N, 7237 TargetLowering::DAGCombinerInfo &DCI) { 7238 SDValue N0 = N->getOperand(0); 7239 SDValue N1 = N->getOperand(1); 7240 7241 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 7242 if (N1.getNode()->hasOneUse()) { 7243 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 7244 if (Result.getNode()) return Result; 7245 } 7246 7247 return SDValue(); 7248} 7249 7250/// PerformVMULCombine 7251/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 7252/// special multiplier accumulator forwarding. 
7253/// vmul d3, d0, d2 7254/// vmla d3, d1, d2 7255/// is faster than 7256/// vadd d3, d0, d1 7257/// vmul d3, d3, d2 7258static SDValue PerformVMULCombine(SDNode *N, 7259 TargetLowering::DAGCombinerInfo &DCI, 7260 const ARMSubtarget *Subtarget) { 7261 if (!Subtarget->hasVMLxForwarding()) 7262 return SDValue(); 7263 7264 SelectionDAG &DAG = DCI.DAG; 7265 SDValue N0 = N->getOperand(0); 7266 SDValue N1 = N->getOperand(1); 7267 unsigned Opcode = N0.getOpcode(); 7268 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 7269 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 7270 Opcode = N1.getOpcode(); 7271 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 7272 Opcode != ISD::FADD && Opcode != ISD::FSUB) 7273 return SDValue(); 7274 std::swap(N0, N1); 7275 } 7276 7277 EVT VT = N->getValueType(0); 7278 DebugLoc DL = N->getDebugLoc(); 7279 SDValue N00 = N0->getOperand(0); 7280 SDValue N01 = N0->getOperand(1); 7281 return DAG.getNode(Opcode, DL, VT, 7282 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 7283 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 7284} 7285 7286static SDValue PerformMULCombine(SDNode *N, 7287 TargetLowering::DAGCombinerInfo &DCI, 7288 const ARMSubtarget *Subtarget) { 7289 SelectionDAG &DAG = DCI.DAG; 7290 7291 if (Subtarget->isThumb1Only()) 7292 return SDValue(); 7293 7294 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 7295 return SDValue(); 7296 7297 EVT VT = N->getValueType(0); 7298 if (VT.is64BitVector() || VT.is128BitVector()) 7299 return PerformVMULCombine(N, DCI, Subtarget); 7300 if (VT != MVT::i32) 7301 return SDValue(); 7302 7303 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7304 if (!C) 7305 return SDValue(); 7306 7307 int64_t MulAmt = C->getSExtValue(); 7308 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 7309 7310 ShiftAmt = ShiftAmt & (32 - 1); 7311 SDValue V = N->getOperand(0); 7312 DebugLoc DL = N->getDebugLoc(); 7313 7314 SDValue Res; 7315 MulAmt >>= ShiftAmt; 7316 7317 if (MulAmt >= 0) { 7318 if (isPowerOf2_32(MulAmt - 1)) { 7319 // (mul x, 2^N + 1) => (add (shl x, N), x) 7320 Res = DAG.getNode(ISD::ADD, DL, VT, 7321 V, 7322 DAG.getNode(ISD::SHL, DL, VT, 7323 V, 7324 DAG.getConstant(Log2_32(MulAmt - 1), 7325 MVT::i32))); 7326 } else if (isPowerOf2_32(MulAmt + 1)) { 7327 // (mul x, 2^N - 1) => (sub (shl x, N), x) 7328 Res = DAG.getNode(ISD::SUB, DL, VT, 7329 DAG.getNode(ISD::SHL, DL, VT, 7330 V, 7331 DAG.getConstant(Log2_32(MulAmt + 1), 7332 MVT::i32)), 7333 V); 7334 } else 7335 return SDValue(); 7336 } else { 7337 uint64_t MulAmtAbs = -MulAmt; 7338 if (isPowerOf2_32(MulAmtAbs + 1)) { 7339 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) 7340 Res = DAG.getNode(ISD::SUB, DL, VT, 7341 V, 7342 DAG.getNode(ISD::SHL, DL, VT, 7343 V, 7344 DAG.getConstant(Log2_32(MulAmtAbs + 1), 7345 MVT::i32))); 7346 } else if (isPowerOf2_32(MulAmtAbs - 1)) { 7347 // (mul x, -(2^N + 1)) => - (add (shl x, N), x) 7348 Res = DAG.getNode(ISD::ADD, DL, VT, 7349 V, 7350 DAG.getNode(ISD::SHL, DL, VT, 7351 V, 7352 DAG.getConstant(Log2_32(MulAmtAbs-1), 7353 MVT::i32))); 7354 Res = DAG.getNode(ISD::SUB, DL, VT, 7355 DAG.getConstant(0, MVT::i32),Res); 7356 7357 } else 7358 return SDValue(); 7359 } 7360 7361 if (ShiftAmt != 0) 7362 Res = DAG.getNode(ISD::SHL, DL, VT, 7363 Res, DAG.getConstant(ShiftAmt, MVT::i32)); 7364 7365 // Do not add new nodes to DAG combiner worklist. 
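  // (The third argument to CombineTo below is AddTo=false, which performs the
  // replacement without queueing the newly built shift/add nodes for further
  // combining.)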
7366 DCI.CombineTo(N, Res, false); 7367 return SDValue(); 7368} 7369 7370static SDValue PerformANDCombine(SDNode *N, 7371 TargetLowering::DAGCombinerInfo &DCI, 7372 const ARMSubtarget *Subtarget) { 7373 7374 // Attempt to use immediate-form VBIC 7375 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 7376 DebugLoc dl = N->getDebugLoc(); 7377 EVT VT = N->getValueType(0); 7378 SelectionDAG &DAG = DCI.DAG; 7379 7380 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 7381 return SDValue(); 7382 7383 APInt SplatBits, SplatUndef; 7384 unsigned SplatBitSize; 7385 bool HasAnyUndefs; 7386 if (BVN && 7387 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 7388 if (SplatBitSize <= 64) { 7389 EVT VbicVT; 7390 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 7391 SplatUndef.getZExtValue(), SplatBitSize, 7392 DAG, VbicVT, VT.is128BitVector(), 7393 OtherModImm); 7394 if (Val.getNode()) { 7395 SDValue Input = 7396 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 7397 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 7398 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 7399 } 7400 } 7401 } 7402 7403 if (!Subtarget->isThumb1Only()) { 7404 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) 7405 SDValue Result = combineSelectAndUseCommutative(N, true, DCI); 7406 if (Result.getNode()) 7407 return Result; 7408 } 7409 7410 return SDValue(); 7411} 7412 7413/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 7414static SDValue PerformORCombine(SDNode *N, 7415 TargetLowering::DAGCombinerInfo &DCI, 7416 const ARMSubtarget *Subtarget) { 7417 // Attempt to use immediate-form VORR 7418 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 7419 DebugLoc dl = N->getDebugLoc(); 7420 EVT VT = N->getValueType(0); 7421 SelectionDAG &DAG = DCI.DAG; 7422 7423 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 7424 return SDValue(); 7425 7426 APInt SplatBits, SplatUndef; 7427 unsigned SplatBitSize; 7428 bool HasAnyUndefs; 7429 if (BVN && Subtarget->hasNEON() && 7430 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 7431 if (SplatBitSize <= 64) { 7432 EVT VorrVT; 7433 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 7434 SplatUndef.getZExtValue(), SplatBitSize, 7435 DAG, VorrVT, VT.is128BitVector(), 7436 OtherModImm); 7437 if (Val.getNode()) { 7438 SDValue Input = 7439 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 7440 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 7441 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 7442 } 7443 } 7444 } 7445 7446 if (!Subtarget->isThumb1Only()) { 7447 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 7448 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 7449 if (Result.getNode()) 7450 return Result; 7451 } 7452 7453 // The code below optimizes (or (and X, Y), Z). 7454 // The AND operand needs to have a single user to make these optimizations 7455 // profitable. 7456 SDValue N0 = N->getOperand(0); 7457 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) 7458 return SDValue(); 7459 SDValue N1 = N->getOperand(1); 7460 7461 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
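  // A VBSL with a constant first operand picks, bit for bit, from B where the
  // mask bit is set and from C where it is clear, which is exactly the
  // or-of-complementary-ands pattern above.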
7462 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 7463 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 7464 APInt SplatUndef; 7465 unsigned SplatBitSize; 7466 bool HasAnyUndefs; 7467 7468 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 7469 APInt SplatBits0; 7470 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 7471 HasAnyUndefs) && !HasAnyUndefs) { 7472 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 7473 APInt SplatBits1; 7474 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 7475 HasAnyUndefs) && !HasAnyUndefs && 7476 SplatBits0 == ~SplatBits1) { 7477 // Canonicalize the vector type to make instruction selection simpler. 7478 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 7479 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 7480 N0->getOperand(1), N0->getOperand(0), 7481 N1->getOperand(0)); 7482 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 7483 } 7484 } 7485 } 7486 7487 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 7488 // reasonable. 7489 7490 // BFI is only available on V6T2+ 7491 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 7492 return SDValue(); 7493 7494 DebugLoc DL = N->getDebugLoc(); 7495 // 1) or (and A, mask), val => ARMbfi A, val, mask 7496 // iff (val & mask) == val 7497 // 7498 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 7499 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 7500 // && mask == ~mask2 7501 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 7502 // && ~mask == mask2 7503 // (i.e., copy a bitfield value into another bitfield of the same width) 7504 7505 if (VT != MVT::i32) 7506 return SDValue(); 7507 7508 SDValue N00 = N0.getOperand(0); 7509 7510 // The value and the mask need to be constants so we can verify this is 7511 // actually a bitfield set. If the mask is 0xffff, we can do better 7512 // via a movt instruction, so don't use BFI in that case. 7513 SDValue MaskOp = N0.getOperand(1); 7514 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 7515 if (!MaskC) 7516 return SDValue(); 7517 unsigned Mask = MaskC->getZExtValue(); 7518 if (Mask == 0xffff) 7519 return SDValue(); 7520 SDValue Res; 7521 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 7522 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 7523 if (N1C) { 7524 unsigned Val = N1C->getZExtValue(); 7525 if ((Val & ~Mask) != Val) 7526 return SDValue(); 7527 7528 if (ARM::isBitFieldInvertedMask(Mask)) { 7529 Val >>= CountTrailingZeros_32(~Mask); 7530 7531 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 7532 DAG.getConstant(Val, MVT::i32), 7533 DAG.getConstant(Mask, MVT::i32)); 7534 7535 // Do not add new nodes to DAG combiner worklist. 7536 DCI.CombineTo(N, Res, false); 7537 return SDValue(); 7538 } 7539 } else if (N1.getOpcode() == ISD::AND) { 7540 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 7541 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 7542 if (!N11C) 7543 return SDValue(); 7544 unsigned Mask2 = N11C->getZExtValue(); 7545 7546 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 7547 // as is to match. 7548 if (ARM::isBitFieldInvertedMask(Mask) && 7549 (Mask == ~Mask2)) { 7550 // The pack halfword instruction works better for masks that fit it, 7551 // so use that when it's available. 
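        // (PKHBT / PKHTB can merge two halfwords in a single instruction, so
        // the 0xffff / 0xffff0000 case is intentionally left for those
        // patterns rather than using BFI here.)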
7552 if (Subtarget->hasT2ExtractPack() && 7553 (Mask == 0xffff || Mask == 0xffff0000)) 7554 return SDValue(); 7555 // 2a 7556 unsigned amt = CountTrailingZeros_32(Mask2); 7557 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 7558 DAG.getConstant(amt, MVT::i32)); 7559 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 7560 DAG.getConstant(Mask, MVT::i32)); 7561 // Do not add new nodes to DAG combiner worklist. 7562 DCI.CombineTo(N, Res, false); 7563 return SDValue(); 7564 } else if (ARM::isBitFieldInvertedMask(~Mask) && 7565 (~Mask == Mask2)) { 7566 // The pack halfword instruction works better for masks that fit it, 7567 // so use that when it's available. 7568 if (Subtarget->hasT2ExtractPack() && 7569 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 7570 return SDValue(); 7571 // 2b 7572 unsigned lsb = CountTrailingZeros_32(Mask); 7573 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 7574 DAG.getConstant(lsb, MVT::i32)); 7575 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 7576 DAG.getConstant(Mask2, MVT::i32)); 7577 // Do not add new nodes to DAG combiner worklist. 7578 DCI.CombineTo(N, Res, false); 7579 return SDValue(); 7580 } 7581 } 7582 7583 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 7584 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 7585 ARM::isBitFieldInvertedMask(~Mask)) { 7586 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 7587 // where lsb(mask) == #shamt and masked bits of B are known zero. 7588 SDValue ShAmt = N00.getOperand(1); 7589 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 7590 unsigned LSB = CountTrailingZeros_32(Mask); 7591 if (ShAmtC != LSB) 7592 return SDValue(); 7593 7594 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 7595 DAG.getConstant(~Mask, MVT::i32)); 7596 7597 // Do not add new nodes to DAG combiner worklist. 7598 DCI.CombineTo(N, Res, false); 7599 } 7600 7601 return SDValue(); 7602} 7603 7604static SDValue PerformXORCombine(SDNode *N, 7605 TargetLowering::DAGCombinerInfo &DCI, 7606 const ARMSubtarget *Subtarget) { 7607 EVT VT = N->getValueType(0); 7608 SelectionDAG &DAG = DCI.DAG; 7609 7610 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 7611 return SDValue(); 7612 7613 if (!Subtarget->isThumb1Only()) { 7614 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 7615 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 7616 if (Result.getNode()) 7617 return Result; 7618 } 7619 7620 return SDValue(); 7621} 7622 7623/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 7624/// the bits being cleared by the AND are not demanded by the BFI. 7625static SDValue PerformBFICombine(SDNode *N, 7626 TargetLowering::DAGCombinerInfo &DCI) { 7627 SDValue N1 = N->getOperand(1); 7628 if (N1.getOpcode() == ISD::AND) { 7629 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 7630 if (!N11C) 7631 return SDValue(); 7632 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 7633 unsigned LSB = CountTrailingZeros_32(~InvMask); 7634 unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; 7635 unsigned Mask = (1 << Width)-1; 7636 unsigned Mask2 = N11C->getZExtValue(); 7637 if ((Mask & (~Mask2)) == 0) 7638 return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), 7639 N->getOperand(0), N1.getOperand(0), 7640 N->getOperand(2)); 7641 } 7642 return SDValue(); 7643} 7644 7645/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 7646/// ARMISD::VMOVRRD. 
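/// An illustrative case handled below: vmovrrd (load f64 from a frame index)
/// is rewritten as two i32 loads, so the value never has to pass through a
/// VFP D register just to be split into a GPR pair.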
7647static SDValue PerformVMOVRRDCombine(SDNode *N, 7648 TargetLowering::DAGCombinerInfo &DCI) { 7649 // vmovrrd(vmovdrr x, y) -> x,y 7650 SDValue InDouble = N->getOperand(0); 7651 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 7652 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 7653 7654 // vmovrrd(load f64) -> (load i32), (load i32) 7655 SDNode *InNode = InDouble.getNode(); 7656 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 7657 InNode->getValueType(0) == MVT::f64 && 7658 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 7659 !cast<LoadSDNode>(InNode)->isVolatile()) { 7660 // TODO: Should this be done for non-FrameIndex operands? 7661 LoadSDNode *LD = cast<LoadSDNode>(InNode); 7662 7663 SelectionDAG &DAG = DCI.DAG; 7664 DebugLoc DL = LD->getDebugLoc(); 7665 SDValue BasePtr = LD->getBasePtr(); 7666 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 7667 LD->getPointerInfo(), LD->isVolatile(), 7668 LD->isNonTemporal(), LD->isInvariant(), 7669 LD->getAlignment()); 7670 7671 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 7672 DAG.getConstant(4, MVT::i32)); 7673 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 7674 LD->getPointerInfo(), LD->isVolatile(), 7675 LD->isNonTemporal(), LD->isInvariant(), 7676 std::min(4U, LD->getAlignment() / 2)); 7677 7678 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 7679 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 7680 DCI.RemoveFromWorklist(LD); 7681 DAG.DeleteNode(LD); 7682 return Result; 7683 } 7684 7685 return SDValue(); 7686} 7687 7688/// PerformVMOVDRRCombine - Target-specific dag combine xforms for 7689/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 7690static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 7691 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 7692 SDValue Op0 = N->getOperand(0); 7693 SDValue Op1 = N->getOperand(1); 7694 if (Op0.getOpcode() == ISD::BITCAST) 7695 Op0 = Op0.getOperand(0); 7696 if (Op1.getOpcode() == ISD::BITCAST) 7697 Op1 = Op1.getOperand(0); 7698 if (Op0.getOpcode() == ARMISD::VMOVRRD && 7699 Op0.getNode() == Op1.getNode() && 7700 Op0.getResNo() == 0 && Op1.getResNo() == 1) 7701 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 7702 N->getValueType(0), Op0.getOperand(0)); 7703 return SDValue(); 7704} 7705 7706/// PerformSTORECombine - Target-specific dag combine xforms for 7707/// ISD::STORE. 7708static SDValue PerformSTORECombine(SDNode *N, 7709 TargetLowering::DAGCombinerInfo &DCI) { 7710 StoreSDNode *St = cast<StoreSDNode>(N); 7711 if (St->isVolatile()) 7712 return SDValue(); 7713 7714 // Optimize trunc store (of multiple scalars) to shuffle and store. First, 7715 // pack all of the elements in one place. Next, store to memory in fewer 7716 // chunks. 
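  // For example (illustrative): a truncating store of v4i32 as v4i8 shuffles
  // the four byte-sized lanes to the bottom of a v16i8 and then emits a
  // single i32 store instead of four separate narrow stores.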
7717 SDValue StVal = St->getValue(); 7718 EVT VT = StVal.getValueType(); 7719 if (St->isTruncatingStore() && VT.isVector()) { 7720 SelectionDAG &DAG = DCI.DAG; 7721 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7722 EVT StVT = St->getMemoryVT(); 7723 unsigned NumElems = VT.getVectorNumElements(); 7724 assert(StVT != VT && "Cannot truncate to the same type"); 7725 unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); 7726 unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); 7727 7728 // From, To sizes and ElemCount must be pow of two 7729 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); 7730 7731 // We are going to use the original vector elt for storing. 7732 // Accumulated smaller vector elements must be a multiple of the store size. 7733 if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); 7734 7735 unsigned SizeRatio = FromEltSz / ToEltSz; 7736 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); 7737 7738 // Create a type on which we perform the shuffle. 7739 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), 7740 NumElems*SizeRatio); 7741 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); 7742 7743 DebugLoc DL = St->getDebugLoc(); 7744 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); 7745 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); 7746 for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; 7747 7748 // Can't shuffle using an illegal type. 7749 if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); 7750 7751 SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, 7752 DAG.getUNDEF(WideVec.getValueType()), 7753 ShuffleVec.data()); 7754 // At this point all of the data is stored at the bottom of the 7755 // register. We now need to save it to mem. 7756 7757 // Find the largest store unit 7758 MVT StoreType = MVT::i8; 7759 for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; 7760 tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { 7761 MVT Tp = (MVT::SimpleValueType)tp; 7762 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) 7763 StoreType = Tp; 7764 } 7765 // Didn't find a legal store type. 7766 if (!TLI.isTypeLegal(StoreType)) 7767 return SDValue(); 7768 7769 // Bitcast the original vector into a vector of store-size units 7770 EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), 7771 StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); 7772 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); 7773 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); 7774 SmallVector<SDValue, 8> Chains; 7775 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, 7776 TLI.getPointerTy()); 7777 SDValue BasePtr = St->getBasePtr(); 7778 7779 // Perform one or more big stores into memory. 
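    // (For a v4i32 store truncated to v4i8, ToEltSz * NumElems is 32 bits and
    // StoreType is i32, so the loop below runs exactly once.)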
7780 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); 7781 for (unsigned I = 0; I < E; I++) { 7782 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 7783 StoreType, ShuffWide, 7784 DAG.getIntPtrConstant(I)); 7785 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, 7786 St->getPointerInfo(), St->isVolatile(), 7787 St->isNonTemporal(), St->getAlignment()); 7788 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 7789 Increment); 7790 Chains.push_back(Ch); 7791 } 7792 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], 7793 Chains.size()); 7794 } 7795 7796 if (!ISD::isNormalStore(St)) 7797 return SDValue(); 7798 7799 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and 7800 // ARM stores of arguments in the same cache line. 7801 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 7802 StVal.getNode()->hasOneUse()) { 7803 SelectionDAG &DAG = DCI.DAG; 7804 DebugLoc DL = St->getDebugLoc(); 7805 SDValue BasePtr = St->getBasePtr(); 7806 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 7807 StVal.getNode()->getOperand(0), BasePtr, 7808 St->getPointerInfo(), St->isVolatile(), 7809 St->isNonTemporal(), St->getAlignment()); 7810 7811 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 7812 DAG.getConstant(4, MVT::i32)); 7813 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 7814 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 7815 St->isNonTemporal(), 7816 std::min(4U, St->getAlignment() / 2)); 7817 } 7818 7819 if (StVal.getValueType() != MVT::i64 || 7820 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 7821 return SDValue(); 7822 7823 // Bitcast an i64 store extracted from a vector to f64. 7824 // Otherwise, the i64 value will be legalized to a pair of i32 values. 7825 SelectionDAG &DAG = DCI.DAG; 7826 DebugLoc dl = StVal.getDebugLoc(); 7827 SDValue IntVec = StVal.getOperand(0); 7828 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 7829 IntVec.getValueType().getVectorNumElements()); 7830 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 7831 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 7832 Vec, StVal.getOperand(1)); 7833 dl = N->getDebugLoc(); 7834 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 7835 // Make the DAGCombiner fold the bitcasts. 7836 DCI.AddToWorklist(Vec.getNode()); 7837 DCI.AddToWorklist(ExtElt.getNode()); 7838 DCI.AddToWorklist(V.getNode()); 7839 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 7840 St->getPointerInfo(), St->isVolatile(), 7841 St->isNonTemporal(), St->getAlignment(), 7842 St->getTBAAInfo()); 7843} 7844 7845/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 7846/// are normal, non-volatile loads. If so, it is profitable to bitcast an 7847/// i64 vector to have f64 elements, since the value can then be loaded 7848/// directly into a VFP register. 7849static bool hasNormalLoadOperand(SDNode *N) { 7850 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 7851 for (unsigned i = 0; i < NumElts; ++i) { 7852 SDNode *Elt = N->getOperand(i).getNode(); 7853 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 7854 return true; 7855 } 7856 return false; 7857} 7858 7859/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 7860/// ISD::BUILD_VECTOR. 
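/// Illustrative example of the second transform below: a v2i64 build_vector
/// whose operands are ordinary loads is rebuilt as a v2f64 build_vector and
/// bitcast back to v2i64, so the i64 elements can be loaded directly into
/// VFP/NEON registers instead of being legalized into i32 pairs.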
7861static SDValue PerformBUILD_VECTORCombine(SDNode *N, 7862 TargetLowering::DAGCombinerInfo &DCI){ 7863 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 7864 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 7865 // into a pair of GPRs, which is fine when the value is used as a scalar, 7866 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 7867 SelectionDAG &DAG = DCI.DAG; 7868 if (N->getNumOperands() == 2) { 7869 SDValue RV = PerformVMOVDRRCombine(N, DAG); 7870 if (RV.getNode()) 7871 return RV; 7872 } 7873 7874 // Load i64 elements as f64 values so that type legalization does not split 7875 // them up into i32 values. 7876 EVT VT = N->getValueType(0); 7877 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 7878 return SDValue(); 7879 DebugLoc dl = N->getDebugLoc(); 7880 SmallVector<SDValue, 8> Ops; 7881 unsigned NumElts = VT.getVectorNumElements(); 7882 for (unsigned i = 0; i < NumElts; ++i) { 7883 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 7884 Ops.push_back(V); 7885 // Make the DAGCombiner fold the bitcast. 7886 DCI.AddToWorklist(V.getNode()); 7887 } 7888 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 7889 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); 7890 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 7891} 7892 7893/// PerformInsertEltCombine - Target-specific dag combine xforms for 7894/// ISD::INSERT_VECTOR_ELT. 7895static SDValue PerformInsertEltCombine(SDNode *N, 7896 TargetLowering::DAGCombinerInfo &DCI) { 7897 // Bitcast an i64 load inserted into a vector to f64. 7898 // Otherwise, the i64 value will be legalized to a pair of i32 values. 7899 EVT VT = N->getValueType(0); 7900 SDNode *Elt = N->getOperand(1).getNode(); 7901 if (VT.getVectorElementType() != MVT::i64 || 7902 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 7903 return SDValue(); 7904 7905 SelectionDAG &DAG = DCI.DAG; 7906 DebugLoc dl = N->getDebugLoc(); 7907 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 7908 VT.getVectorNumElements()); 7909 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 7910 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 7911 // Make the DAGCombiner fold the bitcasts. 7912 DCI.AddToWorklist(Vec.getNode()); 7913 DCI.AddToWorklist(V.getNode()); 7914 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 7915 Vec, V, N->getOperand(2)); 7916 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 7917} 7918 7919/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 7920/// ISD::VECTOR_SHUFFLE. 7921static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 7922 // The LLVM shufflevector instruction does not require the shuffle mask 7923 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 7924 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 7925 // operands do not match the mask length, they are extended by concatenating 7926 // them with undef vectors. That is probably the right thing for other 7927 // targets, but for NEON it is better to concatenate two double-register 7928 // size vector operands into a single quad-register size vector. 
Do that 7929 // transformation here: 7930 // shuffle(concat(v1, undef), concat(v2, undef)) -> 7931 // shuffle(concat(v1, v2), undef) 7932 SDValue Op0 = N->getOperand(0); 7933 SDValue Op1 = N->getOperand(1); 7934 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 7935 Op1.getOpcode() != ISD::CONCAT_VECTORS || 7936 Op0.getNumOperands() != 2 || 7937 Op1.getNumOperands() != 2) 7938 return SDValue(); 7939 SDValue Concat0Op1 = Op0.getOperand(1); 7940 SDValue Concat1Op1 = Op1.getOperand(1); 7941 if (Concat0Op1.getOpcode() != ISD::UNDEF || 7942 Concat1Op1.getOpcode() != ISD::UNDEF) 7943 return SDValue(); 7944 // Skip the transformation if any of the types are illegal. 7945 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7946 EVT VT = N->getValueType(0); 7947 if (!TLI.isTypeLegal(VT) || 7948 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 7949 !TLI.isTypeLegal(Concat1Op1.getValueType())) 7950 return SDValue(); 7951 7952 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 7953 Op0.getOperand(0), Op1.getOperand(0)); 7954 // Translate the shuffle mask. 7955 SmallVector<int, 16> NewMask; 7956 unsigned NumElts = VT.getVectorNumElements(); 7957 unsigned HalfElts = NumElts/2; 7958 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7959 for (unsigned n = 0; n < NumElts; ++n) { 7960 int MaskElt = SVN->getMaskElt(n); 7961 int NewElt = -1; 7962 if (MaskElt < (int)HalfElts) 7963 NewElt = MaskElt; 7964 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 7965 NewElt = HalfElts + MaskElt - NumElts; 7966 NewMask.push_back(NewElt); 7967 } 7968 return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, 7969 DAG.getUNDEF(VT), NewMask.data()); 7970} 7971 7972/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 7973/// NEON load/store intrinsics to merge base address updates. 7974static SDValue CombineBaseUpdate(SDNode *N, 7975 TargetLowering::DAGCombinerInfo &DCI) { 7976 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 7977 return SDValue(); 7978 7979 SelectionDAG &DAG = DCI.DAG; 7980 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 7981 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 7982 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 7983 SDValue Addr = N->getOperand(AddrOpIdx); 7984 7985 // Search for a use of the address operand that is an increment. 7986 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 7987 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 7988 SDNode *User = *UI; 7989 if (User->getOpcode() != ISD::ADD || 7990 UI.getUse().getResNo() != Addr.getResNo()) 7991 continue; 7992 7993 // Check that the add is independent of the load/store. Otherwise, folding 7994 // it would create a cycle. 7995 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 7996 continue; 7997 7998 // Find the new opcode for the updating load/store. 
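    // (This is what turns, illustratively, a vld1.32 {d16}, [r0] followed by
    // add r0, r0, #8 into the post-incremented form vld1.32 {d16}, [r0]!,
    // provided the increment matches the access size computed further below.)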
7999 bool isLoad = true; 8000 bool isLaneOp = false; 8001 unsigned NewOpc = 0; 8002 unsigned NumVecs = 0; 8003 if (isIntrinsic) { 8004 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 8005 switch (IntNo) { 8006 default: llvm_unreachable("unexpected intrinsic for Neon base update"); 8007 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 8008 NumVecs = 1; break; 8009 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 8010 NumVecs = 2; break; 8011 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 8012 NumVecs = 3; break; 8013 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 8014 NumVecs = 4; break; 8015 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 8016 NumVecs = 2; isLaneOp = true; break; 8017 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 8018 NumVecs = 3; isLaneOp = true; break; 8019 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 8020 NumVecs = 4; isLaneOp = true; break; 8021 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 8022 NumVecs = 1; isLoad = false; break; 8023 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 8024 NumVecs = 2; isLoad = false; break; 8025 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 8026 NumVecs = 3; isLoad = false; break; 8027 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 8028 NumVecs = 4; isLoad = false; break; 8029 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 8030 NumVecs = 2; isLoad = false; isLaneOp = true; break; 8031 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 8032 NumVecs = 3; isLoad = false; isLaneOp = true; break; 8033 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 8034 NumVecs = 4; isLoad = false; isLaneOp = true; break; 8035 } 8036 } else { 8037 isLaneOp = true; 8038 switch (N->getOpcode()) { 8039 default: llvm_unreachable("unexpected opcode for Neon base update"); 8040 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 8041 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 8042 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 8043 } 8044 } 8045 8046 // Find the size of memory referenced by the load/store. 8047 EVT VecTy; 8048 if (isLoad) 8049 VecTy = N->getValueType(0); 8050 else 8051 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 8052 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 8053 if (isLaneOp) 8054 NumBytes /= VecTy.getVectorNumElements(); 8055 8056 // If the increment is a constant, it must match the memory ref size. 8057 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 8058 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 8059 uint64_t IncVal = CInc->getZExtValue(); 8060 if (IncVal != NumBytes) 8061 continue; 8062 } else if (NumBytes >= 3 * 16) { 8063 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 8064 // separate instructions that make it harder to use a non-constant update. 8065 continue; 8066 } 8067 8068 // Create the new updating load/store node. 8069 EVT Tys[6]; 8070 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 8071 unsigned n; 8072 for (n = 0; n < NumResultVecs; ++n) 8073 Tys[n] = VecTy; 8074 Tys[n++] = MVT::i32; 8075 Tys[n] = MVT::Other; 8076 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 8077 SmallVector<SDValue, 8> Ops; 8078 Ops.push_back(N->getOperand(0)); // incoming chain 8079 Ops.push_back(N->getOperand(AddrOpIdx)); 8080 Ops.push_back(Inc); 8081 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 8082 Ops.push_back(N->getOperand(i)); 8083 } 8084 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 8085 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, 8086 Ops.data(), Ops.size(), 8087 MemInt->getMemoryVT(), 8088 MemInt->getMemOperand()); 8089 8090 // Update the uses. 8091 std::vector<SDValue> NewResults; 8092 for (unsigned i = 0; i < NumResultVecs; ++i) { 8093 NewResults.push_back(SDValue(UpdN.getNode(), i)); 8094 } 8095 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 8096 DCI.CombineTo(N, NewResults); 8097 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 8098 8099 break; 8100 } 8101 return SDValue(); 8102} 8103 8104/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 8105/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 8106/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 8107/// return true. 8108static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 8109 SelectionDAG &DAG = DCI.DAG; 8110 EVT VT = N->getValueType(0); 8111 // vldN-dup instructions only support 64-bit vectors for N > 1. 8112 if (!VT.is64BitVector()) 8113 return false; 8114 8115 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 8116 SDNode *VLD = N->getOperand(0).getNode(); 8117 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 8118 return false; 8119 unsigned NumVecs = 0; 8120 unsigned NewOpc = 0; 8121 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 8122 if (IntNo == Intrinsic::arm_neon_vld2lane) { 8123 NumVecs = 2; 8124 NewOpc = ARMISD::VLD2DUP; 8125 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 8126 NumVecs = 3; 8127 NewOpc = ARMISD::VLD3DUP; 8128 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 8129 NumVecs = 4; 8130 NewOpc = ARMISD::VLD4DUP; 8131 } else { 8132 return false; 8133 } 8134 8135 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 8136 // numbers match the load. 8137 unsigned VLDLaneNo = 8138 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 8139 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 8140 UI != UE; ++UI) { 8141 // Ignore uses of the chain result. 8142 if (UI.getUse().getResNo() == NumVecs) 8143 continue; 8144 SDNode *User = *UI; 8145 if (User->getOpcode() != ARMISD::VDUPLANE || 8146 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 8147 return false; 8148 } 8149 8150 // Create the vldN-dup node. 8151 EVT Tys[5]; 8152 unsigned n; 8153 for (n = 0; n < NumVecs; ++n) 8154 Tys[n] = VT; 8155 Tys[n] = MVT::Other; 8156 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 8157 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 8158 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 8159 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, 8160 Ops, 2, VLDMemInt->getMemoryVT(), 8161 VLDMemInt->getMemOperand()); 8162 8163 // Update the uses. 
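  // Each VDUPLANE user of the original vldN-lane intrinsic is redirected to
  // the matching result of the new vldN-dup node; the lane index disappears
  // because the dup form already broadcasts the loaded element to every lane.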
8164 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 8165 UI != UE; ++UI) { 8166 unsigned ResNo = UI.getUse().getResNo(); 8167 // Ignore uses of the chain result. 8168 if (ResNo == NumVecs) 8169 continue; 8170 SDNode *User = *UI; 8171 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 8172 } 8173 8174 // Now the vldN-lane intrinsic is dead except for its chain result. 8175 // Update uses of the chain. 8176 std::vector<SDValue> VLDDupResults; 8177 for (unsigned n = 0; n < NumVecs; ++n) 8178 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 8179 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 8180 DCI.CombineTo(VLD, VLDDupResults); 8181 8182 return true; 8183} 8184 8185/// PerformVDUPLANECombine - Target-specific dag combine xforms for 8186/// ARMISD::VDUPLANE. 8187static SDValue PerformVDUPLANECombine(SDNode *N, 8188 TargetLowering::DAGCombinerInfo &DCI) { 8189 SDValue Op = N->getOperand(0); 8190 8191 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 8192 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 8193 if (CombineVLDDUP(N, DCI)) 8194 return SDValue(N, 0); 8195 8196 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 8197 // redundant. Ignore bit_converts for now; element sizes are checked below. 8198 while (Op.getOpcode() == ISD::BITCAST) 8199 Op = Op.getOperand(0); 8200 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 8201 return SDValue(); 8202 8203 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 8204 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 8205 // The canonical VMOV for a zero vector uses a 32-bit element size. 8206 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 8207 unsigned EltBits; 8208 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 8209 EltSize = 8; 8210 EVT VT = N->getValueType(0); 8211 if (EltSize > VT.getVectorElementType().getSizeInBits()) 8212 return SDValue(); 8213 8214 return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); 8215} 8216 8217// isConstVecPow2 - Return true if each vector element is a power of 2, all 8218// elements are the same constant, C, and Log2(C) ranges from 1 to 32. 8219static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 8220{ 8221 integerPart cN; 8222 integerPart c0 = 0; 8223 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 8224 I != E; I++) { 8225 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 8226 if (!C) 8227 return false; 8228 8229 bool isExact; 8230 APFloat APF = C->getValueAPF(); 8231 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 8232 != APFloat::opOK || !isExact) 8233 return false; 8234 8235 c0 = (I == 0) ? cN : c0; 8236 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 8237 return false; 8238 } 8239 C = c0; 8240 return true; 8241} 8242 8243/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 8244/// can replace combinations of VMUL and VCVT (floating-point to integer) 8245/// when the VMUL has a constant operand that is a power of 2. 
8246/// 8247/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 8248/// vmul.f32 d16, d17, d16 8249/// vcvt.s32.f32 d16, d16 8250/// becomes: 8251/// vcvt.s32.f32 d16, d16, #3 8252static SDValue PerformVCVTCombine(SDNode *N, 8253 TargetLowering::DAGCombinerInfo &DCI, 8254 const ARMSubtarget *Subtarget) { 8255 SelectionDAG &DAG = DCI.DAG; 8256 SDValue Op = N->getOperand(0); 8257 8258 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 8259 Op.getOpcode() != ISD::FMUL) 8260 return SDValue(); 8261 8262 uint64_t C; 8263 SDValue N0 = Op->getOperand(0); 8264 SDValue ConstVec = Op->getOperand(1); 8265 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 8266 8267 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 8268 !isConstVecPow2(ConstVec, isSigned, C)) 8269 return SDValue(); 8270 8271 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 8272 Intrinsic::arm_neon_vcvtfp2fxu; 8273 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 8274 N->getValueType(0), 8275 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 8276 DAG.getConstant(Log2_64(C), MVT::i32)); 8277} 8278 8279/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 8280/// can replace combinations of VCVT (integer to floating-point) and VDIV 8281/// when the VDIV has a constant operand that is a power of 2. 8282/// 8283/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 8284/// vcvt.f32.s32 d16, d16 8285/// vdiv.f32 d16, d17, d16 8286/// becomes: 8287/// vcvt.f32.s32 d16, d16, #3 8288static SDValue PerformVDIVCombine(SDNode *N, 8289 TargetLowering::DAGCombinerInfo &DCI, 8290 const ARMSubtarget *Subtarget) { 8291 SelectionDAG &DAG = DCI.DAG; 8292 SDValue Op = N->getOperand(0); 8293 unsigned OpOpcode = Op.getNode()->getOpcode(); 8294 8295 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 8296 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 8297 return SDValue(); 8298 8299 uint64_t C; 8300 SDValue ConstVec = N->getOperand(1); 8301 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 8302 8303 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 8304 !isConstVecPow2(ConstVec, isSigned, C)) 8305 return SDValue(); 8306 8307 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : 8308 Intrinsic::arm_neon_vcvtfxu2fp; 8309 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 8310 Op.getValueType(), 8311 DAG.getConstant(IntrinsicOpcode, MVT::i32), 8312 Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); 8313} 8314 8315/// Getvshiftimm - Check if this is a valid build_vector for the immediate 8316/// operand of a vector shift operation, where all the elements of the 8317/// build_vector must have the same constant integer value. 8318static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 8319 // Ignore bit_converts. 8320 while (Op.getOpcode() == ISD::BITCAST) 8321 Op = Op.getOperand(0); 8322 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 8323 APInt SplatBits, SplatUndef; 8324 unsigned SplatBitSize; 8325 bool HasAnyUndefs; 8326 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 8327 HasAnyUndefs, ElementBits) || 8328 SplatBitSize > ElementBits) 8329 return false; 8330 Cnt = SplatBits.getSExtValue(); 8331 return true; 8332} 8333 8334/// isVShiftLImm - Check if this is a valid build_vector for the immediate 8335/// operand of a vector shift left operation. 
That value must be in the range: 8336/// 0 <= Value < ElementBits for a left shift; or 8337/// 0 <= Value <= ElementBits for a long left shift. 8338static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 8339 assert(VT.isVector() && "vector shift count is not a vector type"); 8340 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 8341 if (! getVShiftImm(Op, ElementBits, Cnt)) 8342 return false; 8343 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 8344} 8345 8346/// isVShiftRImm - Check if this is a valid build_vector for the immediate 8347/// operand of a vector shift right operation. For a shift opcode, the value 8348/// is positive, but for an intrinsic the value count must be negative. The 8349/// absolute value must be in the range: 8350/// 1 <= |Value| <= ElementBits for a right shift; or 8351/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 8352static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 8353 int64_t &Cnt) { 8354 assert(VT.isVector() && "vector shift count is not a vector type"); 8355 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 8356 if (! getVShiftImm(Op, ElementBits, Cnt)) 8357 return false; 8358 if (isIntrinsic) 8359 Cnt = -Cnt; 8360 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 8361} 8362 8363/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 8364static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 8365 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 8366 switch (IntNo) { 8367 default: 8368 // Don't do anything for most intrinsics. 8369 break; 8370 8371 // Vector shifts: check for immediate versions and lower them. 8372 // Note: This is done during DAG combining instead of DAG legalizing because 8373 // the build_vectors for 64-bit vector element shift counts are generally 8374 // not legal, and it is hard to see their values after they get legalized to 8375 // loads from a constant pool. 8376 case Intrinsic::arm_neon_vshifts: 8377 case Intrinsic::arm_neon_vshiftu: 8378 case Intrinsic::arm_neon_vshiftls: 8379 case Intrinsic::arm_neon_vshiftlu: 8380 case Intrinsic::arm_neon_vshiftn: 8381 case Intrinsic::arm_neon_vrshifts: 8382 case Intrinsic::arm_neon_vrshiftu: 8383 case Intrinsic::arm_neon_vrshiftn: 8384 case Intrinsic::arm_neon_vqshifts: 8385 case Intrinsic::arm_neon_vqshiftu: 8386 case Intrinsic::arm_neon_vqshiftsu: 8387 case Intrinsic::arm_neon_vqshiftns: 8388 case Intrinsic::arm_neon_vqshiftnu: 8389 case Intrinsic::arm_neon_vqshiftnsu: 8390 case Intrinsic::arm_neon_vqrshiftns: 8391 case Intrinsic::arm_neon_vqrshiftnu: 8392 case Intrinsic::arm_neon_vqrshiftnsu: { 8393 EVT VT = N->getOperand(1).getValueType(); 8394 int64_t Cnt; 8395 unsigned VShiftOpc = 0; 8396 8397 switch (IntNo) { 8398 case Intrinsic::arm_neon_vshifts: 8399 case Intrinsic::arm_neon_vshiftu: 8400 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 8401 VShiftOpc = ARMISD::VSHL; 8402 break; 8403 } 8404 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 8405 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
8406 ARMISD::VSHRs : ARMISD::VSHRu); 8407 break; 8408 } 8409 return SDValue(); 8410 8411 case Intrinsic::arm_neon_vshiftls: 8412 case Intrinsic::arm_neon_vshiftlu: 8413 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 8414 break; 8415 llvm_unreachable("invalid shift count for vshll intrinsic"); 8416 8417 case Intrinsic::arm_neon_vrshifts: 8418 case Intrinsic::arm_neon_vrshiftu: 8419 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 8420 break; 8421 return SDValue(); 8422 8423 case Intrinsic::arm_neon_vqshifts: 8424 case Intrinsic::arm_neon_vqshiftu: 8425 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 8426 break; 8427 return SDValue(); 8428 8429 case Intrinsic::arm_neon_vqshiftsu: 8430 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 8431 break; 8432 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 8433 8434 case Intrinsic::arm_neon_vshiftn: 8435 case Intrinsic::arm_neon_vrshiftn: 8436 case Intrinsic::arm_neon_vqshiftns: 8437 case Intrinsic::arm_neon_vqshiftnu: 8438 case Intrinsic::arm_neon_vqshiftnsu: 8439 case Intrinsic::arm_neon_vqrshiftns: 8440 case Intrinsic::arm_neon_vqrshiftnu: 8441 case Intrinsic::arm_neon_vqrshiftnsu: 8442 // Narrowing shifts require an immediate right shift. 8443 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 8444 break; 8445 llvm_unreachable("invalid shift count for narrowing vector shift " 8446 "intrinsic"); 8447 8448 default: 8449 llvm_unreachable("unhandled vector shift"); 8450 } 8451 8452 switch (IntNo) { 8453 case Intrinsic::arm_neon_vshifts: 8454 case Intrinsic::arm_neon_vshiftu: 8455 // Opcode already set above. 8456 break; 8457 case Intrinsic::arm_neon_vshiftls: 8458 case Intrinsic::arm_neon_vshiftlu: 8459 if (Cnt == VT.getVectorElementType().getSizeInBits()) 8460 VShiftOpc = ARMISD::VSHLLi; 8461 else 8462 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
8463 ARMISD::VSHLLs : ARMISD::VSHLLu); 8464 break; 8465 case Intrinsic::arm_neon_vshiftn: 8466 VShiftOpc = ARMISD::VSHRN; break; 8467 case Intrinsic::arm_neon_vrshifts: 8468 VShiftOpc = ARMISD::VRSHRs; break; 8469 case Intrinsic::arm_neon_vrshiftu: 8470 VShiftOpc = ARMISD::VRSHRu; break; 8471 case Intrinsic::arm_neon_vrshiftn: 8472 VShiftOpc = ARMISD::VRSHRN; break; 8473 case Intrinsic::arm_neon_vqshifts: 8474 VShiftOpc = ARMISD::VQSHLs; break; 8475 case Intrinsic::arm_neon_vqshiftu: 8476 VShiftOpc = ARMISD::VQSHLu; break; 8477 case Intrinsic::arm_neon_vqshiftsu: 8478 VShiftOpc = ARMISD::VQSHLsu; break; 8479 case Intrinsic::arm_neon_vqshiftns: 8480 VShiftOpc = ARMISD::VQSHRNs; break; 8481 case Intrinsic::arm_neon_vqshiftnu: 8482 VShiftOpc = ARMISD::VQSHRNu; break; 8483 case Intrinsic::arm_neon_vqshiftnsu: 8484 VShiftOpc = ARMISD::VQSHRNsu; break; 8485 case Intrinsic::arm_neon_vqrshiftns: 8486 VShiftOpc = ARMISD::VQRSHRNs; break; 8487 case Intrinsic::arm_neon_vqrshiftnu: 8488 VShiftOpc = ARMISD::VQRSHRNu; break; 8489 case Intrinsic::arm_neon_vqrshiftnsu: 8490 VShiftOpc = ARMISD::VQRSHRNsu; break; 8491 } 8492 8493 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 8494 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 8495 } 8496 8497 case Intrinsic::arm_neon_vshiftins: { 8498 EVT VT = N->getOperand(1).getValueType(); 8499 int64_t Cnt; 8500 unsigned VShiftOpc = 0; 8501 8502 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 8503 VShiftOpc = ARMISD::VSLI; 8504 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 8505 VShiftOpc = ARMISD::VSRI; 8506 else { 8507 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 8508 } 8509 8510 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 8511 N->getOperand(1), N->getOperand(2), 8512 DAG.getConstant(Cnt, MVT::i32)); 8513 } 8514 8515 case Intrinsic::arm_neon_vqrshifts: 8516 case Intrinsic::arm_neon_vqrshiftu: 8517 // No immediate versions of these to check for. 8518 break; 8519 } 8520 8521 return SDValue(); 8522} 8523 8524/// PerformShiftCombine - Checks for immediate versions of vector shifts and 8525/// lowers them. As with the vector shift intrinsics, this is done during DAG 8526/// combining instead of DAG legalizing because the build_vectors for 64-bit 8527/// vector element shift counts are generally not legal, and it is hard to see 8528/// their values after they get legalized to loads from a constant pool. 8529static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 8530 const ARMSubtarget *ST) { 8531 EVT VT = N->getValueType(0); 8532 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 8533 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 8534 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 8535 SDValue N1 = N->getOperand(1); 8536 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 8537 SDValue N0 = N->getOperand(0); 8538 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 8539 DAG.MaskedValueIsZero(N0.getOperand(0), 8540 APInt::getHighBitsSet(32, 16))) 8541 return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); 8542 } 8543 } 8544 8545 // Nothing to be done for scalar shifts. 
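  // For legal vector types, a constant splat shift amount is folded into the
  // immediate form, e.g. (illustrative) (shl v4i32:x, splat 3) becomes
  // ARMISD::VSHL with Cnt == 3 and ultimately vshl.i32 ..., #3.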
8546 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8547 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 8548 return SDValue(); 8549 8550 assert(ST->hasNEON() && "unexpected vector shift"); 8551 int64_t Cnt; 8552 8553 switch (N->getOpcode()) { 8554 default: llvm_unreachable("unexpected shift opcode"); 8555 8556 case ISD::SHL: 8557 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 8558 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 8559 DAG.getConstant(Cnt, MVT::i32)); 8560 break; 8561 8562 case ISD::SRA: 8563 case ISD::SRL: 8564 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 8565 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 8566 ARMISD::VSHRs : ARMISD::VSHRu); 8567 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 8568 DAG.getConstant(Cnt, MVT::i32)); 8569 } 8570 } 8571 return SDValue(); 8572} 8573 8574/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 8575/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 8576static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 8577 const ARMSubtarget *ST) { 8578 SDValue N0 = N->getOperand(0); 8579 8580 // Check for sign- and zero-extensions of vector extract operations of 8- 8581 // and 16-bit vector elements. NEON supports these directly. They are 8582 // handled during DAG combining because type legalization will promote them 8583 // to 32-bit types and it is messy to recognize the operations after that. 8584 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 8585 SDValue Vec = N0.getOperand(0); 8586 SDValue Lane = N0.getOperand(1); 8587 EVT VT = N->getValueType(0); 8588 EVT EltVT = N0.getValueType(); 8589 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8590 8591 if (VT == MVT::i32 && 8592 (EltVT == MVT::i8 || EltVT == MVT::i16) && 8593 TLI.isTypeLegal(Vec.getValueType()) && 8594 isa<ConstantSDNode>(Lane)) { 8595 8596 unsigned Opc = 0; 8597 switch (N->getOpcode()) { 8598 default: llvm_unreachable("unexpected opcode"); 8599 case ISD::SIGN_EXTEND: 8600 Opc = ARMISD::VGETLANEs; 8601 break; 8602 case ISD::ZERO_EXTEND: 8603 case ISD::ANY_EXTEND: 8604 Opc = ARMISD::VGETLANEu; 8605 break; 8606 } 8607 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 8608 } 8609 } 8610 8611 return SDValue(); 8612} 8613 8614/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 8615/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 8616static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 8617 const ARMSubtarget *ST) { 8618 // If the target supports NEON, try to use vmax/vmin instructions for f32 8619 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 8620 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 8621 // a NaN; only do the transformation when it matches that behavior. 8622 8623 // For now only do this when using NEON for FP operations; if using VFP, it 8624 // is not obvious that the benefit outweighs the cost of switching to the 8625 // NEON pipeline. 
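  // Illustrative example: for f32, "x < y ? x : y" can become vmin.f32 when x
  // is known never to be NaN, and "x > y ? x : y" can become vmax.f32 under
  // the matching conditions checked below.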
8626 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 8627 N->getValueType(0) != MVT::f32) 8628 return SDValue(); 8629 8630 SDValue CondLHS = N->getOperand(0); 8631 SDValue CondRHS = N->getOperand(1); 8632 SDValue LHS = N->getOperand(2); 8633 SDValue RHS = N->getOperand(3); 8634 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 8635 8636 unsigned Opcode = 0; 8637 bool IsReversed; 8638 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 8639 IsReversed = false; // x CC y ? x : y 8640 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 8641 IsReversed = true ; // x CC y ? y : x 8642 } else { 8643 return SDValue(); 8644 } 8645 8646 bool IsUnordered; 8647 switch (CC) { 8648 default: break; 8649 case ISD::SETOLT: 8650 case ISD::SETOLE: 8651 case ISD::SETLT: 8652 case ISD::SETLE: 8653 case ISD::SETULT: 8654 case ISD::SETULE: 8655 // If LHS is NaN, an ordered comparison will be false and the result will 8656 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 8657 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 8658 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 8659 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 8660 break; 8661 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 8662 // will return -0, so vmin can only be used for unsafe math or if one of 8663 // the operands is known to be nonzero. 8664 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 8665 !DAG.getTarget().Options.UnsafeFPMath && 8666 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 8667 break; 8668 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 8669 break; 8670 8671 case ISD::SETOGT: 8672 case ISD::SETOGE: 8673 case ISD::SETGT: 8674 case ISD::SETGE: 8675 case ISD::SETUGT: 8676 case ISD::SETUGE: 8677 // If LHS is NaN, an ordered comparison will be false and the result will 8678 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 8679 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 8680 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 8681 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 8682 break; 8683 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 8684 // will return +0, so vmax can only be used for unsafe math or if one of 8685 // the operands is known to be nonzero. 8686 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 8687 !DAG.getTarget().Options.UnsafeFPMath && 8688 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 8689 break; 8690 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 8691 break; 8692 } 8693 8694 if (!Opcode) 8695 return SDValue(); 8696 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 8697} 8698 8699/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 8700SDValue 8701ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 8702 SDValue Cmp = N->getOperand(4); 8703 if (Cmp.getOpcode() != ARMISD::CMPZ) 8704 // Only looking at EQ and NE cases. 
8705 return SDValue(); 8706 8707 EVT VT = N->getValueType(0); 8708 DebugLoc dl = N->getDebugLoc(); 8709 SDValue LHS = Cmp.getOperand(0); 8710 SDValue RHS = Cmp.getOperand(1); 8711 SDValue FalseVal = N->getOperand(0); 8712 SDValue TrueVal = N->getOperand(1); 8713 SDValue ARMcc = N->getOperand(2); 8714 ARMCC::CondCodes CC = 8715 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 8716 8717 // Simplify 8718 // mov r1, r0 8719 // cmp r1, x 8720 // mov r0, y 8721 // moveq r0, x 8722 // to 8723 // cmp r0, x 8724 // movne r0, y 8725 // 8726 // mov r1, r0 8727 // cmp r1, x 8728 // mov r0, x 8729 // movne r0, y 8730 // to 8731 // cmp r0, x 8732 // movne r0, y 8733 /// FIXME: Turn this into a target neutral optimization? 8734 SDValue Res; 8735 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 8736 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 8737 N->getOperand(3), Cmp); 8738 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 8739 SDValue ARMcc; 8740 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 8741 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 8742 N->getOperand(3), NewCmp); 8743 } 8744 8745 if (Res.getNode()) { 8746 APInt KnownZero, KnownOne; 8747 DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); 8748 // Capture demanded bits information that would be otherwise lost. 8749 if (KnownZero == 0xfffffffe) 8750 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8751 DAG.getValueType(MVT::i1)); 8752 else if (KnownZero == 0xffffff00) 8753 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8754 DAG.getValueType(MVT::i8)); 8755 else if (KnownZero == 0xffff0000) 8756 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8757 DAG.getValueType(MVT::i16)); 8758 } 8759 8760 return Res; 8761} 8762 8763SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 8764 DAGCombinerInfo &DCI) const { 8765 switch (N->getOpcode()) { 8766 default: break; 8767 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 8768 case ISD::SUB: return PerformSUBCombine(N, DCI); 8769 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 8770 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 8771 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); 8772 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); 8773 case ARMISD::BFI: return PerformBFICombine(N, DCI); 8774 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 8775 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 8776 case ISD::STORE: return PerformSTORECombine(N, DCI); 8777 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 8778 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 8779 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 8780 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 8781 case ISD::FP_TO_SINT: 8782 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 8783 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 8784 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 8785 case ISD::SHL: 8786 case ISD::SRA: 8787 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 8788 case ISD::SIGN_EXTEND: 8789 case ISD::ZERO_EXTEND: 8790 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 8791 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 8792 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); 8793 case ARMISD::VLD2DUP: 8794 case ARMISD::VLD3DUP: 8795 case 
ARMISD::VLD4DUP: 8796 return CombineBaseUpdate(N, DCI); 8797 case ISD::INTRINSIC_VOID: 8798 case ISD::INTRINSIC_W_CHAIN: 8799 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 8800 case Intrinsic::arm_neon_vld1: 8801 case Intrinsic::arm_neon_vld2: 8802 case Intrinsic::arm_neon_vld3: 8803 case Intrinsic::arm_neon_vld4: 8804 case Intrinsic::arm_neon_vld2lane: 8805 case Intrinsic::arm_neon_vld3lane: 8806 case Intrinsic::arm_neon_vld4lane: 8807 case Intrinsic::arm_neon_vst1: 8808 case Intrinsic::arm_neon_vst2: 8809 case Intrinsic::arm_neon_vst3: 8810 case Intrinsic::arm_neon_vst4: 8811 case Intrinsic::arm_neon_vst2lane: 8812 case Intrinsic::arm_neon_vst3lane: 8813 case Intrinsic::arm_neon_vst4lane: 8814 return CombineBaseUpdate(N, DCI); 8815 default: break; 8816 } 8817 break; 8818 } 8819 return SDValue(); 8820} 8821 8822bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 8823 EVT VT) const { 8824 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 8825} 8826 8827bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 8828 if (!Subtarget->allowsUnalignedMem()) 8829 return false; 8830 8831 switch (VT.getSimpleVT().SimpleTy) { 8832 default: 8833 return false; 8834 case MVT::i8: 8835 case MVT::i16: 8836 case MVT::i32: 8837 return true; 8838 case MVT::f64: 8839 return Subtarget->hasNEON(); 8840 // FIXME: VLD1 etc with standard alignment is legal. 8841 } 8842} 8843 8844static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, 8845 unsigned AlignCheck) { 8846 return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && 8847 (DstAlign == 0 || DstAlign % AlignCheck == 0)); 8848} 8849 8850EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, 8851 unsigned DstAlign, unsigned SrcAlign, 8852 bool IsZeroVal, 8853 bool MemcpyStrSrc, 8854 MachineFunction &MF) const { 8855 const Function *F = MF.getFunction(); 8856 8857 // See if we can use NEON instructions for this... 8858 if (IsZeroVal && 8859 !F->hasFnAttr(Attribute::NoImplicitFloat) && 8860 Subtarget->hasNEON()) { 8861 if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { 8862 return MVT::v4i32; 8863 } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { 8864 return MVT::v2i32; 8865 } 8866 } 8867 8868 // Lowering to i32/i16 if the size permits. 8869 if (Size >= 4) { 8870 return MVT::i32; 8871 } else if (Size >= 2) { 8872 return MVT::i16; 8873 } 8874 8875 // Let the target-independent logic figure it out. 
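  // (Returning MVT::Other signals "no preference", so the generic memcpy /
  // memset expansion chooses its own operand type.)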
8876 return MVT::Other; 8877} 8878 8879static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 8880 if (V < 0) 8881 return false; 8882 8883 unsigned Scale = 1; 8884 switch (VT.getSimpleVT().SimpleTy) { 8885 default: return false; 8886 case MVT::i1: 8887 case MVT::i8: 8888 // Scale == 1; 8889 break; 8890 case MVT::i16: 8891 // Scale == 2; 8892 Scale = 2; 8893 break; 8894 case MVT::i32: 8895 // Scale == 4; 8896 Scale = 4; 8897 break; 8898 } 8899 8900 if ((V & (Scale - 1)) != 0) 8901 return false; 8902 V /= Scale; 8903 return V == (V & ((1LL << 5) - 1)); 8904} 8905 8906static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 8907 const ARMSubtarget *Subtarget) { 8908 bool isNeg = false; 8909 if (V < 0) { 8910 isNeg = true; 8911 V = - V; 8912 } 8913 8914 switch (VT.getSimpleVT().SimpleTy) { 8915 default: return false; 8916 case MVT::i1: 8917 case MVT::i8: 8918 case MVT::i16: 8919 case MVT::i32: 8920 // + imm12 or - imm8 8921 if (isNeg) 8922 return V == (V & ((1LL << 8) - 1)); 8923 return V == (V & ((1LL << 12) - 1)); 8924 case MVT::f32: 8925 case MVT::f64: 8926 // Same as ARM mode. FIXME: NEON? 8927 if (!Subtarget->hasVFP2()) 8928 return false; 8929 if ((V & 3) != 0) 8930 return false; 8931 V >>= 2; 8932 return V == (V & ((1LL << 8) - 1)); 8933 } 8934} 8935 8936/// isLegalAddressImmediate - Return true if the integer value can be used 8937/// as the offset of the target addressing mode for load / store of the 8938/// given type. 8939static bool isLegalAddressImmediate(int64_t V, EVT VT, 8940 const ARMSubtarget *Subtarget) { 8941 if (V == 0) 8942 return true; 8943 8944 if (!VT.isSimple()) 8945 return false; 8946 8947 if (Subtarget->isThumb1Only()) 8948 return isLegalT1AddressImmediate(V, VT); 8949 else if (Subtarget->isThumb2()) 8950 return isLegalT2AddressImmediate(V, VT, Subtarget); 8951 8952 // ARM mode. 8953 if (V < 0) 8954 V = - V; 8955 switch (VT.getSimpleVT().SimpleTy) { 8956 default: return false; 8957 case MVT::i1: 8958 case MVT::i8: 8959 case MVT::i32: 8960 // +- imm12 8961 return V == (V & ((1LL << 12) - 1)); 8962 case MVT::i16: 8963 // +- imm8 8964 return V == (V & ((1LL << 8) - 1)); 8965 case MVT::f32: 8966 case MVT::f64: 8967 if (!Subtarget->hasVFP2()) // FIXME: NEON? 8968 return false; 8969 if ((V & 3) != 0) 8970 return false; 8971 V >>= 2; 8972 return V == (V & ((1LL << 8) - 1)); 8973 } 8974} 8975 8976bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 8977 EVT VT) const { 8978 int Scale = AM.Scale; 8979 if (Scale < 0) 8980 return false; 8981 8982 switch (VT.getSimpleVT().SimpleTy) { 8983 default: return false; 8984 case MVT::i1: 8985 case MVT::i8: 8986 case MVT::i16: 8987 case MVT::i32: 8988 if (Scale == 1) 8989 return true; 8990 // r + r << imm 8991 Scale = Scale & ~1; 8992 return Scale == 2 || Scale == 4 || Scale == 8; 8993 case MVT::i64: 8994 // r + r 8995 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 8996 return true; 8997 return false; 8998 case MVT::isVoid: 8999 // Note, we allow "void" uses (basically, uses that aren't loads or 9000 // stores), because arm allows folding a scale into many arithmetic 9001 // operations. This should be made more precise and revisited later. 9002 9003 // Allow r << imm, but the imm has to be a multiple of two. 9004 if (Scale & 1) return false; 9005 return isPowerOf2_32(Scale); 9006 } 9007} 9008 9009/// isLegalAddressingMode - Return true if the addressing mode represented 9010/// by AM is legal for this target, for a load/store of the specified type. 
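/// Illustrative examples of the checks below: for i32, "r1 + 8" and
/// "r1 + (r2 << 2)" are accepted on ARM and Thumb2, while "r1 + (r2 << 2) + 8"
/// is rejected because a scaled register cannot be combined with an immediate
/// offset.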
9011bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 9012 Type *Ty) const { 9013 EVT VT = getValueType(Ty, true); 9014 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 9015 return false; 9016 9017 // Can never fold addr of global into load/store. 9018 if (AM.BaseGV) 9019 return false; 9020 9021 switch (AM.Scale) { 9022 case 0: // no scale reg, must be "r+i" or "r", or "i". 9023 break; 9024 case 1: 9025 if (Subtarget->isThumb1Only()) 9026 return false; 9027 // FALL THROUGH. 9028 default: 9029 // ARM doesn't support any R+R*scale+imm addr modes. 9030 if (AM.BaseOffs) 9031 return false; 9032 9033 if (!VT.isSimple()) 9034 return false; 9035 9036 if (Subtarget->isThumb2()) 9037 return isLegalT2ScaledAddressingMode(AM, VT); 9038 9039 int Scale = AM.Scale; 9040 switch (VT.getSimpleVT().SimpleTy) { 9041 default: return false; 9042 case MVT::i1: 9043 case MVT::i8: 9044 case MVT::i32: 9045 if (Scale < 0) Scale = -Scale; 9046 if (Scale == 1) 9047 return true; 9048 // r + r << imm 9049 return isPowerOf2_32(Scale & ~1); 9050 case MVT::i16: 9051 case MVT::i64: 9052 // r + r 9053 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 9054 return true; 9055 return false; 9056 9057 case MVT::isVoid: 9058 // Note, we allow "void" uses (basically, uses that aren't loads or 9059 // stores), because arm allows folding a scale into many arithmetic 9060 // operations. This should be made more precise and revisited later. 9061 9062 // Allow r << imm, but the imm has to be a multiple of two. 9063 if (Scale & 1) return false; 9064 return isPowerOf2_32(Scale); 9065 } 9066 } 9067 return true; 9068} 9069 9070/// isLegalICmpImmediate - Return true if the specified immediate is legal 9071/// icmp immediate, that is the target has icmp instructions which can compare 9072/// a register against the immediate without having to materialize the 9073/// immediate into a register. 9074bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 9075 // Thumb2 and ARM modes can use cmn for negative immediates. 9076 if (!Subtarget->isThumb()) 9077 return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; 9078 if (Subtarget->isThumb2()) 9079 return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; 9080 // Thumb1 doesn't have cmn, and only 8-bit immediates. 9081 return Imm >= 0 && Imm <= 255; 9082} 9083 9084/// isLegalAddImmediate - Return true if the specified immediate is a legal add 9085/// *or sub* immediate, that is the target has add or sub instructions which can 9086/// add a register with the immediate without having to materialize the 9087/// immediate into a register. 9088bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 9089 // Same encoding for add/sub, just flip the sign. 9090 int64_t AbsImm = llvm::abs64(Imm); 9091 if (!Subtarget->isThumb()) 9092 return ARM_AM::getSOImmVal(AbsImm) != -1; 9093 if (Subtarget->isThumb2()) 9094 return ARM_AM::getT2SOImmVal(AbsImm) != -1; 9095 // Thumb1 only has 8-bit unsigned immediate. 
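  // e.g. "adds r0, #200" encodes directly, whereas an addend of 300 exceeds
  // the 8-bit range and is rejected here.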
9096 return AbsImm >= 0 && AbsImm <= 255; 9097} 9098 9099static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 9100 bool isSEXTLoad, SDValue &Base, 9101 SDValue &Offset, bool &isInc, 9102 SelectionDAG &DAG) { 9103 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 9104 return false; 9105 9106 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 9107 // AddressingMode 3 9108 Base = Ptr->getOperand(0); 9109 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 9110 int RHSC = (int)RHS->getZExtValue(); 9111 if (RHSC < 0 && RHSC > -256) { 9112 assert(Ptr->getOpcode() == ISD::ADD); 9113 isInc = false; 9114 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 9115 return true; 9116 } 9117 } 9118 isInc = (Ptr->getOpcode() == ISD::ADD); 9119 Offset = Ptr->getOperand(1); 9120 return true; 9121 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 9122 // AddressingMode 2 9123 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 9124 int RHSC = (int)RHS->getZExtValue(); 9125 if (RHSC < 0 && RHSC > -0x1000) { 9126 assert(Ptr->getOpcode() == ISD::ADD); 9127 isInc = false; 9128 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 9129 Base = Ptr->getOperand(0); 9130 return true; 9131 } 9132 } 9133 9134 if (Ptr->getOpcode() == ISD::ADD) { 9135 isInc = true; 9136 ARM_AM::ShiftOpc ShOpcVal= 9137 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 9138 if (ShOpcVal != ARM_AM::no_shift) { 9139 Base = Ptr->getOperand(1); 9140 Offset = Ptr->getOperand(0); 9141 } else { 9142 Base = Ptr->getOperand(0); 9143 Offset = Ptr->getOperand(1); 9144 } 9145 return true; 9146 } 9147 9148 isInc = (Ptr->getOpcode() == ISD::ADD); 9149 Base = Ptr->getOperand(0); 9150 Offset = Ptr->getOperand(1); 9151 return true; 9152 } 9153 9154 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 9155 return false; 9156} 9157 9158static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 9159 bool isSEXTLoad, SDValue &Base, 9160 SDValue &Offset, bool &isInc, 9161 SelectionDAG &DAG) { 9162 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 9163 return false; 9164 9165 Base = Ptr->getOperand(0); 9166 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 9167 int RHSC = (int)RHS->getZExtValue(); 9168 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 9169 assert(Ptr->getOpcode() == ISD::ADD); 9170 isInc = false; 9171 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 9172 return true; 9173 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 9174 isInc = Ptr->getOpcode() == ISD::ADD; 9175 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 9176 return true; 9177 } 9178 } 9179 9180 return false; 9181} 9182 9183/// getPreIndexedAddressParts - returns true by value, base pointer and 9184/// offset pointer and addressing mode by reference if the node's address 9185/// can be legally represented as pre-indexed load / store address. 
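/// For example, "ldr r0, [r1, #4]!" is a pre-indexed load: r1 is updated to
/// r1 + 4 as part of the access, so a separate pointer increment can be
/// folded away. Thumb1 has no pre/post-indexed forms, hence the early return.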
9186bool 9187ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 9188 SDValue &Offset, 9189 ISD::MemIndexedMode &AM, 9190 SelectionDAG &DAG) const { 9191 if (Subtarget->isThumb1Only()) 9192 return false; 9193 9194 EVT VT; 9195 SDValue Ptr; 9196 bool isSEXTLoad = false; 9197 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 9198 Ptr = LD->getBasePtr(); 9199 VT = LD->getMemoryVT(); 9200 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 9201 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 9202 Ptr = ST->getBasePtr(); 9203 VT = ST->getMemoryVT(); 9204 } else 9205 return false; 9206 9207 bool isInc; 9208 bool isLegal = false; 9209 if (Subtarget->isThumb2()) 9210 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 9211 Offset, isInc, DAG); 9212 else 9213 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 9214 Offset, isInc, DAG); 9215 if (!isLegal) 9216 return false; 9217 9218 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 9219 return true; 9220} 9221 9222/// getPostIndexedAddressParts - returns true by value, base pointer and 9223/// offset pointer and addressing mode by reference if this node can be 9224/// combined with a load / store to form a post-indexed load / store. 9225bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 9226 SDValue &Base, 9227 SDValue &Offset, 9228 ISD::MemIndexedMode &AM, 9229 SelectionDAG &DAG) const { 9230 if (Subtarget->isThumb1Only()) 9231 return false; 9232 9233 EVT VT; 9234 SDValue Ptr; 9235 bool isSEXTLoad = false; 9236 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 9237 VT = LD->getMemoryVT(); 9238 Ptr = LD->getBasePtr(); 9239 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 9240 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 9241 VT = ST->getMemoryVT(); 9242 Ptr = ST->getBasePtr(); 9243 } else 9244 return false; 9245 9246 bool isInc; 9247 bool isLegal = false; 9248 if (Subtarget->isThumb2()) 9249 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 9250 isInc, DAG); 9251 else 9252 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 9253 isInc, DAG); 9254 if (!isLegal) 9255 return false; 9256 9257 if (Ptr != Base) { 9258 // Swap base ptr and offset to catch more post-index load / store when 9259 // it's legal. In Thumb2 mode, offset must be an immediate. 9260 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 9261 !Subtarget->isThumb2()) 9262 std::swap(Base, Offset); 9263 9264 // Post-indexed load / store update the base pointer. 9265 if (Ptr != Base) 9266 return false; 9267 } 9268 9269 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 9270 return true; 9271} 9272 9273void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 9274 APInt &KnownZero, 9275 APInt &KnownOne, 9276 const SelectionDAG &DAG, 9277 unsigned Depth) const { 9278 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); 9279 switch (Op.getOpcode()) { 9280 default: break; 9281 case ARMISD::CMOV: { 9282 // Bits are known zero/one if known on the LHS and RHS. 
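    // CMOV yields one of its first two operands depending on the condition,
    // so only bits known identically in both operands can be reported; the
    // two known-bit sets are intersected below.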
9283 DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); 9284 if (KnownZero == 0 && KnownOne == 0) return; 9285 9286 APInt KnownZeroRHS, KnownOneRHS; 9287 DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); 9288 KnownZero &= KnownZeroRHS; 9289 KnownOne &= KnownOneRHS; 9290 return; 9291 } 9292 } 9293} 9294 9295//===----------------------------------------------------------------------===// 9296// ARM Inline Assembly Support 9297//===----------------------------------------------------------------------===// 9298 9299bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 9300 // Looking for "rev" which is V6+. 9301 if (!Subtarget->hasV6Ops()) 9302 return false; 9303 9304 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 9305 std::string AsmStr = IA->getAsmString(); 9306 SmallVector<StringRef, 4> AsmPieces; 9307 SplitString(AsmStr, AsmPieces, ";\n"); 9308 9309 switch (AsmPieces.size()) { 9310 default: return false; 9311 case 1: 9312 AsmStr = AsmPieces[0]; 9313 AsmPieces.clear(); 9314 SplitString(AsmStr, AsmPieces, " \t,"); 9315 9316 // rev $0, $1 9317 if (AsmPieces.size() == 3 && 9318 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 9319 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 9320 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 9321 if (Ty && Ty->getBitWidth() == 32) 9322 return IntrinsicLowering::LowerToByteSwap(CI); 9323 } 9324 break; 9325 } 9326 9327 return false; 9328} 9329 9330/// getConstraintType - Given a constraint letter, return the type of 9331/// constraint it is for this target. 9332ARMTargetLowering::ConstraintType 9333ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 9334 if (Constraint.size() == 1) { 9335 switch (Constraint[0]) { 9336 default: break; 9337 case 'l': return C_RegisterClass; 9338 case 'w': return C_RegisterClass; 9339 case 'h': return C_RegisterClass; 9340 case 'x': return C_RegisterClass; 9341 case 't': return C_RegisterClass; 9342 case 'j': return C_Other; // Constant for movw. 9343 // An address with a single base register. Due to the way we 9344 // currently handle addresses it is the same as an 'r' memory constraint. 9345 case 'Q': return C_Memory; 9346 } 9347 } else if (Constraint.size() == 2) { 9348 switch (Constraint[0]) { 9349 default: break; 9350 // All 'U+' constraints are addresses. 9351 case 'U': return C_Memory; 9352 } 9353 } 9354 return TargetLowering::getConstraintType(Constraint); 9355} 9356 9357/// Examine constraint type and operand type and determine a weight value. 9358/// This object must already have been set up with the operand type 9359/// and the current alternative constraint selected. 9360TargetLowering::ConstraintWeight 9361ARMTargetLowering::getSingleConstraintMatchWeight( 9362 AsmOperandInfo &info, const char *constraint) const { 9363 ConstraintWeight weight = CW_Invalid; 9364 Value *CallOperandVal = info.CallOperandVal; 9365 // If we don't have a value, we can't do a match, 9366 // but allow it at the lowest weight. 9367 if (CallOperandVal == NULL) 9368 return CW_Default; 9369 Type *type = CallOperandVal->getType(); 9370 // Look at the constraint type. 
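  // 'l' requests a low GPR (r0-r7), a strictly smaller class than 'r' on
  // Thumb, so it scores as a specific-register match there; 'w' requests a
  // VFP/NEON register and is only meaningful for floating-point values.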
9371 switch (*constraint) { 9372 default: 9373 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 9374 break; 9375 case 'l': 9376 if (type->isIntegerTy()) { 9377 if (Subtarget->isThumb()) 9378 weight = CW_SpecificReg; 9379 else 9380 weight = CW_Register; 9381 } 9382 break; 9383 case 'w': 9384 if (type->isFloatingPointTy()) 9385 weight = CW_Register; 9386 break; 9387 } 9388 return weight; 9389} 9390 9391typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 9392RCPair 9393ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 9394 EVT VT) const { 9395 if (Constraint.size() == 1) { 9396 // GCC ARM Constraint Letters 9397 switch (Constraint[0]) { 9398 case 'l': // Low regs or general regs. 9399 if (Subtarget->isThumb()) 9400 return RCPair(0U, &ARM::tGPRRegClass); 9401 return RCPair(0U, &ARM::GPRRegClass); 9402 case 'h': // High regs or no regs. 9403 if (Subtarget->isThumb()) 9404 return RCPair(0U, &ARM::hGPRRegClass); 9405 break; 9406 case 'r': 9407 return RCPair(0U, &ARM::GPRRegClass); 9408 case 'w': 9409 if (VT == MVT::f32) 9410 return RCPair(0U, &ARM::SPRRegClass); 9411 if (VT.getSizeInBits() == 64) 9412 return RCPair(0U, &ARM::DPRRegClass); 9413 if (VT.getSizeInBits() == 128) 9414 return RCPair(0U, &ARM::QPRRegClass); 9415 break; 9416 case 'x': 9417 if (VT == MVT::f32) 9418 return RCPair(0U, &ARM::SPR_8RegClass); 9419 if (VT.getSizeInBits() == 64) 9420 return RCPair(0U, &ARM::DPR_8RegClass); 9421 if (VT.getSizeInBits() == 128) 9422 return RCPair(0U, &ARM::QPR_8RegClass); 9423 break; 9424 case 't': 9425 if (VT == MVT::f32) 9426 return RCPair(0U, &ARM::SPRRegClass); 9427 break; 9428 } 9429 } 9430 if (StringRef("{cc}").equals_lower(Constraint)) 9431 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); 9432 9433 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 9434} 9435 9436/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 9437/// vector. If it is invalid, don't add anything to Ops. 9438void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 9439 std::string &Constraint, 9440 std::vector<SDValue>&Ops, 9441 SelectionDAG &DAG) const { 9442 SDValue Result(0, 0); 9443 9444 // Currently only support length 1 constraints. 9445 if (Constraint.length() != 1) return; 9446 9447 char ConstraintLetter = Constraint[0]; 9448 switch (ConstraintLetter) { 9449 default: break; 9450 case 'j': 9451 case 'I': case 'J': case 'K': case 'L': 9452 case 'M': case 'N': case 'O': 9453 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 9454 if (!C) 9455 return; 9456 9457 int64_t CVal64 = C->getSExtValue(); 9458 int CVal = (int) CVal64; 9459 // None of these constraints allow values larger than 32 bits. Check 9460 // that the value fits in an int. 9461 if (CVal != CVal64) 9462 return; 9463 9464 switch (ConstraintLetter) { 9465 case 'j': 9466 // Constant suitable for movw, must be between 0 and 9467 // 65535. 9468 if (Subtarget->hasV6T2Ops()) 9469 if (CVal >= 0 && CVal <= 65535) 9470 break; 9471 return; 9472 case 'I': 9473 if (Subtarget->isThumb1Only()) { 9474 // This must be a constant between 0 and 255, for ADD 9475 // immediates. 9476 if (CVal >= 0 && CVal <= 255) 9477 break; 9478 } else if (Subtarget->isThumb2()) { 9479 // A constant that can be used as an immediate value in a 9480 // data-processing instruction. 9481 if (ARM_AM::getT2SOImmVal(CVal) != -1) 9482 break; 9483 } else { 9484 // A constant that can be used as an immediate value in a 9485 // data-processing instruction. 
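        // For ARM mode this is a "modified immediate": an 8-bit value rotated
        // right by an even amount, e.g. 255, 0xFF00 or 0xF000000F.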
9486 if (ARM_AM::getSOImmVal(CVal) != -1) 9487 break; 9488 } 9489 return; 9490 9491 case 'J': 9492 if (Subtarget->isThumb()) { // FIXME thumb2 9493 // This must be a constant between -255 and -1, for negated ADD 9494 // immediates. This can be used in GCC with an "n" modifier that 9495 // prints the negated value, for use with SUB instructions. It is 9496 // not useful otherwise but is implemented for compatibility. 9497 if (CVal >= -255 && CVal <= -1) 9498 break; 9499 } else { 9500 // This must be a constant between -4095 and 4095. It is not clear 9501 // what this constraint is intended for. Implemented for 9502 // compatibility with GCC. 9503 if (CVal >= -4095 && CVal <= 4095) 9504 break; 9505 } 9506 return; 9507 9508 case 'K': 9509 if (Subtarget->isThumb1Only()) { 9510 // A 32-bit value where only one byte has a nonzero value. Exclude 9511 // zero to match GCC. This constraint is used by GCC internally for 9512 // constants that can be loaded with a move/shift combination. 9513 // It is not useful otherwise but is implemented for compatibility. 9514 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 9515 break; 9516 } else if (Subtarget->isThumb2()) { 9517 // A constant whose bitwise inverse can be used as an immediate 9518 // value in a data-processing instruction. This can be used in GCC 9519 // with a "B" modifier that prints the inverted value, for use with 9520 // BIC and MVN instructions. It is not useful otherwise but is 9521 // implemented for compatibility. 9522 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 9523 break; 9524 } else { 9525 // A constant whose bitwise inverse can be used as an immediate 9526 // value in a data-processing instruction. This can be used in GCC 9527 // with a "B" modifier that prints the inverted value, for use with 9528 // BIC and MVN instructions. It is not useful otherwise but is 9529 // implemented for compatibility. 9530 if (ARM_AM::getSOImmVal(~CVal) != -1) 9531 break; 9532 } 9533 return; 9534 9535 case 'L': 9536 if (Subtarget->isThumb1Only()) { 9537 // This must be a constant between -7 and 7, 9538 // for 3-operand ADD/SUB immediate instructions. 9539 if (CVal >= -7 && CVal < 7) 9540 break; 9541 } else if (Subtarget->isThumb2()) { 9542 // A constant whose negation can be used as an immediate value in a 9543 // data-processing instruction. This can be used in GCC with an "n" 9544 // modifier that prints the negated value, for use with SUB 9545 // instructions. It is not useful otherwise but is implemented for 9546 // compatibility. 9547 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 9548 break; 9549 } else { 9550 // A constant whose negation can be used as an immediate value in a 9551 // data-processing instruction. This can be used in GCC with an "n" 9552 // modifier that prints the negated value, for use with SUB 9553 // instructions. It is not useful otherwise but is implemented for 9554 // compatibility. 9555 if (ARM_AM::getSOImmVal(-CVal) != -1) 9556 break; 9557 } 9558 return; 9559 9560 case 'M': 9561 if (Subtarget->isThumb()) { // FIXME thumb2 9562 // This must be a multiple of 4 between 0 and 1020, for 9563 // ADD sp + immediate. 9564 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 9565 break; 9566 } else { 9567 // A power of two or a constant between 0 and 32. This is used in 9568 // GCC for the shift amount on shifted register operands, but it is 9569 // useful in general for any shift amounts. 
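        // Note that the power-of-two clause also accepts values above 32,
        // such as 64 or 128, not just constants in the 0-32 range.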
9570 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 9571 break; 9572 } 9573 return; 9574 9575 case 'N': 9576 if (Subtarget->isThumb()) { // FIXME thumb2 9577 // This must be a constant between 0 and 31, for shift amounts. 9578 if (CVal >= 0 && CVal <= 31) 9579 break; 9580 } 9581 return; 9582 9583 case 'O': 9584 if (Subtarget->isThumb()) { // FIXME thumb2 9585 // This must be a multiple of 4 between -508 and 508, for 9586 // ADD/SUB sp = sp + immediate. 9587 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 9588 break; 9589 } 9590 return; 9591 } 9592 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 9593 break; 9594 } 9595 9596 if (Result.getNode()) { 9597 Ops.push_back(Result); 9598 return; 9599 } 9600 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 9601} 9602 9603bool 9604ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 9605 // The ARM target isn't yet aware of offsets. 9606 return false; 9607} 9608 9609bool ARM::isBitFieldInvertedMask(unsigned v) { 9610 if (v == 0xffffffff) 9611 return 0; 9612 // there can be 1's on either or both "outsides", all the "inside" 9613 // bits must be 0's 9614 unsigned int lsb = 0, msb = 31; 9615 while (v & (1 << msb)) --msb; 9616 while (v & (1 << lsb)) ++lsb; 9617 for (unsigned int i = lsb; i <= msb; ++i) { 9618 if (v & (1 << i)) 9619 return 0; 9620 } 9621 return 1; 9622} 9623 9624/// isFPImmLegal - Returns true if the target can instruction select the 9625/// specified FP immediate natively. If false, the legalizer will 9626/// materialize the FP immediate as a load from a constant pool. 9627bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 9628 if (!Subtarget->hasVFP3()) 9629 return false; 9630 if (VT == MVT::f32) 9631 return ARM_AM::getFP32Imm(Imm) != -1; 9632 if (VT == MVT::f64) 9633 return ARM_AM::getFP64Imm(Imm) != -1; 9634 return false; 9635} 9636 9637/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as 9638/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment 9639/// specified in the intrinsic calls. 9640bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 9641 const CallInst &I, 9642 unsigned Intrinsic) const { 9643 switch (Intrinsic) { 9644 case Intrinsic::arm_neon_vld1: 9645 case Intrinsic::arm_neon_vld2: 9646 case Intrinsic::arm_neon_vld3: 9647 case Intrinsic::arm_neon_vld4: 9648 case Intrinsic::arm_neon_vld2lane: 9649 case Intrinsic::arm_neon_vld3lane: 9650 case Intrinsic::arm_neon_vld4lane: { 9651 Info.opc = ISD::INTRINSIC_W_CHAIN; 9652 // Conservatively set memVT to the entire set of vectors loaded. 9653 uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8; 9654 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 9655 Info.ptrVal = I.getArgOperand(0); 9656 Info.offset = 0; 9657 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 9658 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 9659 Info.vol = false; // volatile loads with NEON intrinsics not supported 9660 Info.readMem = true; 9661 Info.writeMem = false; 9662 return true; 9663 } 9664 case Intrinsic::arm_neon_vst1: 9665 case Intrinsic::arm_neon_vst2: 9666 case Intrinsic::arm_neon_vst3: 9667 case Intrinsic::arm_neon_vst4: 9668 case Intrinsic::arm_neon_vst2lane: 9669 case Intrinsic::arm_neon_vst3lane: 9670 case Intrinsic::arm_neon_vst4lane: { 9671 Info.opc = ISD::INTRINSIC_VOID; 9672 // Conservatively set memVT to the entire set of vectors stored. 
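    // The vector operands follow the address; summing their sizes in 64-bit
    // units gives the footprint, e.g. a vst4.32 storing four d registers
    // produces NumElts == 4 and a memVT of v4i64 (32 bytes).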
9673 unsigned NumElts = 0; 9674 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 9675 Type *ArgTy = I.getArgOperand(ArgI)->getType(); 9676 if (!ArgTy->isVectorTy()) 9677 break; 9678 NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; 9679 } 9680 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 9681 Info.ptrVal = I.getArgOperand(0); 9682 Info.offset = 0; 9683 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 9684 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 9685 Info.vol = false; // volatile stores with NEON intrinsics not supported 9686 Info.readMem = false; 9687 Info.writeMem = true; 9688 return true; 9689 } 9690 case Intrinsic::arm_strexd: { 9691 Info.opc = ISD::INTRINSIC_W_CHAIN; 9692 Info.memVT = MVT::i64; 9693 Info.ptrVal = I.getArgOperand(2); 9694 Info.offset = 0; 9695 Info.align = 8; 9696 Info.vol = true; 9697 Info.readMem = false; 9698 Info.writeMem = true; 9699 return true; 9700 } 9701 case Intrinsic::arm_ldrexd: { 9702 Info.opc = ISD::INTRINSIC_W_CHAIN; 9703 Info.memVT = MVT::i64; 9704 Info.ptrVal = I.getArgOperand(0); 9705 Info.offset = 0; 9706 Info.align = 8; 9707 Info.vol = true; 9708 Info.readMem = true; 9709 Info.writeMem = false; 9710 return true; 9711 } 9712 default: 9713 break; 9714 } 9715 9716 return false; 9717} 9718