ARMISelLowering.cpp revision fc501a3ec9d97e372ecb1bd9cf32d861da46b2c9
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
      : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
static const unsigned GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);

    // Floating-point to integer conversions.
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
    // supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a way to detect when "copysign" appears in a DAG
    // with vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have a
    // FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    }
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  setOperationAction(ISD::MUL,   MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);

  if (!Subtarget->isTargetDarwin()) {
    // Non-Darwin platforms may return values in these registers via the
    // personality function.
    setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
    setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  // FIXME: This should be checking for v6k, not just v6.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
    // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
    setInsertFencesForAtomic(true);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }

  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::XOR);
  }

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
  maxStoresPerMemset = 16;
  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as
// the nearest super-register that has a non-null superset. For example,
// DPR_VFP2 is a super-register of SPR, and DPR is a superset of DPR_VFP2.
// Consequently, SPR's representative would be DPR_VFP2. This should work well
// if register pressure tracking were modified such that a register use would
// increment the pressure of the register class's representative and all of its
// super classes' representatives transitively. We have not implemented this
// because of the difficulty prior to coalescing of modeling operand register
// classes due to the common occurrence of cross class copies and subregister
// insertions and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";

  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CAND:          return "ARMISD::CAND";
  case ARMISD::COR:           return "ARMISD::COR";
  case ARMISD::CXOR:          return "ARMISD::CXOR";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::ADDC:          return "ARMISD::ADDC";
  case ARMISD::ADDE:          return "ARMISD::ADDE";
  case ARMISD::SUBC:          return "ARMISD::SUBC";
  case ARMISD::SUBE:          return "ARMISD::SUBE";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
"ARMISD::VSHRs"; 950 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 951 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 952 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 953 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 954 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 955 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 956 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 957 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 958 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 959 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 960 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 961 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 962 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 963 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 964 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 965 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 966 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 967 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 968 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 969 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 970 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 971 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; 972 case ARMISD::VDUP: return "ARMISD::VDUP"; 973 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 974 case ARMISD::VEXT: return "ARMISD::VEXT"; 975 case ARMISD::VREV64: return "ARMISD::VREV64"; 976 case ARMISD::VREV32: return "ARMISD::VREV32"; 977 case ARMISD::VREV16: return "ARMISD::VREV16"; 978 case ARMISD::VZIP: return "ARMISD::VZIP"; 979 case ARMISD::VUZP: return "ARMISD::VUZP"; 980 case ARMISD::VTRN: return "ARMISD::VTRN"; 981 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 982 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 983 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 984 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 985 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 986 case ARMISD::FMAX: return "ARMISD::FMAX"; 987 case ARMISD::FMIN: return "ARMISD::FMIN"; 988 case ARMISD::BFI: return "ARMISD::BFI"; 989 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 990 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 991 case ARMISD::VBSL: return "ARMISD::VBSL"; 992 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 993 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 994 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 995 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 996 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 997 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 998 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 999 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 1000 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 1001 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 1002 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 1003 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 1004 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 1005 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 1006 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 1007 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 1008 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 1009 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 1010 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 1011 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 1012 } 1013} 1014 1015EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { 1016 if (!VT.isVector()) return getPointerTy(); 1017 return VT.changeVectorElementTypeToInteger(); 1018} 1019 1020/// getRegClassFor - Return the register class that should be used for the 1021/// specified 
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    // Fallthrough
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool doesNotRet, bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Disable tail calls if they're not supported.
  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
1330 // These operations are automatically eliminated by the prolog/epilog pass 1331 if (!IsSibCall) 1332 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1333 1334 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1335 1336 RegsToPassVector RegsToPass; 1337 SmallVector<SDValue, 8> MemOpChains; 1338 1339 // Walk the register/memloc assignments, inserting copies/loads. In the case 1340 // of tail call optimization, arguments are handled later. 1341 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1342 i != e; 1343 ++i, ++realArgIdx) { 1344 CCValAssign &VA = ArgLocs[i]; 1345 SDValue Arg = OutVals[realArgIdx]; 1346 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1347 bool isByVal = Flags.isByVal(); 1348 1349 // Promote the value if needed. 1350 switch (VA.getLocInfo()) { 1351 default: llvm_unreachable("Unknown loc info!"); 1352 case CCValAssign::Full: break; 1353 case CCValAssign::SExt: 1354 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1355 break; 1356 case CCValAssign::ZExt: 1357 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1358 break; 1359 case CCValAssign::AExt: 1360 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1361 break; 1362 case CCValAssign::BCvt: 1363 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1364 break; 1365 } 1366 1367 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1368 if (VA.needsCustom()) { 1369 if (VA.getLocVT() == MVT::v2f64) { 1370 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1371 DAG.getConstant(0, MVT::i32)); 1372 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1373 DAG.getConstant(1, MVT::i32)); 1374 1375 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1376 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1377 1378 VA = ArgLocs[++i]; // skip ahead to next loc 1379 if (VA.isRegLoc()) { 1380 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1381 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1382 } else { 1383 assert(VA.isMemLoc()); 1384 1385 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1386 dl, DAG, VA, Flags)); 1387 } 1388 } else { 1389 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1390 StackPtr, MemOpChains, Flags); 1391 } 1392 } else if (VA.isRegLoc()) { 1393 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1394 } else if (isByVal) { 1395 assert(VA.isMemLoc()); 1396 unsigned offset = 0; 1397 1398 // True if this byval aggregate will be split between registers 1399 // and memory. 
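// Illustrative example: a 16-byte byval whose first assigned register is r2
// gets its first 8 bytes loaded into r2/r3 by the loop below, and the
// remaining 8 bytes copied to the outgoing stack area by the memcpy after it.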
1400 if (CCInfo.isFirstByValRegValid()) { 1401 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1402 unsigned int i, j; 1403 for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { 1404 SDValue Const = DAG.getConstant(4*i, MVT::i32); 1405 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1406 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1407 MachinePointerInfo(), 1408 false, false, false, 0); 1409 MemOpChains.push_back(Load.getValue(1)); 1410 RegsToPass.push_back(std::make_pair(j, Load)); 1411 } 1412 offset = ARM::R4 - CCInfo.getFirstByValReg(); 1413 CCInfo.clearFirstByValReg(); 1414 } 1415 1416 unsigned LocMemOffset = VA.getLocMemOffset(); 1417 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1418 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1419 StkPtrOff); 1420 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1421 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1422 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1423 MVT::i32); 1424 MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, 1425 Flags.getByValAlign(), 1426 /*isVolatile=*/false, 1427 /*AlwaysInline=*/false, 1428 MachinePointerInfo(0), 1429 MachinePointerInfo(0))); 1430 1431 } else if (!IsSibCall) { 1432 assert(VA.isMemLoc()); 1433 1434 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1435 dl, DAG, VA, Flags)); 1436 } 1437 } 1438 1439 if (!MemOpChains.empty()) 1440 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1441 &MemOpChains[0], MemOpChains.size()); 1442 1443 // Build a sequence of copy-to-reg nodes chained together with token chain 1444 // and flag operands which copy the outgoing args into the appropriate regs. 1445 SDValue InFlag; 1446 // Tail call byval lowering might overwrite argument registers so in case of 1447 // tail call optimization the copies to registers are lowered later. 1448 if (!isTailCall) 1449 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1450 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1451 RegsToPass[i].second, InFlag); 1452 InFlag = Chain.getValue(1); 1453 } 1454 1455 // For tail calls lower the arguments to the 'real' stack slot. 1456 if (isTailCall) { 1457 // Force all the incoming stack arguments to be loaded from the stack 1458 // before any new outgoing arguments are stored to the stack, because the 1459 // outgoing stack slots may alias the incoming argument stack slots, and 1460 // the alias isn't otherwise explicit. This is slightly more conservative 1461 // than necessary, because it means that each store effectively depends 1462 // on every argument instead of just those arguments it would clobber. 1463 1464 // Do not flag preceding copytoreg stuff together with the following stuff. 1465 InFlag = SDValue(); 1466 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1467 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1468 RegsToPass[i].second, InFlag); 1469 InFlag = Chain.getValue(1); 1470 } 1471 InFlag =SDValue(); 1472 } 1473 1474 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1475 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1476 // node so that legalize doesn't hack it. 
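// The flags below record what kind of callee this is; they drive the choice
// of call opcode (CALL, CALL_PRED, tCALL or CALL_NOLINK) further down.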
1477 bool isDirect = false; 1478 bool isARMFunc = false; 1479 bool isLocalARMFunc = false; 1480 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1481 1482 if (EnableARMLongCalls) { 1483 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1484 && "long-calls with non-static relocation model!"); 1485 // Handle a global address or an external symbol. If it's not one of 1486 // those, the target's already in a register, so we don't need to do 1487 // anything extra. 1488 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1489 const GlobalValue *GV = G->getGlobal(); 1490 // Create a constant pool entry for the callee address 1491 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1492 ARMConstantPoolValue *CPV = 1493 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1494 1495 // Get the address of the callee into a register 1496 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1497 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1498 Callee = DAG.getLoad(getPointerTy(), dl, 1499 DAG.getEntryNode(), CPAddr, 1500 MachinePointerInfo::getConstantPool(), 1501 false, false, false, 0); 1502 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1503 const char *Sym = S->getSymbol(); 1504 1505 // Create a constant pool entry for the callee address 1506 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1507 ARMConstantPoolValue *CPV = 1508 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1509 ARMPCLabelIndex, 0); 1510 // Get the address of the callee into a register 1511 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1512 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1513 Callee = DAG.getLoad(getPointerTy(), dl, 1514 DAG.getEntryNode(), CPAddr, 1515 MachinePointerInfo::getConstantPool(), 1516 false, false, false, 0); 1517 } 1518 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1519 const GlobalValue *GV = G->getGlobal(); 1520 isDirect = true; 1521 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1522 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1523 getTargetMachine().getRelocationModel() != Reloc::Static; 1524 isARMFunc = !Subtarget->isThumb() || isStub; 1525 // ARM call to a local ARM function is predicable. 1526 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1527 // tBX takes a register source operand. 
1528 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1529 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1530 ARMConstantPoolValue *CPV = 1531 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1532 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1533 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1534 Callee = DAG.getLoad(getPointerTy(), dl, 1535 DAG.getEntryNode(), CPAddr, 1536 MachinePointerInfo::getConstantPool(), 1537 false, false, false, 0); 1538 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1539 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1540 getPointerTy(), Callee, PICLabel); 1541 } else { 1542 // On ELF targets for PIC code, direct calls should go through the PLT 1543 unsigned OpFlags = 0; 1544 if (Subtarget->isTargetELF() && 1545 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1546 OpFlags = ARMII::MO_PLT; 1547 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1548 } 1549 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1550 isDirect = true; 1551 bool isStub = Subtarget->isTargetDarwin() && 1552 getTargetMachine().getRelocationModel() != Reloc::Static; 1553 isARMFunc = !Subtarget->isThumb() || isStub; 1554 // tBX takes a register source operand. 1555 const char *Sym = S->getSymbol(); 1556 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1557 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1558 ARMConstantPoolValue *CPV = 1559 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1560 ARMPCLabelIndex, 4); 1561 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1562 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1563 Callee = DAG.getLoad(getPointerTy(), dl, 1564 DAG.getEntryNode(), CPAddr, 1565 MachinePointerInfo::getConstantPool(), 1566 false, false, false, 0); 1567 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1568 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1569 getPointerTy(), Callee, PICLabel); 1570 } else { 1571 unsigned OpFlags = 0; 1572 // On ELF targets for PIC code, direct calls should go through the PLT 1573 if (Subtarget->isTargetELF() && 1574 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1575 OpFlags = ARMII::MO_PLT; 1576 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1577 } 1578 } 1579 1580 // FIXME: handle tail calls differently. 1581 unsigned CallOpc; 1582 if (Subtarget->isThumb()) { 1583 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1584 CallOpc = ARMISD::CALL_NOLINK; 1585 else if (doesNotRet && isDirect && !isARMFunc && 1586 Subtarget->hasRAS() && !Subtarget->isThumb1Only()) 1587 // "mov lr, pc; b _foo" to avoid confusing the RSP 1588 CallOpc = ARMISD::CALL_NOLINK; 1589 else 1590 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1591 } else { 1592 if (!isDirect && !Subtarget->hasV5TOps()) { 1593 CallOpc = ARMISD::CALL_NOLINK; 1594 } else if (doesNotRet && isDirect && Subtarget->hasRAS()) 1595 // "mov lr, pc; b _foo" to avoid confusing the RSP 1596 CallOpc = ARMISD::CALL_NOLINK; 1597 else 1598 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 1599 } 1600 1601 std::vector<SDValue> Ops; 1602 Ops.push_back(Chain); 1603 Ops.push_back(Callee); 1604 1605 // Add argument registers to the end of the list so that they are known live 1606 // into the call. 
1607 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1608 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1609 RegsToPass[i].second.getValueType())); 1610 1611 // Add a register mask operand representing the call-preserved registers. 1612 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 1613 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 1614 assert(Mask && "Missing call preserved mask for calling convention"); 1615 Ops.push_back(DAG.getRegisterMask(Mask)); 1616 1617 if (InFlag.getNode()) 1618 Ops.push_back(InFlag); 1619 1620 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1621 if (isTailCall) 1622 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1623 1624 // Returns a chain and a flag for retval copy to use. 1625 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1626 InFlag = Chain.getValue(1); 1627 1628 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1629 DAG.getIntPtrConstant(0, true), InFlag); 1630 if (!Ins.empty()) 1631 InFlag = Chain.getValue(1); 1632 1633 // Handle result values, copying them out of physregs into vregs that we 1634 // return. 1635 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1636 dl, DAG, InVals); 1637} 1638 1639/// HandleByVal - Every parameter *after* a byval parameter is passed 1640/// on the stack. Remember the next parameter register to allocate, 1641/// and then confiscate the rest of the parameter registers to insure 1642/// this. 1643void 1644llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { 1645 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 1646 assert((State->getCallOrPrologue() == Prologue || 1647 State->getCallOrPrologue() == Call) && 1648 "unhandled ParmContext"); 1649 if ((!State->isFirstByValRegValid()) && 1650 (ARM::R0 <= reg) && (reg <= ARM::R3)) { 1651 State->setFirstByValReg(reg); 1652 // At a call site, a byval parameter that is split between 1653 // registers and memory needs its size truncated here. In a 1654 // function prologue, such byval parameters are reassembled in 1655 // memory, and are not truncated. 1656 if (State->getCallOrPrologue() == Call) { 1657 unsigned excess = 4 * (ARM::R4 - reg); 1658 assert(size >= excess && "expected larger existing stack allocation"); 1659 size -= excess; 1660 } 1661 } 1662 // Confiscate any remaining parameter registers to preclude their 1663 // assignment to subsequent parameters. 1664 while (State->AllocateReg(GPRArgRegs, 4)) 1665 ; 1666} 1667 1668/// MatchingStackOffset - Return true if the given stack call argument is 1669/// already available in the same position (relatively) of the caller's 1670/// incoming argument stack. 
1671static 1672bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1673 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1674 const ARMInstrInfo *TII) { 1675 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1676 int FI = INT_MAX; 1677 if (Arg.getOpcode() == ISD::CopyFromReg) { 1678 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1679 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1680 return false; 1681 MachineInstr *Def = MRI->getVRegDef(VR); 1682 if (!Def) 1683 return false; 1684 if (!Flags.isByVal()) { 1685 if (!TII->isLoadFromStackSlot(Def, FI)) 1686 return false; 1687 } else { 1688 return false; 1689 } 1690 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1691 if (Flags.isByVal()) 1692 // ByVal argument is passed in as a pointer but it's now being 1693 // dereferenced. e.g. 1694 // define @foo(%struct.X* %A) { 1695 // tail call @bar(%struct.X* byval %A) 1696 // } 1697 return false; 1698 SDValue Ptr = Ld->getBasePtr(); 1699 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1700 if (!FINode) 1701 return false; 1702 FI = FINode->getIndex(); 1703 } else 1704 return false; 1705 1706 assert(FI != INT_MAX); 1707 if (!MFI->isFixedObjectIndex(FI)) 1708 return false; 1709 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1710} 1711 1712/// IsEligibleForTailCallOptimization - Check whether the call is eligible 1713/// for tail call optimization. Targets which want to do tail call 1714/// optimization should implement this function. 1715bool 1716ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1717 CallingConv::ID CalleeCC, 1718 bool isVarArg, 1719 bool isCalleeStructRet, 1720 bool isCallerStructRet, 1721 const SmallVectorImpl<ISD::OutputArg> &Outs, 1722 const SmallVectorImpl<SDValue> &OutVals, 1723 const SmallVectorImpl<ISD::InputArg> &Ins, 1724 SelectionDAG& DAG) const { 1725 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1726 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1727 bool CCMatch = CallerCC == CalleeCC; 1728 1729 // Look for obvious safe cases to perform tail call optimization that do not 1730 // require ABI changes. This is what gcc calls sibcall. 1731 1732 // Do not sibcall optimize vararg calls unless the call site is not passing 1733 // any arguments. 1734 if (isVarArg && !Outs.empty()) 1735 return false; 1736 1737 // Also avoid sibcall optimization if either caller or callee uses struct 1738 // return semantics. 1739 if (isCalleeStructRet || isCallerStructRet) 1740 return false; 1741 1742 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1743 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 1744 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 1745 // support in the assembler and linker to be used. This would need to be 1746 // fixed to fully support tail calls in Thumb1. 1747 // 1748 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1749 // LR. This means if we need to reload LR, it takes an extra instruction, 1750 // which outweighs the value of the tail call; but here we don't know yet 1751 // whether LR is going to be used. Probably the right approach is to 1752 // generate the tail call here and turn it back into CALL/RET in 1753 // emitEpilogue if LR is used.
1754 1755 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1756 // but we need to make sure there are enough registers; the only valid 1757 // registers are the 4 used for parameters. We don't currently do this 1758 // case. 1759 if (Subtarget->isThumb1Only()) 1760 return false; 1761 1762 // If the calling conventions do not match, then we'd better make sure the 1763 // results are returned in the same way as what the caller expects. 1764 if (!CCMatch) { 1765 SmallVector<CCValAssign, 16> RVLocs1; 1766 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 1767 getTargetMachine(), RVLocs1, *DAG.getContext(), Call); 1768 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1769 1770 SmallVector<CCValAssign, 16> RVLocs2; 1771 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 1772 getTargetMachine(), RVLocs2, *DAG.getContext(), Call); 1773 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1774 1775 if (RVLocs1.size() != RVLocs2.size()) 1776 return false; 1777 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1778 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1779 return false; 1780 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1781 return false; 1782 if (RVLocs1[i].isRegLoc()) { 1783 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1784 return false; 1785 } else { 1786 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1787 return false; 1788 } 1789 } 1790 } 1791 1792 // If the callee takes no arguments then go on to check the results of the 1793 // call. 1794 if (!Outs.empty()) { 1795 // Check if stack adjustment is needed. For now, do not do this if any 1796 // argument is passed on the stack. 1797 SmallVector<CCValAssign, 16> ArgLocs; 1798 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 1799 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1800 CCInfo.AnalyzeCallOperands(Outs, 1801 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1802 if (CCInfo.getNextStackOffset()) { 1803 MachineFunction &MF = DAG.getMachineFunction(); 1804 1805 // Check if the arguments are already laid out in the right way as 1806 // the caller's fixed stack objects. 1807 MachineFrameInfo *MFI = MF.getFrameInfo(); 1808 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1809 const ARMInstrInfo *TII = 1810 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1811 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1812 i != e; 1813 ++i, ++realArgIdx) { 1814 CCValAssign &VA = ArgLocs[i]; 1815 EVT RegVT = VA.getLocVT(); 1816 SDValue Arg = OutVals[realArgIdx]; 1817 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1818 if (VA.getLocInfo() == CCValAssign::Indirect) 1819 return false; 1820 if (VA.needsCustom()) { 1821 // f64 and vector types are split into multiple registers or 1822 // register/stack-slot combinations. The types will not match 1823 // the registers; give up on memory f64 refs until we figure 1824 // out what to do about this. 
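// Every piece of a custom-lowered argument must be in a register location:
// f64 needs two locations, v2f64 needs four.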
1825 if (!VA.isRegLoc()) 1826 return false; 1827 if (!ArgLocs[++i].isRegLoc()) 1828 return false; 1829 if (RegVT == MVT::v2f64) { 1830 if (!ArgLocs[++i].isRegLoc()) 1831 return false; 1832 if (!ArgLocs[++i].isRegLoc()) 1833 return false; 1834 } 1835 } else if (!VA.isRegLoc()) { 1836 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1837 MFI, MRI, TII)) 1838 return false; 1839 } 1840 } 1841 } 1842 } 1843 1844 return true; 1845} 1846 1847SDValue 1848ARMTargetLowering::LowerReturn(SDValue Chain, 1849 CallingConv::ID CallConv, bool isVarArg, 1850 const SmallVectorImpl<ISD::OutputArg> &Outs, 1851 const SmallVectorImpl<SDValue> &OutVals, 1852 DebugLoc dl, SelectionDAG &DAG) const { 1853 1854 // CCValAssign - represent the assignment of the return value to a location. 1855 SmallVector<CCValAssign, 16> RVLocs; 1856 1857 // CCState - Info about the registers and stack slots. 1858 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1859 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1860 1861 // Analyze outgoing return values. 1862 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1863 isVarArg)); 1864 1865 // If this is the first return lowered for this function, add 1866 // the regs to the liveout set for the function. 1867 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1868 for (unsigned i = 0; i != RVLocs.size(); ++i) 1869 if (RVLocs[i].isRegLoc()) 1870 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1871 } 1872 1873 SDValue Flag; 1874 1875 // Copy the result values into the output registers. 1876 for (unsigned i = 0, realRVLocIdx = 0; 1877 i != RVLocs.size(); 1878 ++i, ++realRVLocIdx) { 1879 CCValAssign &VA = RVLocs[i]; 1880 assert(VA.isRegLoc() && "Can only return in registers!"); 1881 1882 SDValue Arg = OutVals[realRVLocIdx]; 1883 1884 switch (VA.getLocInfo()) { 1885 default: llvm_unreachable("Unknown loc info!"); 1886 case CCValAssign::Full: break; 1887 case CCValAssign::BCvt: 1888 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1889 break; 1890 } 1891 1892 if (VA.needsCustom()) { 1893 if (VA.getLocVT() == MVT::v2f64) { 1894 // Extract the first half and return it in two registers. 1895 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1896 DAG.getConstant(0, MVT::i32)); 1897 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1898 DAG.getVTList(MVT::i32, MVT::i32), Half); 1899 1900 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1901 Flag = Chain.getValue(1); 1902 VA = RVLocs[++i]; // skip ahead to next loc 1903 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1904 HalfGPRs.getValue(1), Flag); 1905 Flag = Chain.getValue(1); 1906 VA = RVLocs[++i]; // skip ahead to next loc 1907 1908 // Extract the 2nd half and fall through to handle it as an f64 value. 1909 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1910 DAG.getConstant(1, MVT::i32)); 1911 } 1912 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1913 // available. 
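// VMOVRRD splits the f64 into two i32 halves, which are copied into the
// consecutive return registers described by this RVLoc and the next one.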
1914 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1915 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1916 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1917 Flag = Chain.getValue(1); 1918 VA = RVLocs[++i]; // skip ahead to next loc 1919 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1920 Flag); 1921 } else 1922 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1923 1924 // Guarantee that all emitted copies are 1925 // stuck together, avoiding something bad. 1926 Flag = Chain.getValue(1); 1927 } 1928 1929 SDValue result; 1930 if (Flag.getNode()) 1931 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1932 else // Return Void 1933 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1934 1935 return result; 1936} 1937 1938bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { 1939 if (N->getNumValues() != 1) 1940 return false; 1941 if (!N->hasNUsesOfValue(1, 0)) 1942 return false; 1943 1944 unsigned NumCopies = 0; 1945 SDNode* Copies[2] = { 0, 0 }; 1946 SDNode *Use = *N->use_begin(); 1947 if (Use->getOpcode() == ISD::CopyToReg) { 1948 Copies[NumCopies++] = Use; 1949 } else if (Use->getOpcode() == ARMISD::VMOVRRD) { 1950 // f64 returned in a pair of GPRs. 1951 for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); 1952 UI != UE; ++UI) { 1953 if (UI->getOpcode() != ISD::CopyToReg) 1954 return false; 1955 Copies[UI.getUse().getResNo()] = *UI; 1956 ++NumCopies; 1957 } 1958 } else if (Use->getOpcode() == ISD::BITCAST) { 1959 // f32 returned in a single GPR. 1960 if (!Use->hasNUsesOfValue(1, 0)) 1961 return false; 1962 Use = *Use->use_begin(); 1963 if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) 1964 return false; 1965 Copies[NumCopies++] = Use; 1966 } else { 1967 return false; 1968 } 1969 1970 if (NumCopies != 1 && NumCopies != 2) 1971 return false; 1972 1973 bool HasRet = false; 1974 for (unsigned i = 0; i < NumCopies; ++i) { 1975 SDNode *Copy = Copies[i]; 1976 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 1977 UI != UE; ++UI) { 1978 if (UI->getOpcode() == ISD::CopyToReg) { 1979 SDNode *Use = *UI; 1980 if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1]))) 1981 continue; 1982 return false; 1983 } 1984 if (UI->getOpcode() != ARMISD::RET_FLAG) 1985 return false; 1986 HasRet = true; 1987 } 1988 } 1989 1990 return HasRet; 1991} 1992 1993bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 1994 if (!EnableARMTailCalls) 1995 return false; 1996 1997 if (!CI->isTailCall()) 1998 return false; 1999 2000 return !Subtarget->isThumb1Only(); 2001} 2002 2003// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2004// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 2005// one of the above mentioned nodes. It has to be wrapped because otherwise 2006// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2007// be used to form addressing mode. These wrapped nodes will be selected 2008// into MOVi. 
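// For example, LowerConstantPool below emits a TargetConstantPool node and
// wraps it in ARMISD::Wrapper instead of returning the raw target node.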
2009static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2010 EVT PtrVT = Op.getValueType(); 2011 // FIXME there is no actual debug info here 2012 DebugLoc dl = Op.getDebugLoc(); 2013 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2014 SDValue Res; 2015 if (CP->isMachineConstantPoolEntry()) 2016 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2017 CP->getAlignment()); 2018 else 2019 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2020 CP->getAlignment()); 2021 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2022} 2023 2024unsigned ARMTargetLowering::getJumpTableEncoding() const { 2025 return MachineJumpTableInfo::EK_Inline; 2026} 2027 2028SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2029 SelectionDAG &DAG) const { 2030 MachineFunction &MF = DAG.getMachineFunction(); 2031 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2032 unsigned ARMPCLabelIndex = 0; 2033 DebugLoc DL = Op.getDebugLoc(); 2034 EVT PtrVT = getPointerTy(); 2035 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2036 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2037 SDValue CPAddr; 2038 if (RelocM == Reloc::Static) { 2039 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2040 } else { 2041 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2042 ARMPCLabelIndex = AFI->createPICLabelUId(); 2043 ARMConstantPoolValue *CPV = 2044 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2045 ARMCP::CPBlockAddress, PCAdj); 2046 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2047 } 2048 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2049 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2050 MachinePointerInfo::getConstantPool(), 2051 false, false, false, 0); 2052 if (RelocM == Reloc::Static) 2053 return Result; 2054 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2055 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2056} 2057 2058// Lower ISD::GlobalTLSAddress using the "general dynamic" model 2059SDValue 2060ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2061 SelectionDAG &DAG) const { 2062 DebugLoc dl = GA->getDebugLoc(); 2063 EVT PtrVT = getPointerTy(); 2064 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2065 MachineFunction &MF = DAG.getMachineFunction(); 2066 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2067 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2068 ARMConstantPoolValue *CPV = 2069 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2070 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2071 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2072 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2073 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2074 MachinePointerInfo::getConstantPool(), 2075 false, false, false, 0); 2076 SDValue Chain = Argument.getValue(1); 2077 2078 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2079 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2080 2081 // call __tls_get_addr. 2082 ArgListTy Args; 2083 ArgListEntry Entry; 2084 Entry.Node = Argument; 2085 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2086 Args.push_back(Entry); 2087 // FIXME: is there useful debug info available here? 
2088 std::pair<SDValue, SDValue> CallResult = 2089 LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), 2090 false, false, false, false, 2091 0, CallingConv::C, /*isTailCall=*/false, 2092 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 2093 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 2094 return CallResult.first; 2095} 2096 2097// Lower ISD::GlobalTLSAddress using the "initial exec" or 2098// "local exec" model. 2099SDValue 2100ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2101 SelectionDAG &DAG) const { 2102 const GlobalValue *GV = GA->getGlobal(); 2103 DebugLoc dl = GA->getDebugLoc(); 2104 SDValue Offset; 2105 SDValue Chain = DAG.getEntryNode(); 2106 EVT PtrVT = getPointerTy(); 2107 // Get the Thread Pointer 2108 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2109 2110 if (GV->isDeclaration()) { 2111 MachineFunction &MF = DAG.getMachineFunction(); 2112 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2113 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2114 // Initial exec model. 2115 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2116 ARMConstantPoolValue *CPV = 2117 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2118 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2119 true); 2120 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2121 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2122 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2123 MachinePointerInfo::getConstantPool(), 2124 false, false, false, 0); 2125 Chain = Offset.getValue(1); 2126 2127 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2128 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2129 2130 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2131 MachinePointerInfo::getConstantPool(), 2132 false, false, false, 0); 2133 } else { 2134 // local exec model 2135 ARMConstantPoolValue *CPV = 2136 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2137 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2138 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2139 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2140 MachinePointerInfo::getConstantPool(), 2141 false, false, false, 0); 2142 } 2143 2144 // The address of the thread local variable is the add of the thread 2145 // pointer with the offset of the variable. 
2146 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2147} 2148 2149SDValue 2150ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2151 // TODO: implement the "local dynamic" model 2152 assert(Subtarget->isTargetELF() && 2153 "TLS not implemented for non-ELF targets"); 2154 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2155 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 2156 // otherwise use the "Local Exec" TLS Model 2157 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 2158 return LowerToTLSGeneralDynamicModel(GA, DAG); 2159 else 2160 return LowerToTLSExecModels(GA, DAG); 2161} 2162 2163SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2164 SelectionDAG &DAG) const { 2165 EVT PtrVT = getPointerTy(); 2166 DebugLoc dl = Op.getDebugLoc(); 2167 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2168 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2169 if (RelocM == Reloc::PIC_) { 2170 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2171 ARMConstantPoolValue *CPV = 2172 ARMConstantPoolConstant::Create(GV, 2173 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2174 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2175 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2176 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2177 CPAddr, 2178 MachinePointerInfo::getConstantPool(), 2179 false, false, false, 0); 2180 SDValue Chain = Result.getValue(1); 2181 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2182 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2183 if (!UseGOTOFF) 2184 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2185 MachinePointerInfo::getGOT(), 2186 false, false, false, 0); 2187 return Result; 2188 } 2189 2190 // If we have T2 ops, we can materialize the address directly via movt/movw 2191 // pair. This is always cheaper. 2192 if (Subtarget->useMovt()) { 2193 ++NumMovwMovt; 2194 // FIXME: Once remat is capable of dealing with instructions with register 2195 // operands, expand this into two nodes. 2196 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2197 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2198 } else { 2199 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2200 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2201 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2202 MachinePointerInfo::getConstantPool(), 2203 false, false, false, 0); 2204 } 2205} 2206 2207SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2208 SelectionDAG &DAG) const { 2209 EVT PtrVT = getPointerTy(); 2210 DebugLoc dl = Op.getDebugLoc(); 2211 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2212 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2213 MachineFunction &MF = DAG.getMachineFunction(); 2214 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2215 2216 // FIXME: Enable this for static codegen when tool issues are fixed. Also 2217 // update ARMFastISel::ARMMaterializeGV. 2218 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2219 ++NumMovwMovt; 2220 // FIXME: Once remat is capable of dealing with instructions with register 2221 // operands, expand this into two nodes. 2222 if (RelocM == Reloc::Static) 2223 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2224 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2225 2226 unsigned Wrapper = (RelocM == Reloc::PIC_) 2227 ? 
ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2228 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2229 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2230 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2231 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2232 MachinePointerInfo::getGOT(), 2233 false, false, false, 0); 2234 return Result; 2235 } 2236 2237 unsigned ARMPCLabelIndex = 0; 2238 SDValue CPAddr; 2239 if (RelocM == Reloc::Static) { 2240 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2241 } else { 2242 ARMPCLabelIndex = AFI->createPICLabelUId(); 2243 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2244 ARMConstantPoolValue *CPV = 2245 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2246 PCAdj); 2247 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2248 } 2249 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2250 2251 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2252 MachinePointerInfo::getConstantPool(), 2253 false, false, false, 0); 2254 SDValue Chain = Result.getValue(1); 2255 2256 if (RelocM == Reloc::PIC_) { 2257 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2258 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2259 } 2260 2261 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2262 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2263 false, false, false, 0); 2264 2265 return Result; 2266} 2267 2268SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2269 SelectionDAG &DAG) const { 2270 assert(Subtarget->isTargetELF() && 2271 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2272 MachineFunction &MF = DAG.getMachineFunction(); 2273 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2274 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2275 EVT PtrVT = getPointerTy(); 2276 DebugLoc dl = Op.getDebugLoc(); 2277 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2278 ARMConstantPoolValue *CPV = 2279 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2280 ARMPCLabelIndex, PCAdj); 2281 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2282 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2283 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2284 MachinePointerInfo::getConstantPool(), 2285 false, false, false, 0); 2286 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2287 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2288} 2289 2290SDValue 2291ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2292 DebugLoc dl = Op.getDebugLoc(); 2293 SDValue Val = DAG.getConstant(0, MVT::i32); 2294 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2295 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2296 Op.getOperand(1), Val); 2297} 2298 2299SDValue 2300ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2301 DebugLoc dl = Op.getDebugLoc(); 2302 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2303 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2304} 2305 2306SDValue 2307ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2308 const ARMSubtarget *Subtarget) const { 2309 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2310 DebugLoc dl = Op.getDebugLoc(); 2311 switch (IntNo) { 2312 default: return SDValue(); // Don't custom lower most intrinsics. 
2313 case Intrinsic::arm_thread_pointer: { 2314 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2315 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2316 } 2317 case Intrinsic::eh_sjlj_lsda: { 2318 MachineFunction &MF = DAG.getMachineFunction(); 2319 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2320 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2321 EVT PtrVT = getPointerTy(); 2322 DebugLoc dl = Op.getDebugLoc(); 2323 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2324 SDValue CPAddr; 2325 unsigned PCAdj = (RelocM != Reloc::PIC_) 2326 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2327 ARMConstantPoolValue *CPV = 2328 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2329 ARMCP::CPLSDA, PCAdj); 2330 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2331 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2332 SDValue Result = 2333 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2334 MachinePointerInfo::getConstantPool(), 2335 false, false, false, 0); 2336 2337 if (RelocM == Reloc::PIC_) { 2338 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2339 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2340 } 2341 return Result; 2342 } 2343 case Intrinsic::arm_neon_vmulls: 2344 case Intrinsic::arm_neon_vmullu: { 2345 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2346 ? ARMISD::VMULLs : ARMISD::VMULLu; 2347 return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), 2348 Op.getOperand(1), Op.getOperand(2)); 2349 } 2350 } 2351} 2352 2353static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 2354 const ARMSubtarget *Subtarget) { 2355 DebugLoc dl = Op.getDebugLoc(); 2356 if (!Subtarget->hasDataBarrier()) { 2357 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2358 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2359 // here. 2360 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2361 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2362 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2363 DAG.getConstant(0, MVT::i32)); 2364 } 2365 2366 SDValue Op5 = Op.getOperand(5); 2367 bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; 2368 unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2369 unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 2370 bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); 2371 2372 ARM_MB::MemBOpt DMBOpt; 2373 if (isDeviceBarrier) 2374 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; 2375 else 2376 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; 2377 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2378 DAG.getConstant(DMBOpt, MVT::i32)); 2379} 2380 2381 2382static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2383 const ARMSubtarget *Subtarget) { 2384 // FIXME: handle "fence singlethread" more efficiently. 2385 DebugLoc dl = Op.getDebugLoc(); 2386 if (!Subtarget->hasDataBarrier()) { 2387 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2388 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2389 // here. 2390 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2391 "Unexpected ISD::MEMBARRIER encountered. 
Should be libcall!"); 2392 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2393 DAG.getConstant(0, MVT::i32)); 2394 } 2395 2396 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2397 DAG.getConstant(ARM_MB::ISH, MVT::i32)); 2398} 2399 2400static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2401 const ARMSubtarget *Subtarget) { 2402 // ARM pre v5TE and Thumb1 do not have preload instructions. 2403 if (!(Subtarget->isThumb2() || 2404 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2405 // Just preserve the chain. 2406 return Op.getOperand(0); 2407 2408 DebugLoc dl = Op.getDebugLoc(); 2409 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2410 if (!isRead && 2411 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2412 // ARMv7 with MP extension has PLDW. 2413 return Op.getOperand(0); 2414 2415 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2416 if (Subtarget->isThumb()) { 2417 // Invert the bits. 2418 isRead = ~isRead & 1; 2419 isData = ~isData & 1; 2420 } 2421 2422 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2423 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2424 DAG.getConstant(isData, MVT::i32)); 2425} 2426 2427static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2428 MachineFunction &MF = DAG.getMachineFunction(); 2429 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2430 2431 // vastart just stores the address of the VarArgsFrameIndex slot into the 2432 // memory location argument. 2433 DebugLoc dl = Op.getDebugLoc(); 2434 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2435 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2436 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2437 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2438 MachinePointerInfo(SV), false, false, 0); 2439} 2440 2441SDValue 2442ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2443 SDValue &Root, SelectionDAG &DAG, 2444 DebugLoc dl) const { 2445 MachineFunction &MF = DAG.getMachineFunction(); 2446 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2447 2448 const TargetRegisterClass *RC; 2449 if (AFI->isThumb1OnlyFunction()) 2450 RC = ARM::tGPRRegisterClass; 2451 else 2452 RC = ARM::GPRRegisterClass; 2453 2454 // Transform the arguments stored in physical registers into virtual ones. 2455 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2456 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2457 2458 SDValue ArgValue2; 2459 if (NextVA.isMemLoc()) { 2460 MachineFrameInfo *MFI = MF.getFrameInfo(); 2461 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2462 2463 // Create load node to retrieve arguments from the stack.
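// The second half of the f64 was passed on the stack, so reload it from the
// fixed stack object created above.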
2464 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2465 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2466 MachinePointerInfo::getFixedStack(FI), 2467 false, false, false, 0); 2468 } else { 2469 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2470 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2471 } 2472 2473 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2474} 2475 2476void 2477ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2478 unsigned &VARegSize, unsigned &VARegSaveSize) 2479 const { 2480 unsigned NumGPRs; 2481 if (CCInfo.isFirstByValRegValid()) 2482 NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); 2483 else { 2484 unsigned int firstUnalloced; 2485 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2486 sizeof(GPRArgRegs) / 2487 sizeof(GPRArgRegs[0])); 2488 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2489 } 2490 2491 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2492 VARegSize = NumGPRs * 4; 2493 VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2494} 2495 2496// The remaining GPRs hold either the beginning of variable-argument 2497// data, or the beginning of an aggregate passed by value (usually 2498// byval). Either way, we allocate stack slots adjacent to the data 2499// provided by our caller, and store the unallocated registers there. 2500// If this is a variadic function, the va_list pointer will begin with 2501// these values; otherwise, this reassembles a (byval) structure that 2502// was split between registers and memory. 2503void 2504ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 2505 DebugLoc dl, SDValue &Chain, 2506 unsigned ArgOffset) const { 2507 MachineFunction &MF = DAG.getMachineFunction(); 2508 MachineFrameInfo *MFI = MF.getFrameInfo(); 2509 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2510 unsigned firstRegToSaveIndex; 2511 if (CCInfo.isFirstByValRegValid()) 2512 firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; 2513 else { 2514 firstRegToSaveIndex = CCInfo.getFirstUnallocated 2515 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2516 } 2517 2518 unsigned VARegSize, VARegSaveSize; 2519 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2520 if (VARegSaveSize) { 2521 // If this function is vararg, store any remaining integer argument regs 2522 // to their spots on the stack so that they may be loaded by dereferencing 2523 // the result of va_next.
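// For example, if only r0 holds a named argument, r1-r3 are spilled to
// consecutive slots below so va_arg can walk them like stack arguments.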
2524 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2525 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, 2526 ArgOffset + VARegSaveSize 2527 - VARegSize, 2528 false)); 2529 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2530 getPointerTy()); 2531 2532 SmallVector<SDValue, 4> MemOps; 2533 for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { 2534 const TargetRegisterClass *RC; 2535 if (AFI->isThumb1OnlyFunction()) 2536 RC = ARM::tGPRRegisterClass; 2537 else 2538 RC = ARM::GPRRegisterClass; 2539 2540 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); 2541 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2542 SDValue Store = 2543 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2544 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), 2545 false, false, 0); 2546 MemOps.push_back(Store); 2547 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2548 DAG.getConstant(4, getPointerTy())); 2549 } 2550 if (!MemOps.empty()) 2551 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2552 &MemOps[0], MemOps.size()); 2553 } else 2554 // This will point to the next argument passed via stack. 2555 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2556} 2557 2558SDValue 2559ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2560 CallingConv::ID CallConv, bool isVarArg, 2561 const SmallVectorImpl<ISD::InputArg> 2562 &Ins, 2563 DebugLoc dl, SelectionDAG &DAG, 2564 SmallVectorImpl<SDValue> &InVals) 2565 const { 2566 MachineFunction &MF = DAG.getMachineFunction(); 2567 MachineFrameInfo *MFI = MF.getFrameInfo(); 2568 2569 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2570 2571 // Assign locations to all of the incoming arguments. 2572 SmallVector<CCValAssign, 16> ArgLocs; 2573 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2574 getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); 2575 CCInfo.AnalyzeFormalArguments(Ins, 2576 CCAssignFnForNode(CallConv, /* Return*/ false, 2577 isVarArg)); 2578 2579 SmallVector<SDValue, 16> ArgValues; 2580 int lastInsIndex = -1; 2581 2582 SDValue ArgValue; 2583 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2584 CCValAssign &VA = ArgLocs[i]; 2585 2586 // Arguments stored in registers. 2587 if (VA.isRegLoc()) { 2588 EVT RegVT = VA.getLocVT(); 2589 2590 if (VA.needsCustom()) { 2591 // f64 and vector types are split up into multiple registers or 2592 // combinations of registers and stack slots. 
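// A v2f64 argument, for example, is rebuilt from two f64 halves, each of
// which may itself come from a GPR pair or from a stack slot.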
2593 if (VA.getLocVT() == MVT::v2f64) { 2594 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2595 Chain, DAG, dl); 2596 VA = ArgLocs[++i]; // skip ahead to next loc 2597 SDValue ArgValue2; 2598 if (VA.isMemLoc()) { 2599 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2600 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2601 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2602 MachinePointerInfo::getFixedStack(FI), 2603 false, false, false, 0); 2604 } else { 2605 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2606 Chain, DAG, dl); 2607 } 2608 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2609 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2610 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2611 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2612 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2613 } else 2614 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2615 2616 } else { 2617 const TargetRegisterClass *RC; 2618 2619 if (RegVT == MVT::f32) 2620 RC = ARM::SPRRegisterClass; 2621 else if (RegVT == MVT::f64) 2622 RC = ARM::DPRRegisterClass; 2623 else if (RegVT == MVT::v2f64) 2624 RC = ARM::QPRRegisterClass; 2625 else if (RegVT == MVT::i32) 2626 RC = (AFI->isThumb1OnlyFunction() ? 2627 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2628 else 2629 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2630 2631 // Transform the arguments in physical registers into virtual ones. 2632 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2633 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2634 } 2635 2636 // If this is an 8 or 16-bit value, it is really passed promoted 2637 // to 32 bits. Insert an assert[sz]ext to capture this, then 2638 // truncate to the right size. 2639 switch (VA.getLocInfo()) { 2640 default: llvm_unreachable("Unknown loc info!"); 2641 case CCValAssign::Full: break; 2642 case CCValAssign::BCvt: 2643 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2644 break; 2645 case CCValAssign::SExt: 2646 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2647 DAG.getValueType(VA.getValVT())); 2648 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2649 break; 2650 case CCValAssign::ZExt: 2651 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2652 DAG.getValueType(VA.getValVT())); 2653 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2654 break; 2655 } 2656 2657 InVals.push_back(ArgValue); 2658 2659 } else { // VA.isRegLoc() 2660 2661 // sanity check 2662 assert(VA.isMemLoc()); 2663 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2664 2665 int index = ArgLocs[i].getValNo(); 2666 2667 // Some Ins[] entries become multiple ArgLoc[] entries. 2668 // Process them only once. 2669 if (index != lastInsIndex) 2670 { 2671 ISD::ArgFlagsTy Flags = Ins[index].Flags; 2672 // FIXME: For now, all byval parameter objects are marked mutable. 2673 // This can be changed with more analysis. 2674 // In case of tail call optimization mark all arguments mutable. 2675 // Since they could be overwritten by lowering of arguments in case of 2676 // a tail call. 2677 if (Flags.isByVal()) { 2678 unsigned VARegSize, VARegSaveSize; 2679 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2680 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); 2681 unsigned Bytes = Flags.getByValSize() - VARegSize; 2682 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. 
2683 int FI = MFI->CreateFixedObject(Bytes, 2684 VA.getLocMemOffset(), false); 2685 InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); 2686 } else { 2687 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 2688 VA.getLocMemOffset(), true); 2689 2690 // Create load nodes to retrieve arguments from the stack. 2691 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2692 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2693 MachinePointerInfo::getFixedStack(FI), 2694 false, false, false, 0)); 2695 } 2696 lastInsIndex = index; 2697 } 2698 } 2699 } 2700 2701 // varargs 2702 if (isVarArg) 2703 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); 2704 2705 return Chain; 2706} 2707 2708/// isFloatingPointZero - Return true if this is +0.0. 2709static bool isFloatingPointZero(SDValue Op) { 2710 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2711 return CFP->getValueAPF().isPosZero(); 2712 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2713 // Maybe this has already been legalized into the constant pool? 2714 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2715 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2716 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2717 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2718 return CFP->getValueAPF().isPosZero(); 2719 } 2720 } 2721 return false; 2722} 2723 2724/// Returns appropriate ARM CMP (cmp) and corresponding condition code for 2725/// the given operands. 2726SDValue 2727ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2728 SDValue &ARMcc, SelectionDAG &DAG, 2729 DebugLoc dl) const { 2730 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2731 unsigned C = RHSC->getZExtValue(); 2732 if (!isLegalICmpImmediate(C)) { 2733 // Constant does not fit, try adjusting it by one? 2734 switch (CC) { 2735 default: break; 2736 case ISD::SETLT: 2737 case ISD::SETGE: 2738 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 2739 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 2740 RHS = DAG.getConstant(C-1, MVT::i32); 2741 } 2742 break; 2743 case ISD::SETULT: 2744 case ISD::SETUGE: 2745 if (C != 0 && isLegalICmpImmediate(C-1)) { 2746 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2747 RHS = DAG.getConstant(C-1, MVT::i32); 2748 } 2749 break; 2750 case ISD::SETLE: 2751 case ISD::SETGT: 2752 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 2753 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2754 RHS = DAG.getConstant(C+1, MVT::i32); 2755 } 2756 break; 2757 case ISD::SETULE: 2758 case ISD::SETUGT: 2759 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 2760 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2761 RHS = DAG.getConstant(C+1, MVT::i32); 2762 } 2763 break; 2764 } 2765 } 2766 } 2767 2768 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2769 ARMISD::NodeType CompareType; 2770 switch (CondCode) { 2771 default: 2772 CompareType = ARMISD::CMP; 2773 break; 2774 case ARMCC::EQ: 2775 case ARMCC::NE: 2776 // Uses only Z Flag 2777 CompareType = ARMISD::CMPZ; 2778 break; 2779 } 2780 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2781 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 2782} 2783 2784/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
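/// A compare against +0.0 uses the single-operand CMPFPw0 form instead.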
2785SDValue 2786ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2787 DebugLoc dl) const { 2788 SDValue Cmp; 2789 if (!isFloatingPointZero(RHS)) 2790 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 2791 else 2792 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 2793 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 2794} 2795 2796/// duplicateCmp - Glue values can have only one use, so this function 2797/// duplicates a comparison node. 2798SDValue 2799ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 2800 unsigned Opc = Cmp.getOpcode(); 2801 DebugLoc DL = Cmp.getDebugLoc(); 2802 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 2803 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2804 2805 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 2806 Cmp = Cmp.getOperand(0); 2807 Opc = Cmp.getOpcode(); 2808 if (Opc == ARMISD::CMPFP) 2809 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2810 else { 2811 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 2812 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 2813 } 2814 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 2815} 2816 2817SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2818 SDValue Cond = Op.getOperand(0); 2819 SDValue SelectTrue = Op.getOperand(1); 2820 SDValue SelectFalse = Op.getOperand(2); 2821 DebugLoc dl = Op.getDebugLoc(); 2822 2823 // Convert: 2824 // 2825 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2826 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2827 // 2828 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2829 const ConstantSDNode *CMOVTrue = 2830 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2831 const ConstantSDNode *CMOVFalse = 2832 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2833 2834 if (CMOVTrue && CMOVFalse) { 2835 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2836 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2837 2838 SDValue True; 2839 SDValue False; 2840 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2841 True = SelectTrue; 2842 False = SelectFalse; 2843 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2844 True = SelectFalse; 2845 False = SelectTrue; 2846 } 2847 2848 if (True.getNode() && False.getNode()) { 2849 EVT VT = Op.getValueType(); 2850 SDValue ARMcc = Cond.getOperand(2); 2851 SDValue CCR = Cond.getOperand(3); 2852 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 2853 assert(True.getValueType() == VT); 2854 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2855 } 2856 } 2857 } 2858 2859 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the 2860 // undefined bits before doing a full-word comparison with zero. 
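// In other words, the generic path below becomes: select_cc (Cond & 1), 0, SelectTrue, SelectFalse, setne.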
2861 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 2862 DAG.getConstant(1, Cond.getValueType())); 2863 2864 return DAG.getSelectCC(dl, Cond, 2865 DAG.getConstant(0, Cond.getValueType()), 2866 SelectTrue, SelectFalse, ISD::SETNE); 2867} 2868 2869SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2870 EVT VT = Op.getValueType(); 2871 SDValue LHS = Op.getOperand(0); 2872 SDValue RHS = Op.getOperand(1); 2873 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2874 SDValue TrueVal = Op.getOperand(2); 2875 SDValue FalseVal = Op.getOperand(3); 2876 DebugLoc dl = Op.getDebugLoc(); 2877 2878 if (LHS.getValueType() == MVT::i32) { 2879 SDValue ARMcc; 2880 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2881 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2882 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2883 } 2884 2885 ARMCC::CondCodes CondCode, CondCode2; 2886 FPCCToARMCC(CC, CondCode, CondCode2); 2887 2888 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2889 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2890 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2891 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2892 ARMcc, CCR, Cmp); 2893 if (CondCode2 != ARMCC::AL) { 2894 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2895 // FIXME: Needs another CMP because flag can have but one use. 2896 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2897 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2898 Result, TrueVal, ARMcc2, CCR, Cmp2); 2899 } 2900 return Result; 2901} 2902 2903/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2904/// to morph to an integer compare sequence. 2905static bool canChangeToInt(SDValue Op, bool &SeenZero, 2906 const ARMSubtarget *Subtarget) { 2907 SDNode *N = Op.getNode(); 2908 if (!N->hasOneUse()) 2909 // Otherwise it requires moving the value from fp to integer registers. 2910 return false; 2911 if (!N->getNumValues()) 2912 return false; 2913 EVT VT = Op.getValueType(); 2914 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2915 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 2916 // vmrs are very slow, e.g. cortex-a8. 
2917 return false; 2918 2919 if (isFloatingPointZero(Op)) { 2920 SeenZero = true; 2921 return true; 2922 } 2923 return ISD::isNormalLoad(N); 2924} 2925 2926static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2927 if (isFloatingPointZero(Op)) 2928 return DAG.getConstant(0, MVT::i32); 2929 2930 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2931 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2932 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 2933 Ld->isVolatile(), Ld->isNonTemporal(), 2934 Ld->isInvariant(), Ld->getAlignment()); 2935 2936 llvm_unreachable("Unknown VFP cmp argument!"); 2937} 2938 2939static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2940 SDValue &RetVal1, SDValue &RetVal2) { 2941 if (isFloatingPointZero(Op)) { 2942 RetVal1 = DAG.getConstant(0, MVT::i32); 2943 RetVal2 = DAG.getConstant(0, MVT::i32); 2944 return; 2945 } 2946 2947 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2948 SDValue Ptr = Ld->getBasePtr(); 2949 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2950 Ld->getChain(), Ptr, 2951 Ld->getPointerInfo(), 2952 Ld->isVolatile(), Ld->isNonTemporal(), 2953 Ld->isInvariant(), Ld->getAlignment()); 2954 2955 EVT PtrType = Ptr.getValueType(); 2956 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2957 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2958 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2959 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2960 Ld->getChain(), NewPtr, 2961 Ld->getPointerInfo().getWithOffset(4), 2962 Ld->isVolatile(), Ld->isNonTemporal(), 2963 Ld->isInvariant(), NewAlign); 2964 return; 2965 } 2966 2967 llvm_unreachable("Unknown VFP cmp argument!"); 2968} 2969 2970/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2971/// f32 and even f64 comparisons to integer ones. 2972SDValue 2973ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 2974 SDValue Chain = Op.getOperand(0); 2975 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2976 SDValue LHS = Op.getOperand(2); 2977 SDValue RHS = Op.getOperand(3); 2978 SDValue Dest = Op.getOperand(4); 2979 DebugLoc dl = Op.getDebugLoc(); 2980 2981 bool LHSSeenZero = false; 2982 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 2983 bool RHSSeenZero = false; 2984 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 2985 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 2986 // If unsafe fp math optimization is enabled and there are no other uses of 2987 // the CMP operands, and the condition code is EQ or NE, we can optimize it 2988 // to an integer comparison. 
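// One operand is known to be +0.0, so after clearing the sign bits the equality test reduces to an integer compare of the remaining bits (this also lets -0.0 compare equal to +0.0).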
2989 if (CC == ISD::SETOEQ) 2990 CC = ISD::SETEQ; 2991 else if (CC == ISD::SETUNE) 2992 CC = ISD::SETNE; 2993 2994 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 2995 SDValue ARMcc; 2996 if (LHS.getValueType() == MVT::f32) { 2997 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 2998 bitcastf32Toi32(LHS, DAG), Mask); 2999 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3000 bitcastf32Toi32(RHS, DAG), Mask); 3001 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3002 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3003 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3004 Chain, Dest, ARMcc, CCR, Cmp); 3005 } 3006 3007 SDValue LHS1, LHS2; 3008 SDValue RHS1, RHS2; 3009 expandf64Toi32(LHS, DAG, LHS1, LHS2); 3010 expandf64Toi32(RHS, DAG, RHS1, RHS2); 3011 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 3012 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 3013 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3014 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3015 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3016 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 3017 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 3018 } 3019 3020 return SDValue(); 3021} 3022 3023SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3024 SDValue Chain = Op.getOperand(0); 3025 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3026 SDValue LHS = Op.getOperand(2); 3027 SDValue RHS = Op.getOperand(3); 3028 SDValue Dest = Op.getOperand(4); 3029 DebugLoc dl = Op.getDebugLoc(); 3030 3031 if (LHS.getValueType() == MVT::i32) { 3032 SDValue ARMcc; 3033 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3034 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3035 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3036 Chain, Dest, ARMcc, CCR, Cmp); 3037 } 3038 3039 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 3040 3041 if (getTargetMachine().Options.UnsafeFPMath && 3042 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 3043 CC == ISD::SETNE || CC == ISD::SETUNE)) { 3044 SDValue Result = OptimizeVFPBrcond(Op, DAG); 3045 if (Result.getNode()) 3046 return Result; 3047 } 3048 3049 ARMCC::CondCodes CondCode, CondCode2; 3050 FPCCToARMCC(CC, CondCode, CondCode2); 3051 3052 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3053 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3054 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3055 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3056 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 3057 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3058 if (CondCode2 != ARMCC::AL) { 3059 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 3060 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 3061 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3062 } 3063 return Res; 3064} 3065 3066SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 3067 SDValue Chain = Op.getOperand(0); 3068 SDValue Table = Op.getOperand(1); 3069 SDValue Index = Op.getOperand(2); 3070 DebugLoc dl = Op.getDebugLoc(); 3071 3072 EVT PTy = getPointerTy(); 3073 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3074 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 3075 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 3076 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3077 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 3078 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, 
PTy)); 3079 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3080 if (Subtarget->isThumb2()) { 3081 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3082 // which does another jump to the destination. This also makes it easier 3083 // to translate it to TBB / TBH later. 3084 // FIXME: This might not work if the function is extremely large. 3085 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3086 Addr, Op.getOperand(2), JTI, UId); 3087 } 3088 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3089 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3090 MachinePointerInfo::getJumpTable(), 3091 false, false, false, 0); 3092 Chain = Addr.getValue(1); 3093 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3094 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3095 } else { 3096 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3097 MachinePointerInfo::getJumpTable(), 3098 false, false, false, 0); 3099 Chain = Addr.getValue(1); 3100 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3101 } 3102} 3103 3104static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3105 EVT VT = Op.getValueType(); 3106 DebugLoc dl = Op.getDebugLoc(); 3107 3108 if (Op.getValueType().getVectorElementType() == MVT::i32) { 3109 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3110 return Op; 3111 return DAG.UnrollVectorOp(Op.getNode()); 3112 } 3113 3114 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 3115 "Invalid type for custom lowering!"); 3116 if (VT != MVT::v4i16) 3117 return DAG.UnrollVectorOp(Op.getNode()); 3118 3119 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 3120 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 3121} 3122 3123static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3124 EVT VT = Op.getValueType(); 3125 if (VT.isVector()) 3126 return LowerVectorFP_TO_INT(Op, DAG); 3127 3128 DebugLoc dl = Op.getDebugLoc(); 3129 unsigned Opc; 3130 3131 switch (Op.getOpcode()) { 3132 default: llvm_unreachable("Invalid opcode!"); 3133 case ISD::FP_TO_SINT: 3134 Opc = ARMISD::FTOSI; 3135 break; 3136 case ISD::FP_TO_UINT: 3137 Opc = ARMISD::FTOUI; 3138 break; 3139 } 3140 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3141 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3142} 3143 3144static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3145 EVT VT = Op.getValueType(); 3146 DebugLoc dl = Op.getDebugLoc(); 3147 3148 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3149 if (VT.getVectorElementType() == MVT::f32) 3150 return Op; 3151 return DAG.UnrollVectorOp(Op.getNode()); 3152 } 3153 3154 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3155 "Invalid type for custom lowering!"); 3156 if (VT != MVT::v4f32) 3157 return DAG.UnrollVectorOp(Op.getNode()); 3158 3159 unsigned CastOpc; 3160 unsigned Opc; 3161 switch (Op.getOpcode()) { 3162 default: llvm_unreachable("Invalid opcode!"); 3163 case ISD::SINT_TO_FP: 3164 CastOpc = ISD::SIGN_EXTEND; 3165 Opc = ISD::SINT_TO_FP; 3166 break; 3167 case ISD::UINT_TO_FP: 3168 CastOpc = ISD::ZERO_EXTEND; 3169 Opc = ISD::UINT_TO_FP; 3170 break; 3171 } 3172 3173 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3174 return DAG.getNode(Opc, dl, VT, Op); 3175} 3176 3177static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3178 EVT VT = Op.getValueType(); 3179 if (VT.isVector()) 3180 return LowerVectorINT_TO_FP(Op, DAG); 3181 3182 DebugLoc dl = 
Op.getDebugLoc(); 3183 unsigned Opc; 3184 3185 switch (Op.getOpcode()) { 3186 default: llvm_unreachable("Invalid opcode!"); 3187 case ISD::SINT_TO_FP: 3188 Opc = ARMISD::SITOF; 3189 break; 3190 case ISD::UINT_TO_FP: 3191 Opc = ARMISD::UITOF; 3192 break; 3193 } 3194 3195 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3196 return DAG.getNode(Opc, dl, VT, Op); 3197} 3198 3199SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3200 // Implement fcopysign with a fabs and a conditional fneg. 3201 SDValue Tmp0 = Op.getOperand(0); 3202 SDValue Tmp1 = Op.getOperand(1); 3203 DebugLoc dl = Op.getDebugLoc(); 3204 EVT VT = Op.getValueType(); 3205 EVT SrcVT = Tmp1.getValueType(); 3206 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3207 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3208 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3209 3210 if (UseNEON) { 3211 // Use VBSL to copy the sign bit. 3212 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3213 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3214 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3215 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3216 if (VT == MVT::f64) 3217 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3218 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3219 DAG.getConstant(32, MVT::i32)); 3220 else /*if (VT == MVT::f32)*/ 3221 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3222 if (SrcVT == MVT::f32) { 3223 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3224 if (VT == MVT::f64) 3225 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3226 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3227 DAG.getConstant(32, MVT::i32)); 3228 } else if (VT == MVT::f32) 3229 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3230 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3231 DAG.getConstant(32, MVT::i32)); 3232 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3233 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3234 3235 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3236 MVT::i32); 3237 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3238 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3239 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3240 3241 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3242 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3243 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3244 if (VT == MVT::f32) { 3245 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3246 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3247 DAG.getConstant(0, MVT::i32)); 3248 } else { 3249 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3250 } 3251 3252 return Res; 3253 } 3254 3255 // Bitcast operand 1 to i32. 3256 if (SrcVT == MVT::f64) 3257 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3258 &Tmp1, 1).getValue(1); 3259 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3260 3261 // Or in the signbit with integer operations. 3262 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3263 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3264 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3265 if (VT == MVT::f32) { 3266 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3267 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3268 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3269 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3270 } 3271 3272 // f64: Or the high part with signbit and then combine two parts. 
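// Roughly: split Tmp0 into (Lo, Hi), form Hi' = (Hi & 0x7fffffff) | signbit (where signbit is the already-masked sign of Tmp1), and rebuild the f64 with VMOVDRR(Lo, Hi').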
3273 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3274 &Tmp0, 1); 3275 SDValue Lo = Tmp0.getValue(0); 3276 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3277 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3278 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3279} 3280 3281SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3282 MachineFunction &MF = DAG.getMachineFunction(); 3283 MachineFrameInfo *MFI = MF.getFrameInfo(); 3284 MFI->setReturnAddressIsTaken(true); 3285 3286 EVT VT = Op.getValueType(); 3287 DebugLoc dl = Op.getDebugLoc(); 3288 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3289 if (Depth) { 3290 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3291 SDValue Offset = DAG.getConstant(4, MVT::i32); 3292 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3293 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3294 MachinePointerInfo(), false, false, false, 0); 3295 } 3296 3297 // Return LR, which contains the return address. Mark it an implicit live-in. 3298 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3299 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3300} 3301 3302SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3303 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3304 MFI->setFrameAddressIsTaken(true); 3305 3306 EVT VT = Op.getValueType(); 3307 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 3308 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3309 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3310 ? ARM::R7 : ARM::R11; 3311 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3312 while (Depth--) 3313 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3314 MachinePointerInfo(), 3315 false, false, false, 0); 3316 return FrameAddr; 3317} 3318 3319/// ExpandBITCAST - If the target supports VFP, this function is called to 3320/// expand a bit convert where either the source or destination type is i64 to 3321/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3322/// operand type is illegal (e.g., v2f32 for a target that doesn't support 3323/// vectors), since the legalizer won't know what to do with that. 3324static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3325 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3326 DebugLoc dl = N->getDebugLoc(); 3327 SDValue Op = N->getOperand(0); 3328 3329 // This function is only supposed to be called for i64 types, either as the 3330 // source or destination of the bit convert. 3331 EVT SrcVT = Op.getValueType(); 3332 EVT DstVT = N->getValueType(0); 3333 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3334 "ExpandBITCAST called for non-i64 type"); 3335 3336 // Turn i64->f64 into VMOVDRR. 3337 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3338 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3339 DAG.getConstant(0, MVT::i32)); 3340 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3341 DAG.getConstant(1, MVT::i32)); 3342 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3343 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3344 } 3345 3346 // Turn f64->i64 into VMOVRRD. 
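// (VMOVRRD splits the f64 into two i32 halves, which the BUILD_PAIR below reassembles into an i64.)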
3347 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3348 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3349 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3350 // Merge the pieces into a single i64 value. 3351 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3352 } 3353 3354 return SDValue(); 3355} 3356
3357/// getZeroVector - Returns a vector of the specified type with all zero elements. 3358/// Zero vectors are used to represent vector negation and in those cases 3359/// will be implemented with the NEON VNEG instruction. However, VNEG does 3360/// not support i64 elements, so sometimes the zero vectors will need to be 3361/// explicitly constructed. Regardless, use a canonical VMOV to create the 3362/// zero vector. 3363static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 3364 assert(VT.isVector() && "Expected a vector type"); 3365 // The canonical modified immediate encoding of a zero vector is....0! 3366 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3367 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 3368 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 3369 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3370} 3371
3372/// LowerShiftRightParts - Lower SRA_PARTS, which returns two 3373/// i32 values and takes a 2 x i32 value to shift plus a shift amount. 3374SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 3375 SelectionDAG &DAG) const { 3376 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3377 EVT VT = Op.getValueType(); 3378 unsigned VTBits = VT.getSizeInBits(); 3379 DebugLoc dl = Op.getDebugLoc(); 3380 SDValue ShOpLo = Op.getOperand(0); 3381 SDValue ShOpHi = Op.getOperand(1); 3382 SDValue ShAmt = Op.getOperand(2); 3383 SDValue ARMcc; 3384 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 3385 3386 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 3387 3388 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3389 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3390 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 3391 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3392 DAG.getConstant(VTBits, MVT::i32)); 3393 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 3394 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3395 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 3396 3397 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3398 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3399 ARMcc, DAG, dl); 3400 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 3401 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 3402 CCR, Cmp); 3403 3404 SDValue Ops[2] = { Lo, Hi }; 3405 return DAG.getMergeValues(Ops, 2, dl); 3406} 3407
3408/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 3409/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
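/// Conceptually, with VTBits == 32 the high result is: (ShAmt >= 32) ? ShOpLo << (ShAmt - 32) : (ShOpHi << ShAmt) | (ShOpLo >> (32 - ShAmt)); the CMOV in the body selects between those two forms based on the sign of ShAmt - 32.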
3410SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 3411 SelectionDAG &DAG) const { 3412 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3413 EVT VT = Op.getValueType(); 3414 unsigned VTBits = VT.getSizeInBits(); 3415 DebugLoc dl = Op.getDebugLoc(); 3416 SDValue ShOpLo = Op.getOperand(0); 3417 SDValue ShOpHi = Op.getOperand(1); 3418 SDValue ShAmt = Op.getOperand(2); 3419 SDValue ARMcc; 3420 3421 assert(Op.getOpcode() == ISD::SHL_PARTS); 3422 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3423 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3424 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 3425 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3426 DAG.getConstant(VTBits, MVT::i32)); 3427 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 3428 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 3429 3430 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3431 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3432 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3433 ARMcc, DAG, dl); 3434 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 3435 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 3436 CCR, Cmp); 3437 3438 SDValue Ops[2] = { Lo, Hi }; 3439 return DAG.getMergeValues(Ops, 2, dl); 3440} 3441 3442SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3443 SelectionDAG &DAG) const { 3444 // The rounding mode is in bits 23:22 of the FPSCR. 3445 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 3446 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 3447 // so that the shift + and get folded into a bitfield extract. 3448 DebugLoc dl = Op.getDebugLoc(); 3449 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 3450 DAG.getConstant(Intrinsic::arm_get_fpscr, 3451 MVT::i32)); 3452 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 3453 DAG.getConstant(1U << 22, MVT::i32)); 3454 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 3455 DAG.getConstant(22, MVT::i32)); 3456 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 3457 DAG.getConstant(3, MVT::i32)); 3458} 3459 3460static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 3461 const ARMSubtarget *ST) { 3462 EVT VT = N->getValueType(0); 3463 DebugLoc dl = N->getDebugLoc(); 3464 3465 if (!ST->hasV6T2Ops()) 3466 return SDValue(); 3467 3468 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 3469 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 3470} 3471 3472static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 3473 const ARMSubtarget *ST) { 3474 EVT VT = N->getValueType(0); 3475 DebugLoc dl = N->getDebugLoc(); 3476 3477 if (!VT.isVector()) 3478 return SDValue(); 3479 3480 // Lower vector shifts on NEON to use VSHL. 3481 assert(ST->hasNEON() && "unexpected vector shift"); 3482 3483 // Left shifts translate directly to the vshiftu intrinsic. 3484 if (N->getOpcode() == ISD::SHL) 3485 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3486 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 3487 N->getOperand(0), N->getOperand(1)); 3488 3489 assert((N->getOpcode() == ISD::SRA || 3490 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 3491 3492 // NEON uses the same intrinsics for both left and right shifts. For 3493 // right shifts, the shift amounts are negative, so negate the vector of 3494 // shift amounts. 
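// e.g., an SRL of a v4i32 by a splat of 3 is emitted as vshiftu with a shift-amount splat of -3.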
3495 EVT ShiftVT = N->getOperand(1).getValueType(); 3496 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 3497 getZeroVector(ShiftVT, DAG, dl), 3498 N->getOperand(1)); 3499 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 3500 Intrinsic::arm_neon_vshifts : 3501 Intrinsic::arm_neon_vshiftu); 3502 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3503 DAG.getConstant(vshiftInt, MVT::i32), 3504 N->getOperand(0), NegatedCount); 3505} 3506 3507static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 3508 const ARMSubtarget *ST) { 3509 EVT VT = N->getValueType(0); 3510 DebugLoc dl = N->getDebugLoc(); 3511 3512 // We can get here for a node like i32 = ISD::SHL i32, i64 3513 if (VT != MVT::i64) 3514 return SDValue(); 3515 3516 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 3517 "Unknown shift to lower!"); 3518 3519 // We only lower SRA, SRL of 1 here, all others use generic lowering. 3520 if (!isa<ConstantSDNode>(N->getOperand(1)) || 3521 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 3522 return SDValue(); 3523 3524 // If we are in thumb mode, we don't have RRX. 3525 if (ST->isThumb1Only()) return SDValue(); 3526 3527 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 3528 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3529 DAG.getConstant(0, MVT::i32)); 3530 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3531 DAG.getConstant(1, MVT::i32)); 3532 3533 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 3534 // captures the result into a carry flag. 3535 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 3536 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 3537 3538 // The low part is an ARMISD::RRX operand, which shifts the carry in. 3539 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 3540 3541 // Merge the pieces into a single i64 value. 3542 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 3543} 3544 3545static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 3546 SDValue TmpOp0, TmpOp1; 3547 bool Invert = false; 3548 bool Swap = false; 3549 unsigned Opc = 0; 3550 3551 SDValue Op0 = Op.getOperand(0); 3552 SDValue Op1 = Op.getOperand(1); 3553 SDValue CC = Op.getOperand(2); 3554 EVT VT = Op.getValueType(); 3555 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3556 DebugLoc dl = Op.getDebugLoc(); 3557 3558 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 3559 switch (SetCCOpcode) { 3560 default: llvm_unreachable("Illegal FP comparison"); 3561 case ISD::SETUNE: 3562 case ISD::SETNE: Invert = true; // Fallthrough 3563 case ISD::SETOEQ: 3564 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3565 case ISD::SETOLT: 3566 case ISD::SETLT: Swap = true; // Fallthrough 3567 case ISD::SETOGT: 3568 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3569 case ISD::SETOLE: 3570 case ISD::SETLE: Swap = true; // Fallthrough 3571 case ISD::SETOGE: 3572 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3573 case ISD::SETUGE: Swap = true; // Fallthrough 3574 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 3575 case ISD::SETUGT: Swap = true; // Fallthrough 3576 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 3577 case ISD::SETUEQ: Invert = true; // Fallthrough 3578 case ISD::SETONE: 3579 // Expand this to (OLT | OGT). 
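// (a setone b) is true iff the operands are ordered and unequal, i.e. (b > a) || (a > b), hence the two VCGTs OR'd together below.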
3580 TmpOp0 = Op0; 3581 TmpOp1 = Op1; 3582 Opc = ISD::OR; 3583 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3584 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 3585 break; 3586 case ISD::SETUO: Invert = true; // Fallthrough 3587 case ISD::SETO: 3588 // Expand this to (OLT | OGE). 3589 TmpOp0 = Op0; 3590 TmpOp1 = Op1; 3591 Opc = ISD::OR; 3592 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3593 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 3594 break; 3595 } 3596 } else { 3597 // Integer comparisons. 3598 switch (SetCCOpcode) { 3599 default: llvm_unreachable("Illegal integer comparison"); 3600 case ISD::SETNE: Invert = true; 3601 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3602 case ISD::SETLT: Swap = true; 3603 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3604 case ISD::SETLE: Swap = true; 3605 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3606 case ISD::SETULT: Swap = true; 3607 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 3608 case ISD::SETULE: Swap = true; 3609 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 3610 } 3611 3612 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 3613 if (Opc == ARMISD::VCEQ) { 3614 3615 SDValue AndOp; 3616 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3617 AndOp = Op0; 3618 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 3619 AndOp = Op1; 3620 3621 // Ignore bitconvert. 3622 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 3623 AndOp = AndOp.getOperand(0); 3624 3625 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 3626 Opc = ARMISD::VTST; 3627 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 3628 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 3629 Invert = !Invert; 3630 } 3631 } 3632 } 3633 3634 if (Swap) 3635 std::swap(Op0, Op1); 3636 3637 // If one of the operands is a constant vector zero, attempt to fold the 3638 // comparison to a specialized compare-against-zero form. 3639 SDValue SingleOp; 3640 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3641 SingleOp = Op0; 3642 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 3643 if (Opc == ARMISD::VCGE) 3644 Opc = ARMISD::VCLEZ; 3645 else if (Opc == ARMISD::VCGT) 3646 Opc = ARMISD::VCLTZ; 3647 SingleOp = Op1; 3648 } 3649 3650 SDValue Result; 3651 if (SingleOp.getNode()) { 3652 switch (Opc) { 3653 case ARMISD::VCEQ: 3654 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 3655 case ARMISD::VCGE: 3656 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 3657 case ARMISD::VCLEZ: 3658 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 3659 case ARMISD::VCGT: 3660 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 3661 case ARMISD::VCLTZ: 3662 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 3663 default: 3664 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3665 } 3666 } else { 3667 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3668 } 3669 3670 if (Invert) 3671 Result = DAG.getNOT(dl, Result, VT); 3672 3673 return Result; 3674} 3675 3676/// isNEONModifiedImm - Check if the specified splat value corresponds to a 3677/// valid vector constant for a NEON instruction with a "modified immediate" 3678/// operand (e.g., VMOV). If so, return the encoded value. 
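/// For example, a 32-bit splat of the form 0x00nn0000 is encoded as Op=x, Cmode=010x with Imm=0xnn (see the SplatBitSize == 32 cases below).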
3679static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 3680 unsigned SplatBitSize, SelectionDAG &DAG, 3681 EVT &VT, bool is128Bits, NEONModImmType type) { 3682 unsigned OpCmode, Imm; 3683 3684 // SplatBitSize is set to the smallest size that splats the vector, so a 3685 // zero vector will always have SplatBitSize == 8. However, NEON modified 3686 // immediate instructions others than VMOV do not support the 8-bit encoding 3687 // of a zero vector, and the default encoding of zero is supposed to be the 3688 // 32-bit version. 3689 if (SplatBits == 0) 3690 SplatBitSize = 32; 3691 3692 switch (SplatBitSize) { 3693 case 8: 3694 if (type != VMOVModImm) 3695 return SDValue(); 3696 // Any 1-byte value is OK. Op=0, Cmode=1110. 3697 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 3698 OpCmode = 0xe; 3699 Imm = SplatBits; 3700 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 3701 break; 3702 3703 case 16: 3704 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 3705 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 3706 if ((SplatBits & ~0xff) == 0) { 3707 // Value = 0x00nn: Op=x, Cmode=100x. 3708 OpCmode = 0x8; 3709 Imm = SplatBits; 3710 break; 3711 } 3712 if ((SplatBits & ~0xff00) == 0) { 3713 // Value = 0xnn00: Op=x, Cmode=101x. 3714 OpCmode = 0xa; 3715 Imm = SplatBits >> 8; 3716 break; 3717 } 3718 return SDValue(); 3719 3720 case 32: 3721 // NEON's 32-bit VMOV supports splat values where: 3722 // * only one byte is nonzero, or 3723 // * the least significant byte is 0xff and the second byte is nonzero, or 3724 // * the least significant 2 bytes are 0xff and the third is nonzero. 3725 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3726 if ((SplatBits & ~0xff) == 0) { 3727 // Value = 0x000000nn: Op=x, Cmode=000x. 3728 OpCmode = 0; 3729 Imm = SplatBits; 3730 break; 3731 } 3732 if ((SplatBits & ~0xff00) == 0) { 3733 // Value = 0x0000nn00: Op=x, Cmode=001x. 3734 OpCmode = 0x2; 3735 Imm = SplatBits >> 8; 3736 break; 3737 } 3738 if ((SplatBits & ~0xff0000) == 0) { 3739 // Value = 0x00nn0000: Op=x, Cmode=010x. 3740 OpCmode = 0x4; 3741 Imm = SplatBits >> 16; 3742 break; 3743 } 3744 if ((SplatBits & ~0xff000000) == 0) { 3745 // Value = 0xnn000000: Op=x, Cmode=011x. 3746 OpCmode = 0x6; 3747 Imm = SplatBits >> 24; 3748 break; 3749 } 3750 3751 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 3752 if (type == OtherModImm) return SDValue(); 3753 3754 if ((SplatBits & ~0xffff) == 0 && 3755 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3756 // Value = 0x0000nnff: Op=x, Cmode=1100. 3757 OpCmode = 0xc; 3758 Imm = SplatBits >> 8; 3759 SplatBits |= 0xff; 3760 break; 3761 } 3762 3763 if ((SplatBits & ~0xffffff) == 0 && 3764 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3765 // Value = 0x00nnffff: Op=x, Cmode=1101. 3766 OpCmode = 0xd; 3767 Imm = SplatBits >> 16; 3768 SplatBits |= 0xffff; 3769 break; 3770 } 3771 3772 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3773 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3774 // VMOV.I32. A (very) minor optimization would be to replicate the value 3775 // and fall through here to test for a valid 64-bit splat. But, then the 3776 // caller would also need to check and handle the change in size. 3777 return SDValue(); 3778 3779 case 64: { 3780 if (type != VMOVModImm) 3781 return SDValue(); 3782 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
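// The 8-bit immediate built below has one bit per byte: bit N set means byte N of the splat value is 0xff.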
3783 uint64_t BitMask = 0xff; 3784 uint64_t Val = 0; 3785 unsigned ImmMask = 1; 3786 Imm = 0; 3787 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3788 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3789 Val |= BitMask; 3790 Imm |= ImmMask; 3791 } else if ((SplatBits & BitMask) != 0) { 3792 return SDValue(); 3793 } 3794 BitMask <<= 8; 3795 ImmMask <<= 1; 3796 } 3797 // Op=1, Cmode=1110. 3798 OpCmode = 0x1e; 3799 SplatBits = Val; 3800 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3801 break; 3802 } 3803 3804 default: 3805 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3806 } 3807 3808 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3809 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3810} 3811 3812static bool isVEXTMask(ArrayRef<int> M, EVT VT, 3813 bool &ReverseVEXT, unsigned &Imm) { 3814 unsigned NumElts = VT.getVectorNumElements(); 3815 ReverseVEXT = false; 3816 3817 // Assume that the first shuffle index is not UNDEF. Fail if it is. 3818 if (M[0] < 0) 3819 return false; 3820 3821 Imm = M[0]; 3822 3823 // If this is a VEXT shuffle, the immediate value is the index of the first 3824 // element. The other shuffle indices must be the successive elements after 3825 // the first one. 3826 unsigned ExpectedElt = Imm; 3827 for (unsigned i = 1; i < NumElts; ++i) { 3828 // Increment the expected index. If it wraps around, it may still be 3829 // a VEXT but the source vectors must be swapped. 3830 ExpectedElt += 1; 3831 if (ExpectedElt == NumElts * 2) { 3832 ExpectedElt = 0; 3833 ReverseVEXT = true; 3834 } 3835 3836 if (M[i] < 0) continue; // ignore UNDEF indices 3837 if (ExpectedElt != static_cast<unsigned>(M[i])) 3838 return false; 3839 } 3840 3841 // Adjust the index value if the source operands will be swapped. 3842 if (ReverseVEXT) 3843 Imm -= NumElts; 3844 3845 return true; 3846} 3847 3848/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3849/// instruction with the specified blocksize. (The order of the elements 3850/// within each block of the vector is reversed.) 3851static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 3852 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3853 "Only possible block sizes for VREV are: 16, 32, 64"); 3854 3855 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3856 if (EltSz == 64) 3857 return false; 3858 3859 unsigned NumElts = VT.getVectorNumElements(); 3860 unsigned BlockElts = M[0] + 1; 3861 // If the first shuffle index is UNDEF, be optimistic. 3862 if (M[0] < 0) 3863 BlockElts = BlockSize / EltSz; 3864 3865 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3866 return false; 3867 3868 for (unsigned i = 0; i < NumElts; ++i) { 3869 if (M[i] < 0) continue; // ignore UNDEF indices 3870 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3871 return false; 3872 } 3873 3874 return true; 3875} 3876 3877static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 3878 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 3879 // range, then 0 is placed into the resulting vector. So pretty much any mask 3880 // of 8 elements can work here. 3881 return VT == MVT::v8i8 && M.size() == 8; 3882} 3883 3884static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 3885 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3886 if (EltSz == 64) 3887 return false; 3888 3889 unsigned NumElts = VT.getVectorNumElements(); 3890 WhichResult = (M[0] == 0 ? 
0 : 1); 3891 for (unsigned i = 0; i < NumElts; i += 2) { 3892 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3893 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 3894 return false; 3895 } 3896 return true; 3897} 3898 3899/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3900/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3901/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3902static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 3903 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3904 if (EltSz == 64) 3905 return false; 3906 3907 unsigned NumElts = VT.getVectorNumElements(); 3908 WhichResult = (M[0] == 0 ? 0 : 1); 3909 for (unsigned i = 0; i < NumElts; i += 2) { 3910 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3911 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 3912 return false; 3913 } 3914 return true; 3915} 3916 3917static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 3918 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3919 if (EltSz == 64) 3920 return false; 3921 3922 unsigned NumElts = VT.getVectorNumElements(); 3923 WhichResult = (M[0] == 0 ? 0 : 1); 3924 for (unsigned i = 0; i != NumElts; ++i) { 3925 if (M[i] < 0) continue; // ignore UNDEF indices 3926 if ((unsigned) M[i] != 2 * i + WhichResult) 3927 return false; 3928 } 3929 3930 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3931 if (VT.is64BitVector() && EltSz == 32) 3932 return false; 3933 3934 return true; 3935} 3936 3937/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3938/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3939/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3940static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 3941 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3942 if (EltSz == 64) 3943 return false; 3944 3945 unsigned Half = VT.getVectorNumElements() / 2; 3946 WhichResult = (M[0] == 0 ? 0 : 1); 3947 for (unsigned j = 0; j != 2; ++j) { 3948 unsigned Idx = WhichResult; 3949 for (unsigned i = 0; i != Half; ++i) { 3950 int MIdx = M[i + j * Half]; 3951 if (MIdx >= 0 && (unsigned) MIdx != Idx) 3952 return false; 3953 Idx += 2; 3954 } 3955 } 3956 3957 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3958 if (VT.is64BitVector() && EltSz == 32) 3959 return false; 3960 3961 return true; 3962} 3963 3964static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 3965 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3966 if (EltSz == 64) 3967 return false; 3968 3969 unsigned NumElts = VT.getVectorNumElements(); 3970 WhichResult = (M[0] == 0 ? 0 : 1); 3971 unsigned Idx = WhichResult * NumElts / 2; 3972 for (unsigned i = 0; i != NumElts; i += 2) { 3973 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3974 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 3975 return false; 3976 Idx += 1; 3977 } 3978 3979 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3980 if (VT.is64BitVector() && EltSz == 32) 3981 return false; 3982 3983 return true; 3984} 3985 3986/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3987/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3988/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 
3989static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 3990 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3991 if (EltSz == 64) 3992 return false; 3993 3994 unsigned NumElts = VT.getVectorNumElements(); 3995 WhichResult = (M[0] == 0 ? 0 : 1); 3996 unsigned Idx = WhichResult * NumElts / 2; 3997 for (unsigned i = 0; i != NumElts; i += 2) { 3998 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3999 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 4000 return false; 4001 Idx += 1; 4002 } 4003 4004 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4005 if (VT.is64BitVector() && EltSz == 32) 4006 return false; 4007 4008 return true; 4009} 4010 4011// If N is an integer constant that can be moved into a register in one 4012// instruction, return an SDValue of such a constant (will become a MOV 4013// instruction). Otherwise return null. 4014static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 4015 const ARMSubtarget *ST, DebugLoc dl) { 4016 uint64_t Val; 4017 if (!isa<ConstantSDNode>(N)) 4018 return SDValue(); 4019 Val = cast<ConstantSDNode>(N)->getZExtValue(); 4020 4021 if (ST->isThumb1Only()) { 4022 if (Val <= 255 || ~Val <= 255) 4023 return DAG.getConstant(Val, MVT::i32); 4024 } else { 4025 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 4026 return DAG.getConstant(Val, MVT::i32); 4027 } 4028 return SDValue(); 4029} 4030 4031// If this is a case we can't handle, return null and let the default 4032// expansion code take care of it. 4033SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 4034 const ARMSubtarget *ST) const { 4035 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 4036 DebugLoc dl = Op.getDebugLoc(); 4037 EVT VT = Op.getValueType(); 4038 4039 APInt SplatBits, SplatUndef; 4040 unsigned SplatBitSize; 4041 bool HasAnyUndefs; 4042 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 4043 if (SplatBitSize <= 64) { 4044 // Check if an immediate VMOV works. 4045 EVT VmovVT; 4046 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 4047 SplatUndef.getZExtValue(), SplatBitSize, 4048 DAG, VmovVT, VT.is128BitVector(), 4049 VMOVModImm); 4050 if (Val.getNode()) { 4051 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 4052 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4053 } 4054 4055 // Try an immediate VMVN. 4056 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 4057 Val = isNEONModifiedImm(NegatedImm, 4058 SplatUndef.getZExtValue(), SplatBitSize, 4059 DAG, VmovVT, VT.is128BitVector(), 4060 VMVNModImm); 4061 if (Val.getNode()) { 4062 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 4063 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4064 } 4065 4066 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 4067 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 4068 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 4069 if (ImmVal != -1) { 4070 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 4071 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 4072 } 4073 } 4074 } 4075 } 4076 4077 // Scan through the operands to see if only one value is used. 
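// Also track whether only element 0 is defined and whether every element is a constant; these determine the SCALAR_TO_VECTOR, VDUP, and constant-pool choices below.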
4078 unsigned NumElts = VT.getVectorNumElements(); 4079 bool isOnlyLowElement = true; 4080 bool usesOnlyOneValue = true; 4081 bool isConstant = true; 4082 SDValue Value; 4083 for (unsigned i = 0; i < NumElts; ++i) { 4084 SDValue V = Op.getOperand(i); 4085 if (V.getOpcode() == ISD::UNDEF) 4086 continue; 4087 if (i > 0) 4088 isOnlyLowElement = false; 4089 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 4090 isConstant = false; 4091 4092 if (!Value.getNode()) 4093 Value = V; 4094 else if (V != Value) 4095 usesOnlyOneValue = false; 4096 } 4097 4098 if (!Value.getNode()) 4099 return DAG.getUNDEF(VT); 4100 4101 if (isOnlyLowElement) 4102 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 4103 4104 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4105 4106 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 4107 // i32 and try again. 4108 if (usesOnlyOneValue && EltSize <= 32) { 4109 if (!isConstant) 4110 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 4111 if (VT.getVectorElementType().isFloatingPoint()) { 4112 SmallVector<SDValue, 8> Ops; 4113 for (unsigned i = 0; i < NumElts; ++i) 4114 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 4115 Op.getOperand(i))); 4116 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 4117 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 4118 Val = LowerBUILD_VECTOR(Val, DAG, ST); 4119 if (Val.getNode()) 4120 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4121 } 4122 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 4123 if (Val.getNode()) 4124 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 4125 } 4126 4127 // If all elements are constants and the case above didn't get hit, fall back 4128 // to the default expansion, which will generate a load from the constant 4129 // pool. 4130 if (isConstant) 4131 return SDValue(); 4132 4133 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 4134 if (NumElts >= 4) { 4135 SDValue shuffle = ReconstructShuffle(Op, DAG); 4136 if (shuffle != SDValue()) 4137 return shuffle; 4138 } 4139 4140 // Vectors with 32- or 64-bit elements can be built by directly assigning 4141 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 4142 // will be legalized. 4143 if (EltSize >= 32) { 4144 // Do the expansion with floating-point types, since that is what the VFP 4145 // registers are defined to use, and since i64 is not legal. 4146 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4147 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4148 SmallVector<SDValue, 8> Ops; 4149 for (unsigned i = 0; i < NumElts; ++i) 4150 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 4151 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4152 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4153 } 4154 4155 return SDValue(); 4156} 4157 4158// Gather data to see if the operation can be modelled as a 4159// shuffle in combination with VEXTs. 
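// (Called from LowerBUILD_VECTOR above for non-constant build vectors with at least four elements.)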
4160SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 4161 SelectionDAG &DAG) const { 4162 DebugLoc dl = Op.getDebugLoc(); 4163 EVT VT = Op.getValueType(); 4164 unsigned NumElts = VT.getVectorNumElements(); 4165 4166 SmallVector<SDValue, 2> SourceVecs; 4167 SmallVector<unsigned, 2> MinElts; 4168 SmallVector<unsigned, 2> MaxElts; 4169 4170 for (unsigned i = 0; i < NumElts; ++i) { 4171 SDValue V = Op.getOperand(i); 4172 if (V.getOpcode() == ISD::UNDEF) 4173 continue; 4174 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 4175 // A shuffle can only come from building a vector from various 4176 // elements of other vectors. 4177 return SDValue(); 4178 } else if (V.getOperand(0).getValueType().getVectorElementType() != 4179 VT.getVectorElementType()) { 4180 // This code doesn't know how to handle shuffles where the vector 4181 // element types do not match (this happens because type legalization 4182 // promotes the return type of EXTRACT_VECTOR_ELT). 4183 // FIXME: It might be appropriate to extend this code to handle 4184 // mismatched types. 4185 return SDValue(); 4186 } 4187 4188 // Record this extraction against the appropriate vector if possible... 4189 SDValue SourceVec = V.getOperand(0); 4190 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 4191 bool FoundSource = false; 4192 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 4193 if (SourceVecs[j] == SourceVec) { 4194 if (MinElts[j] > EltNo) 4195 MinElts[j] = EltNo; 4196 if (MaxElts[j] < EltNo) 4197 MaxElts[j] = EltNo; 4198 FoundSource = true; 4199 break; 4200 } 4201 } 4202 4203 // Or record a new source if not... 4204 if (!FoundSource) { 4205 SourceVecs.push_back(SourceVec); 4206 MinElts.push_back(EltNo); 4207 MaxElts.push_back(EltNo); 4208 } 4209 } 4210 4211 // Currently only do something sane when at most two source vectors 4212 // involved. 4213 if (SourceVecs.size() > 2) 4214 return SDValue(); 4215 4216 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 4217 int VEXTOffsets[2] = {0, 0}; 4218 4219 // This loop extracts the usage patterns of the source vectors 4220 // and prepares appropriate SDValues for a shuffle if possible. 4221 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 4222 if (SourceVecs[i].getValueType() == VT) { 4223 // No VEXT necessary 4224 ShuffleSrcs[i] = SourceVecs[i]; 4225 VEXTOffsets[i] = 0; 4226 continue; 4227 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 4228 // It probably isn't worth padding out a smaller vector just to 4229 // break it down again in a shuffle. 4230 return SDValue(); 4231 } 4232 4233 // Since only 64-bit and 128-bit vectors are legal on ARM and 4234 // we've eliminated the other cases... 
4235 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 4236 "unexpected vector sizes in ReconstructShuffle"); 4237 4238 if (MaxElts[i] - MinElts[i] >= NumElts) { 4239 // Span too large for a VEXT to cope 4240 return SDValue(); 4241 } 4242 4243 if (MinElts[i] >= NumElts) { 4244 // The extraction can just take the second half 4245 VEXTOffsets[i] = NumElts; 4246 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4247 SourceVecs[i], 4248 DAG.getIntPtrConstant(NumElts)); 4249 } else if (MaxElts[i] < NumElts) { 4250 // The extraction can just take the first half 4251 VEXTOffsets[i] = 0; 4252 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4253 SourceVecs[i], 4254 DAG.getIntPtrConstant(0)); 4255 } else { 4256 // An actual VEXT is needed 4257 VEXTOffsets[i] = MinElts[i]; 4258 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4259 SourceVecs[i], 4260 DAG.getIntPtrConstant(0)); 4261 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4262 SourceVecs[i], 4263 DAG.getIntPtrConstant(NumElts)); 4264 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 4265 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 4266 } 4267 } 4268 4269 SmallVector<int, 8> Mask; 4270 4271 for (unsigned i = 0; i < NumElts; ++i) { 4272 SDValue Entry = Op.getOperand(i); 4273 if (Entry.getOpcode() == ISD::UNDEF) { 4274 Mask.push_back(-1); 4275 continue; 4276 } 4277 4278 SDValue ExtractVec = Entry.getOperand(0); 4279 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 4280 .getOperand(1))->getSExtValue(); 4281 if (ExtractVec == SourceVecs[0]) { 4282 Mask.push_back(ExtractElt - VEXTOffsets[0]); 4283 } else { 4284 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 4285 } 4286 } 4287 4288 // Final check before we try to produce nonsense... 4289 if (isShuffleMaskLegal(Mask, VT)) 4290 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 4291 &Mask[0]); 4292 4293 return SDValue(); 4294} 4295 4296/// isShuffleMaskLegal - Targets can use this to indicate that they only 4297/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4298/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4299/// are assumed to be legal. 4300bool 4301ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 4302 EVT VT) const { 4303 if (VT.getVectorNumElements() == 4 && 4304 (VT.is128BitVector() || VT.is64BitVector())) { 4305 unsigned PFIndexes[4]; 4306 for (unsigned i = 0; i != 4; ++i) { 4307 if (M[i] < 0) 4308 PFIndexes[i] = 8; 4309 else 4310 PFIndexes[i] = M[i]; 4311 } 4312 4313 // Compute the index in the perfect shuffle table. 
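// The four mask indices (with 8 standing for undef) are combined as a base-9 number; e.g., the mask <1,u,2,3> yields 1*729 + 8*81 + 2*9 + 3 = 1398.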
4314 unsigned PFTableIndex = 4315 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4316 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4317 unsigned Cost = (PFEntry >> 30); 4318 4319 if (Cost <= 4) 4320 return true; 4321 } 4322 4323 bool ReverseVEXT; 4324 unsigned Imm, WhichResult; 4325 4326 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4327 return (EltSize >= 32 || 4328 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 4329 isVREVMask(M, VT, 64) || 4330 isVREVMask(M, VT, 32) || 4331 isVREVMask(M, VT, 16) || 4332 isVEXTMask(M, VT, ReverseVEXT, Imm) || 4333 isVTBLMask(M, VT) || 4334 isVTRNMask(M, VT, WhichResult) || 4335 isVUZPMask(M, VT, WhichResult) || 4336 isVZIPMask(M, VT, WhichResult) || 4337 isVTRN_v_undef_Mask(M, VT, WhichResult) || 4338 isVUZP_v_undef_Mask(M, VT, WhichResult) || 4339 isVZIP_v_undef_Mask(M, VT, WhichResult)); 4340} 4341 4342/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4343/// the specified operations to build the shuffle. 4344static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4345 SDValue RHS, SelectionDAG &DAG, 4346 DebugLoc dl) { 4347 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4348 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4349 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4350 4351 enum { 4352 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4353 OP_VREV, 4354 OP_VDUP0, 4355 OP_VDUP1, 4356 OP_VDUP2, 4357 OP_VDUP3, 4358 OP_VEXT1, 4359 OP_VEXT2, 4360 OP_VEXT3, 4361 OP_VUZPL, // VUZP, left result 4362 OP_VUZPR, // VUZP, right result 4363 OP_VZIPL, // VZIP, left result 4364 OP_VZIPR, // VZIP, right result 4365 OP_VTRNL, // VTRN, left result 4366 OP_VTRNR // VTRN, right result 4367 }; 4368 4369 if (OpNum == OP_COPY) { 4370 if (LHSID == (1*9+2)*9+3) return LHS; 4371 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4372 return RHS; 4373 } 4374 4375 SDValue OpLHS, OpRHS; 4376 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4377 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4378 EVT VT = OpLHS.getValueType(); 4379 4380 switch (OpNum) { 4381 default: llvm_unreachable("Unknown shuffle opcode!"); 4382 case OP_VREV: 4383 // VREV divides the vector in half and swaps within the half. 
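// e.g., VREV64 on a <4 x i32> turns <0,1,2,3> into <1,0,3,2>.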
    if (VT.getVectorElementType() == MVT::i32 ||
        VT.getVectorElementType() == MVT::f32)
      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
    // vrev <4 x i16> -> VREV32
    if (VT.getVectorElementType() == MVT::i16)
      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
    // vrev <4 x i8> -> VREV16
    assert(VT.getVectorElementType() == MVT::i8);
    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
  case OP_VDUP0:
  case OP_VDUP1:
  case OP_VDUP2:
  case OP_VDUP3:
    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
  case OP_VEXT1:
  case OP_VEXT2:
  case OP_VEXT3:
    return DAG.getNode(ARMISD::VEXT, dl, VT,
                       OpLHS, OpRHS,
                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
  case OP_VUZPL:
  case OP_VUZPR:
    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
  case OP_VZIPL:
  case OP_VZIPR:
    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
  case OP_VTRNL:
  case OP_VTRNR:
    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
  }
}

static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
                                       ArrayRef<int> ShuffleMask,
                                       SelectionDAG &DAG) {
  // Check to see if we can use the VTBL instruction.
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();

  SmallVector<SDValue, 8> VTBLMask;
  for (ArrayRef<int>::iterator
         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));

  if (V2.getNode()->getOpcode() == ISD::UNDEF)
    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
                                   &VTBLMask[0], 8));

  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
                                 &VTBLMask[0], 8));
}

static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  // Convert shuffles that are directly supported on NEON to target-specific
  // DAG nodes, instead of keeping them as shuffles and matching them again
  // during code selection. This is more efficient and avoids the possibility
  // of inconsistencies between legalization and selection.
  // FIXME: floating-point vectors should be canonicalized to integer vectors
  // of the same type so that they get CSEd properly.
  ArrayRef<int> ShuffleMask = SVN->getMask();

  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
  if (EltSize <= 32) {
    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
      int Lane = SVN->getSplatIndex();
      // If this is undef splat, generate it via "just" vdup, if possible.
      if (Lane == -1) Lane = 0;

      // Test if V1 is a SCALAR_TO_VECTOR.
      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
      }
      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
      // reaches it).
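      // Illustrative note (not in the original source): e.g. a lane-0 splat of
      // (build_vector x, undef, undef, undef) can be emitted directly as
      // (ARMISD::VDUP x), since only lane 0 carries a defined value.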
4472 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 4473 !isa<ConstantSDNode>(V1.getOperand(0))) { 4474 bool IsScalarToVector = true; 4475 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 4476 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 4477 IsScalarToVector = false; 4478 break; 4479 } 4480 if (IsScalarToVector) 4481 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 4482 } 4483 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 4484 DAG.getConstant(Lane, MVT::i32)); 4485 } 4486 4487 bool ReverseVEXT; 4488 unsigned Imm; 4489 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 4490 if (ReverseVEXT) 4491 std::swap(V1, V2); 4492 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 4493 DAG.getConstant(Imm, MVT::i32)); 4494 } 4495 4496 if (isVREVMask(ShuffleMask, VT, 64)) 4497 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 4498 if (isVREVMask(ShuffleMask, VT, 32)) 4499 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 4500 if (isVREVMask(ShuffleMask, VT, 16)) 4501 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 4502 4503 // Check for Neon shuffles that modify both input vectors in place. 4504 // If both results are used, i.e., if there are two shuffles with the same 4505 // source operands and with masks corresponding to both results of one of 4506 // these operations, DAG memoization will ensure that a single node is 4507 // used for both shuffles. 4508 unsigned WhichResult; 4509 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 4510 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4511 V1, V2).getValue(WhichResult); 4512 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 4513 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4514 V1, V2).getValue(WhichResult); 4515 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 4516 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4517 V1, V2).getValue(WhichResult); 4518 4519 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4520 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4521 V1, V1).getValue(WhichResult); 4522 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4523 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4524 V1, V1).getValue(WhichResult); 4525 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4526 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4527 V1, V1).getValue(WhichResult); 4528 } 4529 4530 // If the shuffle is not directly supported and it has 4 elements, use 4531 // the PerfectShuffle-generated table to synthesize it from other shuffles. 4532 unsigned NumElts = VT.getVectorNumElements(); 4533 if (NumElts == 4) { 4534 unsigned PFIndexes[4]; 4535 for (unsigned i = 0; i != 4; ++i) { 4536 if (ShuffleMask[i] < 0) 4537 PFIndexes[i] = 8; 4538 else 4539 PFIndexes[i] = ShuffleMask[i]; 4540 } 4541 4542 // Compute the index in the perfect shuffle table. 4543 unsigned PFTableIndex = 4544 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4545 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4546 unsigned Cost = (PFEntry >> 30); 4547 4548 if (Cost <= 4) 4549 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 4550 } 4551 4552 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 4553 if (EltSize >= 32) { 4554 // Do the expansion with floating-point types, since that is what the VFP 4555 // registers are defined to use, and since i64 is not legal. 
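    // Illustrative note (not in the original source): e.g. a v2i64 shuffle is
    // rebuilt here as a v2f64 ARMISD::BUILD_VECTOR of EXTRACT_VECTOR_ELT
    // values taken from the bitcast inputs, then bitcast back to v2i64.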
4556 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4557 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4558 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 4559 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 4560 SmallVector<SDValue, 8> Ops; 4561 for (unsigned i = 0; i < NumElts; ++i) { 4562 if (ShuffleMask[i] < 0) 4563 Ops.push_back(DAG.getUNDEF(EltVT)); 4564 else 4565 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 4566 ShuffleMask[i] < (int)NumElts ? V1 : V2, 4567 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 4568 MVT::i32))); 4569 } 4570 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4571 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4572 } 4573 4574 if (VT == MVT::v8i8) { 4575 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 4576 if (NewOp.getNode()) 4577 return NewOp; 4578 } 4579 4580 return SDValue(); 4581} 4582 4583static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4584 // INSERT_VECTOR_ELT is legal only for immediate indexes. 4585 SDValue Lane = Op.getOperand(2); 4586 if (!isa<ConstantSDNode>(Lane)) 4587 return SDValue(); 4588 4589 return Op; 4590} 4591 4592static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4593 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 4594 SDValue Lane = Op.getOperand(1); 4595 if (!isa<ConstantSDNode>(Lane)) 4596 return SDValue(); 4597 4598 SDValue Vec = Op.getOperand(0); 4599 if (Op.getValueType() == MVT::i32 && 4600 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 4601 DebugLoc dl = Op.getDebugLoc(); 4602 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 4603 } 4604 4605 return Op; 4606} 4607 4608static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 4609 // The only time a CONCAT_VECTORS operation can have legal types is when 4610 // two 64-bit vectors are concatenated to a 128-bit vector. 4611 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 4612 "unexpected CONCAT_VECTORS"); 4613 DebugLoc dl = Op.getDebugLoc(); 4614 SDValue Val = DAG.getUNDEF(MVT::v2f64); 4615 SDValue Op0 = Op.getOperand(0); 4616 SDValue Op1 = Op.getOperand(1); 4617 if (Op0.getOpcode() != ISD::UNDEF) 4618 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4619 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 4620 DAG.getIntPtrConstant(0)); 4621 if (Op1.getOpcode() != ISD::UNDEF) 4622 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4623 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 4624 DAG.getIntPtrConstant(1)); 4625 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 4626} 4627 4628/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 4629/// element has been zero/sign-extended, depending on the isSigned parameter, 4630/// from an integer type half its size. 4631static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 4632 bool isSigned) { 4633 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 4634 EVT VT = N->getValueType(0); 4635 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 4636 SDNode *BVN = N->getOperand(0).getNode(); 4637 if (BVN->getValueType(0) != MVT::v4i32 || 4638 BVN->getOpcode() != ISD::BUILD_VECTOR) 4639 return false; 4640 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 
1 : 0; 4641 unsigned HiElt = 1 - LoElt; 4642 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 4643 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 4644 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 4645 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 4646 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 4647 return false; 4648 if (isSigned) { 4649 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 4650 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 4651 return true; 4652 } else { 4653 if (Hi0->isNullValue() && Hi1->isNullValue()) 4654 return true; 4655 } 4656 return false; 4657 } 4658 4659 if (N->getOpcode() != ISD::BUILD_VECTOR) 4660 return false; 4661 4662 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 4663 SDNode *Elt = N->getOperand(i).getNode(); 4664 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 4665 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4666 unsigned HalfSize = EltSize / 2; 4667 if (isSigned) { 4668 if (!isIntN(HalfSize, C->getSExtValue())) 4669 return false; 4670 } else { 4671 if (!isUIntN(HalfSize, C->getZExtValue())) 4672 return false; 4673 } 4674 continue; 4675 } 4676 return false; 4677 } 4678 4679 return true; 4680} 4681 4682/// isSignExtended - Check if a node is a vector value that is sign-extended 4683/// or a constant BUILD_VECTOR with sign-extended elements. 4684static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 4685 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 4686 return true; 4687 if (isExtendedBUILD_VECTOR(N, DAG, true)) 4688 return true; 4689 return false; 4690} 4691 4692/// isZeroExtended - Check if a node is a vector value that is zero-extended 4693/// or a constant BUILD_VECTOR with zero-extended elements. 4694static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 4695 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 4696 return true; 4697 if (isExtendedBUILD_VECTOR(N, DAG, false)) 4698 return true; 4699 return false; 4700} 4701 4702/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending 4703/// load, or BUILD_VECTOR with extended elements, return the unextended value. 4704static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { 4705 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 4706 return N->getOperand(0); 4707 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 4708 return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), 4709 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 4710 LD->isNonTemporal(), LD->isInvariant(), 4711 LD->getAlignment()); 4712 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 4713 // have been legalized as a BITCAST from v4i32. 4714 if (N->getOpcode() == ISD::BITCAST) { 4715 SDNode *BVN = N->getOperand(0).getNode(); 4716 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 4717 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 4718 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4719 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 4720 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 4721 } 4722 // Construct a new BUILD_VECTOR with elements truncated to half the size. 
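  // Illustrative note (not in the original source): e.g. a v4i32 BUILD_VECTOR
  // whose constants all fit in 16 bits is re-emitted as a v4i16 BUILD_VECTOR,
  // which is the narrow operand form that VMULL expects.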
4723 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 4724 EVT VT = N->getValueType(0); 4725 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 4726 unsigned NumElts = VT.getVectorNumElements(); 4727 MVT TruncVT = MVT::getIntegerVT(EltSize); 4728 SmallVector<SDValue, 8> Ops; 4729 for (unsigned i = 0; i != NumElts; ++i) { 4730 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 4731 const APInt &CInt = C->getAPIntValue(); 4732 Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); 4733 } 4734 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 4735 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 4736} 4737 4738static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 4739 unsigned Opcode = N->getOpcode(); 4740 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4741 SDNode *N0 = N->getOperand(0).getNode(); 4742 SDNode *N1 = N->getOperand(1).getNode(); 4743 return N0->hasOneUse() && N1->hasOneUse() && 4744 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 4745 } 4746 return false; 4747} 4748 4749static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 4750 unsigned Opcode = N->getOpcode(); 4751 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4752 SDNode *N0 = N->getOperand(0).getNode(); 4753 SDNode *N1 = N->getOperand(1).getNode(); 4754 return N0->hasOneUse() && N1->hasOneUse() && 4755 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 4756 } 4757 return false; 4758} 4759 4760static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 4761 // Multiplications are only custom-lowered for 128-bit vectors so that 4762 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 4763 EVT VT = Op.getValueType(); 4764 assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); 4765 SDNode *N0 = Op.getOperand(0).getNode(); 4766 SDNode *N1 = Op.getOperand(1).getNode(); 4767 unsigned NewOpc = 0; 4768 bool isMLA = false; 4769 bool isN0SExt = isSignExtended(N0, DAG); 4770 bool isN1SExt = isSignExtended(N1, DAG); 4771 if (isN0SExt && isN1SExt) 4772 NewOpc = ARMISD::VMULLs; 4773 else { 4774 bool isN0ZExt = isZeroExtended(N0, DAG); 4775 bool isN1ZExt = isZeroExtended(N1, DAG); 4776 if (isN0ZExt && isN1ZExt) 4777 NewOpc = ARMISD::VMULLu; 4778 else if (isN1SExt || isN1ZExt) { 4779 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 4780 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 4781 if (isN1SExt && isAddSubSExt(N0, DAG)) { 4782 NewOpc = ARMISD::VMULLs; 4783 isMLA = true; 4784 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 4785 NewOpc = ARMISD::VMULLu; 4786 isMLA = true; 4787 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 4788 std::swap(N0, N1); 4789 NewOpc = ARMISD::VMULLu; 4790 isMLA = true; 4791 } 4792 } 4793 4794 if (!NewOpc) { 4795 if (VT == MVT::v2i64) 4796 // Fall through to expand this. It is not legal. 4797 return SDValue(); 4798 else 4799 // Other vector multiplications are legal. 4800 return Op; 4801 } 4802 } 4803 4804 // Legalize to a VMULL instruction. 4805 DebugLoc DL = Op.getDebugLoc(); 4806 SDValue Op0; 4807 SDValue Op1 = SkipExtension(N1, DAG); 4808 if (!isMLA) { 4809 Op0 = SkipExtension(N0, DAG); 4810 assert(Op0.getValueType().is64BitVector() && 4811 Op1.getValueType().is64BitVector() && 4812 "unexpected types for extended operands to VMULL"); 4813 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 4814 } 4815 4816 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 4817 // isel lowering to take advantage of no-stall back to back vmul + vmla. 
4818 // vmull q0, d4, d6 4819 // vmlal q0, d5, d6 4820 // is faster than 4821 // vaddl q0, d4, d5 4822 // vmovl q1, d6 4823 // vmul q0, q0, q1 4824 SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); 4825 SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); 4826 EVT Op1VT = Op1.getValueType(); 4827 return DAG.getNode(N0->getOpcode(), DL, VT, 4828 DAG.getNode(NewOpc, DL, VT, 4829 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 4830 DAG.getNode(NewOpc, DL, VT, 4831 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 4832} 4833 4834static SDValue 4835LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { 4836 // Convert to float 4837 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 4838 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 4839 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 4840 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 4841 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 4842 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 4843 // Get reciprocal estimate. 4844 // float4 recip = vrecpeq_f32(yf); 4845 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4846 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 4847 // Because char has a smaller range than uchar, we can actually get away 4848 // without any newton steps. This requires that we use a weird bias 4849 // of 0xb000, however (again, this has been exhaustively tested). 4850 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 4851 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 4852 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 4853 Y = DAG.getConstant(0xb000, MVT::i32); 4854 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 4855 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 4856 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 4857 // Convert back to short. 4858 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 4859 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 4860 return X; 4861} 4862 4863static SDValue 4864LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { 4865 SDValue N2; 4866 // Convert to float. 4867 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 4868 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 4869 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 4870 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 4871 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4872 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4873 4874 // Use reciprocal estimate and one refinement step. 4875 // float4 recip = vrecpeq_f32(yf); 4876 // recip *= vrecpsq_f32(yf, recip); 4877 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4878 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 4879 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4880 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4881 N1, N2); 4882 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4883 // Because short has a smaller range than ushort, we can actually get away 4884 // with only a single newton step. This requires that we use a weird bias 4885 // of 89, however (again, this has been exhaustively tested). 
4886 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 4887 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4888 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4889 N1 = DAG.getConstant(0x89, MVT::i32); 4890 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4891 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4892 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4893 // Convert back to integer and return. 4894 // return vmovn_s32(vcvt_s32_f32(result)); 4895 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4896 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4897 return N0; 4898} 4899 4900static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 4901 EVT VT = Op.getValueType(); 4902 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4903 "unexpected type for custom-lowering ISD::SDIV"); 4904 4905 DebugLoc dl = Op.getDebugLoc(); 4906 SDValue N0 = Op.getOperand(0); 4907 SDValue N1 = Op.getOperand(1); 4908 SDValue N2, N3; 4909 4910 if (VT == MVT::v8i8) { 4911 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 4912 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 4913 4914 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4915 DAG.getIntPtrConstant(4)); 4916 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4917 DAG.getIntPtrConstant(4)); 4918 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4919 DAG.getIntPtrConstant(0)); 4920 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4921 DAG.getIntPtrConstant(0)); 4922 4923 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 4924 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 4925 4926 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4927 N0 = LowerCONCAT_VECTORS(N0, DAG); 4928 4929 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 4930 return N0; 4931 } 4932 return LowerSDIV_v4i16(N0, N1, dl, DAG); 4933} 4934 4935static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 4936 EVT VT = Op.getValueType(); 4937 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4938 "unexpected type for custom-lowering ISD::UDIV"); 4939 4940 DebugLoc dl = Op.getDebugLoc(); 4941 SDValue N0 = Op.getOperand(0); 4942 SDValue N1 = Op.getOperand(1); 4943 SDValue N2, N3; 4944 4945 if (VT == MVT::v8i8) { 4946 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 4947 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 4948 4949 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4950 DAG.getIntPtrConstant(4)); 4951 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4952 DAG.getIntPtrConstant(4)); 4953 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4954 DAG.getIntPtrConstant(0)); 4955 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4956 DAG.getIntPtrConstant(0)); 4957 4958 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 4959 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 4960 4961 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4962 N0 = LowerCONCAT_VECTORS(N0, DAG); 4963 4964 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 4965 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 4966 N0); 4967 return N0; 4968 } 4969 4970 // v4i16 sdiv ... Convert to float. 
  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);

  // Use reciprocal estimate and two refinement steps.
  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  // recip *= vrecpsq_f32(yf, recip);
  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   BN1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   BN1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  // Simply multiplying by the reciprocal estimate can leave us a few ulps
  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
  // and that it will never cause us to return an answer too large).
  // float4 result = as_float4(as_int4(xf*recip) + 2);
  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
  N1 = DAG.getConstant(2, MVT::i32);
  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
  // Convert back to integer and return.
  // return vmovn_u32(vcvt_s32_f32(result));
  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
  return N0;
}

static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getNode()->getValueType(0);
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  unsigned Opc;
  bool ExtraOp = false;
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Invalid code");
  case ISD::ADDC: Opc = ARMISD::ADDC; break;
  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
  case ISD::SUBC: Opc = ARMISD::SUBC; break;
  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
  }

  if (!ExtraOp)
    return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
                       Op.getOperand(1));
  return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
                     Op.getOperand(1), Op.getOperand(2));
}

static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
  // Monotonic load/store is legal for all targets
  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
    return Op;

  // Acquire/Release load/store is not legal for targets without a
  // dmb or equivalent available.
  return SDValue();
}


static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
                    SelectionDAG &DAG, unsigned NewOp) {
  DebugLoc dl = Node->getDebugLoc();
  assert (Node->getValueType(0) == MVT::i64 &&
          "Only know how to expand i64 atomics");

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(Node->getOperand(0)); // Chain
  Ops.push_back(Node->getOperand(1)); // Ptr
  // Low part of Val1
  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                            Node->getOperand(2), DAG.getIntPtrConstant(0)));
  // High part of Val1
  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                            Node->getOperand(2), DAG.getIntPtrConstant(1)));
  if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
    // Low part of Val2
    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                              Node->getOperand(3), DAG.getIntPtrConstant(0)));
    // High part of Val2
    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                              Node->getOperand(3), DAG.getIntPtrConstant(1)));
  }
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
  SDValue Result =
    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
                            cast<MemSDNode>(Node)->getMemOperand());
  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
  Results.push_back(Result.getValue(2));
}

SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Don't know how to custom lower this!");
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:
    return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : 5081 LowerGlobalAddressELF(Op, DAG); 5082 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5083 case ISD::SELECT: return LowerSELECT(Op, DAG); 5084 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5085 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 5086 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 5087 case ISD::VASTART: return LowerVASTART(Op, DAG); 5088 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 5089 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 5090 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 5091 case ISD::SINT_TO_FP: 5092 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5093 case ISD::FP_TO_SINT: 5094 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 5095 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5096 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5097 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5098 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 5099 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 5100 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 5101 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 5102 Subtarget); 5103 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 5104 case ISD::SHL: 5105 case ISD::SRL: 5106 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 5107 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 5108 case ISD::SRL_PARTS: 5109 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 5110 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 5111 case ISD::SETCC: return LowerVSETCC(Op, DAG); 5112 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 5113 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5114 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5115 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5116 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 5117 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5118 case ISD::MUL: return LowerMUL(Op, DAG); 5119 case ISD::SDIV: return LowerSDIV(Op, DAG); 5120 case ISD::UDIV: return LowerUDIV(Op, DAG); 5121 case ISD::ADDC: 5122 case ISD::ADDE: 5123 case ISD::SUBC: 5124 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 5125 case ISD::ATOMIC_LOAD: 5126 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 5127 } 5128} 5129 5130/// ReplaceNodeResults - Replace the results of node with an illegal result 5131/// type with new values built out of custom code. 
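/// (Illustrative note, not in the original source: e.g. a 64-bit
/// ATOMIC_LOAD_ADD is replaced below by an ARMISD::ATOMADD64_DAG node whose
/// two i32 results are recombined into an i64 with ISD::BUILD_PAIR.)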
5132void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 5133 SmallVectorImpl<SDValue>&Results, 5134 SelectionDAG &DAG) const { 5135 SDValue Res; 5136 switch (N->getOpcode()) { 5137 default: 5138 llvm_unreachable("Don't know how to custom expand this!"); 5139 case ISD::BITCAST: 5140 Res = ExpandBITCAST(N, DAG); 5141 break; 5142 case ISD::SRL: 5143 case ISD::SRA: 5144 Res = Expand64BitShift(N, DAG, Subtarget); 5145 break; 5146 case ISD::ATOMIC_LOAD_ADD: 5147 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); 5148 return; 5149 case ISD::ATOMIC_LOAD_AND: 5150 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); 5151 return; 5152 case ISD::ATOMIC_LOAD_NAND: 5153 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); 5154 return; 5155 case ISD::ATOMIC_LOAD_OR: 5156 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); 5157 return; 5158 case ISD::ATOMIC_LOAD_SUB: 5159 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); 5160 return; 5161 case ISD::ATOMIC_LOAD_XOR: 5162 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); 5163 return; 5164 case ISD::ATOMIC_SWAP: 5165 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); 5166 return; 5167 case ISD::ATOMIC_CMP_SWAP: 5168 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); 5169 return; 5170 } 5171 if (Res.getNode()) 5172 Results.push_back(Res); 5173} 5174 5175//===----------------------------------------------------------------------===// 5176// ARM Scheduler Hooks 5177//===----------------------------------------------------------------------===// 5178 5179MachineBasicBlock * 5180ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 5181 MachineBasicBlock *BB, 5182 unsigned Size) const { 5183 unsigned dest = MI->getOperand(0).getReg(); 5184 unsigned ptr = MI->getOperand(1).getReg(); 5185 unsigned oldval = MI->getOperand(2).getReg(); 5186 unsigned newval = MI->getOperand(3).getReg(); 5187 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5188 DebugLoc dl = MI->getDebugLoc(); 5189 bool isThumb2 = Subtarget->isThumb2(); 5190 5191 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5192 unsigned scratch = 5193 MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass 5194 : ARM::GPRRegisterClass); 5195 5196 if (isThumb2) { 5197 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5198 MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass); 5199 MRI.constrainRegClass(newval, ARM::rGPRRegisterClass); 5200 } 5201 5202 unsigned ldrOpc, strOpc; 5203 switch (Size) { 5204 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5205 case 1: 5206 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5207 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5208 break; 5209 case 2: 5210 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5211 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5212 break; 5213 case 4: 5214 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5215 strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; 5216 break; 5217 } 5218 5219 MachineFunction *MF = BB->getParent(); 5220 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5221 MachineFunction::iterator It = BB; 5222 ++It; // insert the new blocks after the current block 5223 5224 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5225 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5226 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5227 MF->insert(It, loop1MBB); 5228 MF->insert(It, loop2MBB); 5229 MF->insert(It, exitMBB); 5230 5231 // Transfer the remainder of BB and its successor edges to exitMBB. 5232 exitMBB->splice(exitMBB->begin(), BB, 5233 llvm::next(MachineBasicBlock::iterator(MI)), 5234 BB->end()); 5235 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5236 5237 // thisMBB: 5238 // ... 5239 // fallthrough --> loop1MBB 5240 BB->addSuccessor(loop1MBB); 5241 5242 // loop1MBB: 5243 // ldrex dest, [ptr] 5244 // cmp dest, oldval 5245 // bne exitMBB 5246 BB = loop1MBB; 5247 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5248 if (ldrOpc == ARM::t2LDREX) 5249 MIB.addImm(0); 5250 AddDefaultPred(MIB); 5251 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5252 .addReg(dest).addReg(oldval)); 5253 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5254 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5255 BB->addSuccessor(loop2MBB); 5256 BB->addSuccessor(exitMBB); 5257 5258 // loop2MBB: 5259 // strex scratch, newval, [ptr] 5260 // cmp scratch, #0 5261 // bne loop1MBB 5262 BB = loop2MBB; 5263 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 5264 if (strOpc == ARM::t2STREX) 5265 MIB.addImm(0); 5266 AddDefaultPred(MIB); 5267 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5268 .addReg(scratch).addImm(0)); 5269 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5270 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5271 BB->addSuccessor(loop1MBB); 5272 BB->addSuccessor(exitMBB); 5273 5274 // exitMBB: 5275 // ... 5276 BB = exitMBB; 5277 5278 MI->eraseFromParent(); // The instruction is gone now. 5279 5280 return BB; 5281} 5282 5283MachineBasicBlock * 5284ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5285 unsigned Size, unsigned BinOpcode) const { 5286 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5287 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5288 5289 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5290 MachineFunction *MF = BB->getParent(); 5291 MachineFunction::iterator It = BB; 5292 ++It; 5293 5294 unsigned dest = MI->getOperand(0).getReg(); 5295 unsigned ptr = MI->getOperand(1).getReg(); 5296 unsigned incr = MI->getOperand(2).getReg(); 5297 DebugLoc dl = MI->getDebugLoc(); 5298 bool isThumb2 = Subtarget->isThumb2(); 5299 5300 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5301 if (isThumb2) { 5302 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5303 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5304 } 5305 5306 unsigned ldrOpc, strOpc; 5307 switch (Size) { 5308 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5309 case 1: 5310 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5311 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5312 break; 5313 case 2: 5314 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5315 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5316 break; 5317 case 4: 5318 ldrOpc = isThumb2 ? 
ARM::t2LDREX : ARM::LDREX; 5319 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5320 break; 5321 } 5322 5323 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5324 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5325 MF->insert(It, loopMBB); 5326 MF->insert(It, exitMBB); 5327 5328 // Transfer the remainder of BB and its successor edges to exitMBB. 5329 exitMBB->splice(exitMBB->begin(), BB, 5330 llvm::next(MachineBasicBlock::iterator(MI)), 5331 BB->end()); 5332 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5333 5334 const TargetRegisterClass *TRC = 5335 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5336 unsigned scratch = MRI.createVirtualRegister(TRC); 5337 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 5338 5339 // thisMBB: 5340 // ... 5341 // fallthrough --> loopMBB 5342 BB->addSuccessor(loopMBB); 5343 5344 // loopMBB: 5345 // ldrex dest, ptr 5346 // <binop> scratch2, dest, incr 5347 // strex scratch, scratch2, ptr 5348 // cmp scratch, #0 5349 // bne- loopMBB 5350 // fallthrough --> exitMBB 5351 BB = loopMBB; 5352 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5353 if (ldrOpc == ARM::t2LDREX) 5354 MIB.addImm(0); 5355 AddDefaultPred(MIB); 5356 if (BinOpcode) { 5357 // operand order needs to go the other way for NAND 5358 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 5359 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5360 addReg(incr).addReg(dest)).addReg(0); 5361 else 5362 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5363 addReg(dest).addReg(incr)).addReg(0); 5364 } 5365 5366 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5367 if (strOpc == ARM::t2STREX) 5368 MIB.addImm(0); 5369 AddDefaultPred(MIB); 5370 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5371 .addReg(scratch).addImm(0)); 5372 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5373 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5374 5375 BB->addSuccessor(loopMBB); 5376 BB->addSuccessor(exitMBB); 5377 5378 // exitMBB: 5379 // ... 5380 BB = exitMBB; 5381 5382 MI->eraseFromParent(); // The instruction is gone now. 5383 5384 return BB; 5385} 5386 5387MachineBasicBlock * 5388ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 5389 MachineBasicBlock *BB, 5390 unsigned Size, 5391 bool signExtend, 5392 ARMCC::CondCodes Cond) const { 5393 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5394 5395 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5396 MachineFunction *MF = BB->getParent(); 5397 MachineFunction::iterator It = BB; 5398 ++It; 5399 5400 unsigned dest = MI->getOperand(0).getReg(); 5401 unsigned ptr = MI->getOperand(1).getReg(); 5402 unsigned incr = MI->getOperand(2).getReg(); 5403 unsigned oldval = dest; 5404 DebugLoc dl = MI->getDebugLoc(); 5405 bool isThumb2 = Subtarget->isThumb2(); 5406 5407 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5408 if (isThumb2) { 5409 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5410 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5411 } 5412 5413 unsigned ldrOpc, strOpc, extendOpc; 5414 switch (Size) { 5415 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5416 case 1: 5417 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5418 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5419 extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 5420 break; 5421 case 2: 5422 ldrOpc = isThumb2 ? 
ARM::t2LDREXH : ARM::LDREXH; 5423 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5424 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 5425 break; 5426 case 4: 5427 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5428 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5429 extendOpc = 0; 5430 break; 5431 } 5432 5433 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5434 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5435 MF->insert(It, loopMBB); 5436 MF->insert(It, exitMBB); 5437 5438 // Transfer the remainder of BB and its successor edges to exitMBB. 5439 exitMBB->splice(exitMBB->begin(), BB, 5440 llvm::next(MachineBasicBlock::iterator(MI)), 5441 BB->end()); 5442 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5443 5444 const TargetRegisterClass *TRC = 5445 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5446 unsigned scratch = MRI.createVirtualRegister(TRC); 5447 unsigned scratch2 = MRI.createVirtualRegister(TRC); 5448 5449 // thisMBB: 5450 // ... 5451 // fallthrough --> loopMBB 5452 BB->addSuccessor(loopMBB); 5453 5454 // loopMBB: 5455 // ldrex dest, ptr 5456 // (sign extend dest, if required) 5457 // cmp dest, incr 5458 // cmov.cond scratch2, dest, incr 5459 // strex scratch, scratch2, ptr 5460 // cmp scratch, #0 5461 // bne- loopMBB 5462 // fallthrough --> exitMBB 5463 BB = loopMBB; 5464 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5465 if (ldrOpc == ARM::t2LDREX) 5466 MIB.addImm(0); 5467 AddDefaultPred(MIB); 5468 5469 // Sign extend the value, if necessary. 5470 if (signExtend && extendOpc) { 5471 oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass); 5472 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 5473 .addReg(dest) 5474 .addImm(0)); 5475 } 5476 5477 // Build compare and cmov instructions. 5478 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5479 .addReg(oldval).addReg(incr)); 5480 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 5481 .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); 5482 5483 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5484 if (strOpc == ARM::t2STREX) 5485 MIB.addImm(0); 5486 AddDefaultPred(MIB); 5487 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5488 .addReg(scratch).addImm(0)); 5489 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5490 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5491 5492 BB->addSuccessor(loopMBB); 5493 BB->addSuccessor(exitMBB); 5494 5495 // exitMBB: 5496 // ... 5497 BB = exitMBB; 5498 5499 MI->eraseFromParent(); // The instruction is gone now. 5500 5501 return BB; 5502} 5503 5504MachineBasicBlock * 5505ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 5506 unsigned Op1, unsigned Op2, 5507 bool NeedsCarry, bool IsCmpxchg) const { 5508 // This also handles ATOMIC_SWAP, indicated by Op1==0. 
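  // Illustrative note (not in the original source): for a 64-bit atomic add
  // the loop built below looks roughly like (register names are examples):
  //   loop:
  //     ldrexd r2, r3, [ptr]
  //     adds   r0, r2, vallo
  //     adc    r1, r3, valhi
  //     strexd storesuccess, r0, r1, [ptr]
  //     cmp    storesuccess, #0
  //     bne    loop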
5509 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5510 5511 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5512 MachineFunction *MF = BB->getParent(); 5513 MachineFunction::iterator It = BB; 5514 ++It; 5515 5516 unsigned destlo = MI->getOperand(0).getReg(); 5517 unsigned desthi = MI->getOperand(1).getReg(); 5518 unsigned ptr = MI->getOperand(2).getReg(); 5519 unsigned vallo = MI->getOperand(3).getReg(); 5520 unsigned valhi = MI->getOperand(4).getReg(); 5521 DebugLoc dl = MI->getDebugLoc(); 5522 bool isThumb2 = Subtarget->isThumb2(); 5523 5524 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5525 if (isThumb2) { 5526 MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass); 5527 MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass); 5528 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5529 } 5530 5531 unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; 5532 unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; 5533 5534 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5535 MachineBasicBlock *contBB = 0, *cont2BB = 0; 5536 if (IsCmpxchg) { 5537 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 5538 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 5539 } 5540 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5541 MF->insert(It, loopMBB); 5542 if (IsCmpxchg) { 5543 MF->insert(It, contBB); 5544 MF->insert(It, cont2BB); 5545 } 5546 MF->insert(It, exitMBB); 5547 5548 // Transfer the remainder of BB and its successor edges to exitMBB. 5549 exitMBB->splice(exitMBB->begin(), BB, 5550 llvm::next(MachineBasicBlock::iterator(MI)), 5551 BB->end()); 5552 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5553 5554 const TargetRegisterClass *TRC = 5555 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5556 unsigned storesuccess = MRI.createVirtualRegister(TRC); 5557 5558 // thisMBB: 5559 // ... 5560 // fallthrough --> loopMBB 5561 BB->addSuccessor(loopMBB); 5562 5563 // loopMBB: 5564 // ldrexd r2, r3, ptr 5565 // <binopa> r0, r2, incr 5566 // <binopb> r1, r3, incr 5567 // strexd storesuccess, r0, r1, ptr 5568 // cmp storesuccess, #0 5569 // bne- loopMBB 5570 // fallthrough --> exitMBB 5571 // 5572 // Note that the registers are explicitly specified because there is not any 5573 // way to force the register allocator to allocate a register pair. 5574 // 5575 // FIXME: The hardcoded registers are not necessary for Thumb2, but we 5576 // need to properly enforce the restriction that the two output registers 5577 // for ldrexd must be different. 5578 BB = loopMBB; 5579 // Load 5580 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 5581 .addReg(ARM::R2, RegState::Define) 5582 .addReg(ARM::R3, RegState::Define).addReg(ptr)); 5583 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 5584 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); 5585 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); 5586 5587 if (IsCmpxchg) { 5588 // Add early exit 5589 for (unsigned i = 0; i < 2; i++) { 5590 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 5591 ARM::CMPrr)) 5592 .addReg(i == 0 ? destlo : desthi) 5593 .addReg(i == 0 ? vallo : valhi)); 5594 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5595 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5596 BB->addSuccessor(exitMBB); 5597 BB->addSuccessor(i == 0 ? contBB : cont2BB); 5598 BB = (i == 0 ? 
contBB : cont2BB); 5599 } 5600 5601 // Copy to physregs for strexd 5602 unsigned setlo = MI->getOperand(5).getReg(); 5603 unsigned sethi = MI->getOperand(6).getReg(); 5604 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); 5605 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); 5606 } else if (Op1) { 5607 // Perform binary operation 5608 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) 5609 .addReg(destlo).addReg(vallo)) 5610 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 5611 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) 5612 .addReg(desthi).addReg(valhi)).addReg(0); 5613 } else { 5614 // Copy to physregs for strexd 5615 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); 5616 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); 5617 } 5618 5619 // Store 5620 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 5621 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); 5622 // Cmp+jump 5623 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5624 .addReg(storesuccess).addImm(0)); 5625 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5626 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5627 5628 BB->addSuccessor(loopMBB); 5629 BB->addSuccessor(exitMBB); 5630 5631 // exitMBB: 5632 // ... 5633 BB = exitMBB; 5634 5635 MI->eraseFromParent(); // The instruction is gone now. 5636 5637 return BB; 5638} 5639 5640/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 5641/// registers the function context. 5642void ARMTargetLowering:: 5643SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 5644 MachineBasicBlock *DispatchBB, int FI) const { 5645 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5646 DebugLoc dl = MI->getDebugLoc(); 5647 MachineFunction *MF = MBB->getParent(); 5648 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5649 MachineConstantPool *MCP = MF->getConstantPool(); 5650 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 5651 const Function *F = MF->getFunction(); 5652 5653 bool isThumb = Subtarget->isThumb(); 5654 bool isThumb2 = Subtarget->isThumb2(); 5655 5656 unsigned PCLabelId = AFI->createPICLabelUId(); 5657 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; 5658 ARMConstantPoolValue *CPV = 5659 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 5660 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 5661 5662 const TargetRegisterClass *TRC = 5663 isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5664 5665 // Grab constant pool and fixed stack memory operands. 5666 MachineMemOperand *CPMMO = 5667 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 5668 MachineMemOperand::MOLoad, 4, 4); 5669 5670 MachineMemOperand *FIMMOSt = 5671 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 5672 MachineMemOperand::MOStore, 4, 4); 5673 5674 // Load the address of the dispatch MBB into the jump buffer. 5675 if (isThumb2) { 5676 // Incoming value: jbuf 5677 // ldr.n r5, LCPI1_1 5678 // orr r5, r5, #1 5679 // add r5, pc 5680 // str r5, [$jbuf, #+4] ; &jbuf[1] 5681 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5682 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 5683 .addConstantPoolIndex(CPI) 5684 .addMemOperand(CPMMO)); 5685 // Set the low bit because of thumb mode. 
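    // Illustrative note (not in the original source): setting bit 0 of the
    // stored address marks it as Thumb code, so the indirect branch through
    // the jump buffer resumes execution in Thumb state.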
5686 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5687 AddDefaultCC( 5688 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 5689 .addReg(NewVReg1, RegState::Kill) 5690 .addImm(0x01))); 5691 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5692 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 5693 .addReg(NewVReg2, RegState::Kill) 5694 .addImm(PCLabelId); 5695 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 5696 .addReg(NewVReg3, RegState::Kill) 5697 .addFrameIndex(FI) 5698 .addImm(36) // &jbuf[1] :: pc 5699 .addMemOperand(FIMMOSt)); 5700 } else if (isThumb) { 5701 // Incoming value: jbuf 5702 // ldr.n r1, LCPI1_4 5703 // add r1, pc 5704 // mov r2, #1 5705 // orrs r1, r2 5706 // add r2, $jbuf, #+4 ; &jbuf[1] 5707 // str r1, [r2] 5708 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5709 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 5710 .addConstantPoolIndex(CPI) 5711 .addMemOperand(CPMMO)); 5712 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5713 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 5714 .addReg(NewVReg1, RegState::Kill) 5715 .addImm(PCLabelId); 5716 // Set the low bit because of thumb mode. 5717 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5718 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 5719 .addReg(ARM::CPSR, RegState::Define) 5720 .addImm(1)); 5721 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5722 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 5723 .addReg(ARM::CPSR, RegState::Define) 5724 .addReg(NewVReg2, RegState::Kill) 5725 .addReg(NewVReg3, RegState::Kill)); 5726 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 5727 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 5728 .addFrameIndex(FI) 5729 .addImm(36)); // &jbuf[1] :: pc 5730 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 5731 .addReg(NewVReg4, RegState::Kill) 5732 .addReg(NewVReg5, RegState::Kill) 5733 .addImm(0) 5734 .addMemOperand(FIMMOSt)); 5735 } else { 5736 // Incoming value: jbuf 5737 // ldr r1, LCPI1_1 5738 // add r1, pc, r1 5739 // str r1, [$jbuf, #+4] ; &jbuf[1] 5740 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5741 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 5742 .addConstantPoolIndex(CPI) 5743 .addImm(0) 5744 .addMemOperand(CPMMO)); 5745 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5746 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 5747 .addReg(NewVReg1, RegState::Kill) 5748 .addImm(PCLabelId)); 5749 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 5750 .addReg(NewVReg2, RegState::Kill) 5751 .addFrameIndex(FI) 5752 .addImm(36) // &jbuf[1] :: pc 5753 .addMemOperand(FIMMOSt)); 5754 } 5755} 5756 5757MachineBasicBlock *ARMTargetLowering:: 5758EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 5759 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5760 DebugLoc dl = MI->getDebugLoc(); 5761 MachineFunction *MF = MBB->getParent(); 5762 MachineRegisterInfo *MRI = &MF->getRegInfo(); 5763 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 5764 MachineFrameInfo *MFI = MF->getFrameInfo(); 5765 int FI = MFI->getFunctionContextIndex(); 5766 5767 const TargetRegisterClass *TRC = 5768 Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5769 5770 // Get a mapping of the call site numbers to all of the landing pads they're 5771 // associated with. 
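  // Illustrative note (not in the original source): e.g. if call sites 1 and 3
  // can unwind to the same landing pad, that pad's MBB ends up in both
  // CallSiteNumToLPad[1] and CallSiteNumToLPad[3].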
  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
  unsigned MaxCSNum = 0;
  MachineModuleInfo &MMI = MF->getMMI();
  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
    if (!BB->isLandingPad()) continue;

    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
    // pad.
    for (MachineBasicBlock::iterator
           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
      if (!II->isEHLabel()) continue;

      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
      if (!MMI.hasCallSiteLandingPad(Sym)) continue;

      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
      for (SmallVectorImpl<unsigned>::iterator
             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
           CSI != CSE; ++CSI) {
        CallSiteNumToLPad[*CSI].push_back(BB);
        MaxCSNum = std::max(MaxCSNum, *CSI);
      }
      break;
    }
  }

  // Get an ordered list of the machine basic blocks for the jump table.
  std::vector<MachineBasicBlock*> LPadList;
  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());
  for (unsigned I = 1; I <= MaxCSNum; ++I) {
    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
    for (SmallVectorImpl<MachineBasicBlock*>::iterator
           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
      LPadList.push_back(*II);
      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // Create the jump table and associated information.
  MachineJumpTableInfo *JTI =
    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
  unsigned UId = AFI->createJumpTableUId();

  // Create the MBBs for the dispatch code.

  // Shove the dispatch's address into the return slot in the function context.
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsLandingPad();

  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert the MBBs.
  MF->insert(MF->end(), DispatchBB);
  MF->insert(MF->end(), DispContBB);
  MF->insert(MF->end(), TrapBB);

  // Insert code into the entry block that creates and registers the function
  // context.
5840 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); 5841 5842 MachineMemOperand *FIMMOLd = 5843 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 5844 MachineMemOperand::MOLoad | 5845 MachineMemOperand::MOVolatile, 4, 4); 5846 5847 if (AFI->isThumb1OnlyFunction()) 5848 BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); 5849 else if (!Subtarget->hasVFP2()) 5850 BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); 5851 else 5852 BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); 5853 5854 unsigned NumLPads = LPadList.size(); 5855 if (Subtarget->isThumb2()) { 5856 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5857 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 5858 .addFrameIndex(FI) 5859 .addImm(4) 5860 .addMemOperand(FIMMOLd)); 5861 5862 if (NumLPads < 256) { 5863 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 5864 .addReg(NewVReg1) 5865 .addImm(LPadList.size())); 5866 } else { 5867 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5868 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 5869 .addImm(NumLPads & 0xFFFF)); 5870 5871 unsigned VReg2 = VReg1; 5872 if ((NumLPads & 0xFFFF0000) != 0) { 5873 VReg2 = MRI->createVirtualRegister(TRC); 5874 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 5875 .addReg(VReg1) 5876 .addImm(NumLPads >> 16)); 5877 } 5878 5879 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 5880 .addReg(NewVReg1) 5881 .addReg(VReg2)); 5882 } 5883 5884 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 5885 .addMBB(TrapBB) 5886 .addImm(ARMCC::HI) 5887 .addReg(ARM::CPSR); 5888 5889 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5890 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 5891 .addJumpTableIndex(MJTI) 5892 .addImm(UId)); 5893 5894 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5895 AddDefaultCC( 5896 AddDefaultPred( 5897 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 5898 .addReg(NewVReg3, RegState::Kill) 5899 .addReg(NewVReg1) 5900 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 5901 5902 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 5903 .addReg(NewVReg4, RegState::Kill) 5904 .addReg(NewVReg1) 5905 .addJumpTableIndex(MJTI) 5906 .addImm(UId); 5907 } else if (Subtarget->isThumb()) { 5908 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5909 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 5910 .addFrameIndex(FI) 5911 .addImm(1) 5912 .addMemOperand(FIMMOLd)); 5913 5914 if (NumLPads < 256) { 5915 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 5916 .addReg(NewVReg1) 5917 .addImm(NumLPads)); 5918 } else { 5919 MachineConstantPool *ConstantPool = MF->getConstantPool(); 5920 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 5921 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 5922 5923 // MachineConstantPool wants an explicit alignment. 
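      // If the preferred alignment is unknown (0), fall back to the type's
      // allocation size.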
5924 unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); 5925 if (Align == 0) 5926 Align = getTargetData()->getTypeAllocSize(C->getType()); 5927 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 5928 5929 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5930 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 5931 .addReg(VReg1, RegState::Define) 5932 .addConstantPoolIndex(Idx)); 5933 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 5934 .addReg(NewVReg1) 5935 .addReg(VReg1)); 5936 } 5937 5938 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 5939 .addMBB(TrapBB) 5940 .addImm(ARMCC::HI) 5941 .addReg(ARM::CPSR); 5942 5943 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 5944 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 5945 .addReg(ARM::CPSR, RegState::Define) 5946 .addReg(NewVReg1) 5947 .addImm(2)); 5948 5949 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 5950 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 5951 .addJumpTableIndex(MJTI) 5952 .addImm(UId)); 5953 5954 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 5955 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 5956 .addReg(ARM::CPSR, RegState::Define) 5957 .addReg(NewVReg2, RegState::Kill) 5958 .addReg(NewVReg3)); 5959 5960 MachineMemOperand *JTMMOLd = 5961 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 5962 MachineMemOperand::MOLoad, 4, 4); 5963 5964 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 5965 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 5966 .addReg(NewVReg4, RegState::Kill) 5967 .addImm(0) 5968 .addMemOperand(JTMMOLd)); 5969 5970 unsigned NewVReg6 = MRI->createVirtualRegister(TRC); 5971 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 5972 .addReg(ARM::CPSR, RegState::Define) 5973 .addReg(NewVReg5, RegState::Kill) 5974 .addReg(NewVReg3)); 5975 5976 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 5977 .addReg(NewVReg6, RegState::Kill) 5978 .addJumpTableIndex(MJTI) 5979 .addImm(UId); 5980 } else { 5981 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 5982 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 5983 .addFrameIndex(FI) 5984 .addImm(4) 5985 .addMemOperand(FIMMOLd)); 5986 5987 if (NumLPads < 256) { 5988 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 5989 .addReg(NewVReg1) 5990 .addImm(NumLPads)); 5991 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 5992 unsigned VReg1 = MRI->createVirtualRegister(TRC); 5993 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 5994 .addImm(NumLPads & 0xFFFF)); 5995 5996 unsigned VReg2 = VReg1; 5997 if ((NumLPads & 0xFFFF0000) != 0) { 5998 VReg2 = MRI->createVirtualRegister(TRC); 5999 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 6000 .addReg(VReg1) 6001 .addImm(NumLPads >> 16)); 6002 } 6003 6004 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6005 .addReg(NewVReg1) 6006 .addReg(VReg2)); 6007 } else { 6008 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6009 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6010 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 6011 6012 // MachineConstantPool wants an explicit alignment. 
6013 unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); 6014 if (Align == 0) 6015 Align = getTargetData()->getTypeAllocSize(C->getType()); 6016 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6017 6018 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6019 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 6020 .addReg(VReg1, RegState::Define) 6021 .addConstantPoolIndex(Idx) 6022 .addImm(0)); 6023 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6024 .addReg(NewVReg1) 6025 .addReg(VReg1, RegState::Kill)); 6026 } 6027 6028 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 6029 .addMBB(TrapBB) 6030 .addImm(ARMCC::HI) 6031 .addReg(ARM::CPSR); 6032 6033 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6034 AddDefaultCC( 6035 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 6036 .addReg(NewVReg1) 6037 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 6038 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6039 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 6040 .addJumpTableIndex(MJTI) 6041 .addImm(UId)); 6042 6043 MachineMemOperand *JTMMOLd = 6044 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 6045 MachineMemOperand::MOLoad, 4, 4); 6046 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6047 AddDefaultPred( 6048 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 6049 .addReg(NewVReg3, RegState::Kill) 6050 .addReg(NewVReg4) 6051 .addImm(0) 6052 .addMemOperand(JTMMOLd)); 6053 6054 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 6055 .addReg(NewVReg5, RegState::Kill) 6056 .addReg(NewVReg4) 6057 .addJumpTableIndex(MJTI) 6058 .addImm(UId); 6059 } 6060 6061 // Add the jump table entries as successors to the MBB. 6062 MachineBasicBlock *PrevMBB = 0; 6063 for (std::vector<MachineBasicBlock*>::iterator 6064 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 6065 MachineBasicBlock *CurMBB = *I; 6066 if (PrevMBB != CurMBB) 6067 DispContBB->addSuccessor(CurMBB); 6068 PrevMBB = CurMBB; 6069 } 6070 6071 // N.B. the order the invoke BBs are processed in doesn't matter here. 6072 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); 6073 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 6074 const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); 6075 SmallVector<MachineBasicBlock*, 64> MBBLPads; 6076 for (SmallPtrSet<MachineBasicBlock*, 64>::iterator 6077 I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { 6078 MachineBasicBlock *BB = *I; 6079 6080 // Remove the landing pad successor from the invoke block and replace it 6081 // with the new dispatch block. 6082 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 6083 BB->succ_end()); 6084 while (!Successors.empty()) { 6085 MachineBasicBlock *SMBB = Successors.pop_back_val(); 6086 if (SMBB->isLandingPad()) { 6087 BB->removeSuccessor(SMBB); 6088 MBBLPads.push_back(SMBB); 6089 } 6090 } 6091 6092 BB->addSuccessor(DispatchBB); 6093 6094 // Find the invoke call and mark all of the callee-saved registers as 6095 // 'implicit defined' so that they're spilled. This prevents code from 6096 // moving instructions to before the EH block, where they will never be 6097 // executed. 
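    // (The call is normally near the end of the invoke block, hence the
    //  reverse walk below.)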
6098 for (MachineBasicBlock::reverse_iterator 6099 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { 6100 if (!II->isCall()) continue; 6101 6102 DenseMap<unsigned, bool> DefRegs; 6103 for (MachineInstr::mop_iterator 6104 OI = II->operands_begin(), OE = II->operands_end(); 6105 OI != OE; ++OI) { 6106 if (!OI->isReg()) continue; 6107 DefRegs[OI->getReg()] = true; 6108 } 6109 6110 MachineInstrBuilder MIB(&*II); 6111 6112 for (unsigned i = 0; SavedRegs[i] != 0; ++i) { 6113 unsigned Reg = SavedRegs[i]; 6114 if (Subtarget->isThumb2() && 6115 !ARM::tGPRRegisterClass->contains(Reg) && 6116 !ARM::hGPRRegisterClass->contains(Reg)) 6117 continue; 6118 else if (Subtarget->isThumb1Only() && 6119 !ARM::tGPRRegisterClass->contains(Reg)) 6120 continue; 6121 else if (!Subtarget->isThumb() && 6122 !ARM::GPRRegisterClass->contains(Reg)) 6123 continue; 6124 if (!DefRegs[Reg]) 6125 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); 6126 } 6127 6128 break; 6129 } 6130 } 6131 6132 // Mark all former landing pads as non-landing pads. The dispatch is the only 6133 // landing pad now. 6134 for (SmallVectorImpl<MachineBasicBlock*>::iterator 6135 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) 6136 (*I)->setIsLandingPad(false); 6137 6138 // The instruction is gone now. 6139 MI->eraseFromParent(); 6140 6141 return MBB; 6142} 6143 6144static 6145MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 6146 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 6147 E = MBB->succ_end(); I != E; ++I) 6148 if (*I != Succ) 6149 return *I; 6150 llvm_unreachable("Expecting a BB with two successors!"); 6151} 6152 6153MachineBasicBlock * 6154ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 6155 MachineBasicBlock *BB) const { 6156 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6157 DebugLoc dl = MI->getDebugLoc(); 6158 bool isThumb2 = Subtarget->isThumb2(); 6159 switch (MI->getOpcode()) { 6160 default: { 6161 MI->dump(); 6162 llvm_unreachable("Unexpected instr type to insert"); 6163 } 6164 // The Thumb2 pre-indexed stores have the same MI operands, they just 6165 // define them differently in the .td files from the isel patterns, so 6166 // they need pseudos. 6167 case ARM::t2STR_preidx: 6168 MI->setDesc(TII->get(ARM::t2STR_PRE)); 6169 return BB; 6170 case ARM::t2STRB_preidx: 6171 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 6172 return BB; 6173 case ARM::t2STRH_preidx: 6174 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 6175 return BB; 6176 6177 case ARM::STRi_preidx: 6178 case ARM::STRBi_preidx: { 6179 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 6180 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 6181 // Decode the offset. 
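    // The addrmode2 immediate packs an add/sub flag together with the offset
    // value; unpack both and negate the offset for the 'sub' form.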
6182 unsigned Offset = MI->getOperand(4).getImm(); 6183 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 6184 Offset = ARM_AM::getAM2Offset(Offset); 6185 if (isSub) 6186 Offset = -Offset; 6187 6188 MachineMemOperand *MMO = *MI->memoperands_begin(); 6189 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 6190 .addOperand(MI->getOperand(0)) // Rn_wb 6191 .addOperand(MI->getOperand(1)) // Rt 6192 .addOperand(MI->getOperand(2)) // Rn 6193 .addImm(Offset) // offset (skip GPR==zero_reg) 6194 .addOperand(MI->getOperand(5)) // pred 6195 .addOperand(MI->getOperand(6)) 6196 .addMemOperand(MMO); 6197 MI->eraseFromParent(); 6198 return BB; 6199 } 6200 case ARM::STRr_preidx: 6201 case ARM::STRBr_preidx: 6202 case ARM::STRH_preidx: { 6203 unsigned NewOpc; 6204 switch (MI->getOpcode()) { 6205 default: llvm_unreachable("unexpected opcode!"); 6206 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 6207 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 6208 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 6209 } 6210 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 6211 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 6212 MIB.addOperand(MI->getOperand(i)); 6213 MI->eraseFromParent(); 6214 return BB; 6215 } 6216 case ARM::ATOMIC_LOAD_ADD_I8: 6217 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6218 case ARM::ATOMIC_LOAD_ADD_I16: 6219 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6220 case ARM::ATOMIC_LOAD_ADD_I32: 6221 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 6222 6223 case ARM::ATOMIC_LOAD_AND_I8: 6224 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6225 case ARM::ATOMIC_LOAD_AND_I16: 6226 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6227 case ARM::ATOMIC_LOAD_AND_I32: 6228 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6229 6230 case ARM::ATOMIC_LOAD_OR_I8: 6231 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6232 case ARM::ATOMIC_LOAD_OR_I16: 6233 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6234 case ARM::ATOMIC_LOAD_OR_I32: 6235 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6236 6237 case ARM::ATOMIC_LOAD_XOR_I8: 6238 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6239 case ARM::ATOMIC_LOAD_XOR_I16: 6240 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6241 case ARM::ATOMIC_LOAD_XOR_I32: 6242 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6243 6244 case ARM::ATOMIC_LOAD_NAND_I8: 6245 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6246 case ARM::ATOMIC_LOAD_NAND_I16: 6247 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6248 case ARM::ATOMIC_LOAD_NAND_I32: 6249 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 6250 6251 case ARM::ATOMIC_LOAD_SUB_I8: 6252 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 6253 case ARM::ATOMIC_LOAD_SUB_I16: 6254 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 6255 case ARM::ATOMIC_LOAD_SUB_I32: 6256 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 6257 6258 case ARM::ATOMIC_LOAD_MIN_I8: 6259 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 6260 case ARM::ATOMIC_LOAD_MIN_I16: 6261 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 6262 case ARM::ATOMIC_LOAD_MIN_I32: 6263 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 6264 6265 case ARM::ATOMIC_LOAD_MAX_I8: 6266 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 6267 case ARM::ATOMIC_LOAD_MAX_I16: 6268 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 6269 case ARM::ATOMIC_LOAD_MAX_I32: 6270 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 6271 6272 case ARM::ATOMIC_LOAD_UMIN_I8: 6273 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 6274 case ARM::ATOMIC_LOAD_UMIN_I16: 6275 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 6276 case ARM::ATOMIC_LOAD_UMIN_I32: 6277 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 6278 6279 case ARM::ATOMIC_LOAD_UMAX_I8: 6280 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 6281 case ARM::ATOMIC_LOAD_UMAX_I16: 6282 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 6283 case ARM::ATOMIC_LOAD_UMAX_I32: 6284 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 6285 6286 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 6287 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 6288 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 6289 6290 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 6291 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 6292 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 6293 6294 6295 case ARM::ATOMADD6432: 6296 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 6297 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 6298 /*NeedsCarry*/ true); 6299 case ARM::ATOMSUB6432: 6300 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 6301 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 6302 /*NeedsCarry*/ true); 6303 case ARM::ATOMOR6432: 6304 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 6305 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 6306 case ARM::ATOMXOR6432: 6307 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 6308 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 6309 case ARM::ATOMAND6432: 6310 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 6311 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 6312 case ARM::ATOMSWAP6432: 6313 return EmitAtomicBinary64(MI, BB, 0, 0, false); 6314 case ARM::ATOMCMPXCHG6432: 6315 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 6316 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 6317 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 6318 6319 case ARM::tMOVCCr_pseudo: { 6320 // To "insert" a SELECT_CC instruction, we actually have to insert the 6321 // diamond control-flow pattern. The incoming instruction knows the 6322 // destination vreg to set, the condition code register to branch on, the 6323 // true/false values to select between, and a branch opcode to use. 6324 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6325 MachineFunction::iterator It = BB; 6326 ++It; 6327 6328 // thisMBB: 6329 // ... 6330 // TrueVal = ... 
6331 // cmpTY ccX, r1, r2 6332 // bCC copy1MBB 6333 // fallthrough --> copy0MBB 6334 MachineBasicBlock *thisMBB = BB; 6335 MachineFunction *F = BB->getParent(); 6336 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 6337 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 6338 F->insert(It, copy0MBB); 6339 F->insert(It, sinkMBB); 6340 6341 // Transfer the remainder of BB and its successor edges to sinkMBB. 6342 sinkMBB->splice(sinkMBB->begin(), BB, 6343 llvm::next(MachineBasicBlock::iterator(MI)), 6344 BB->end()); 6345 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 6346 6347 BB->addSuccessor(copy0MBB); 6348 BB->addSuccessor(sinkMBB); 6349 6350 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 6351 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 6352 6353 // copy0MBB: 6354 // %FalseValue = ... 6355 // # fallthrough to sinkMBB 6356 BB = copy0MBB; 6357 6358 // Update machine-CFG edges 6359 BB->addSuccessor(sinkMBB); 6360 6361 // sinkMBB: 6362 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 6363 // ... 6364 BB = sinkMBB; 6365 BuildMI(*BB, BB->begin(), dl, 6366 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 6367 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 6368 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 6369 6370 MI->eraseFromParent(); // The pseudo instruction is gone now. 6371 return BB; 6372 } 6373 6374 case ARM::BCCi64: 6375 case ARM::BCCZi64: { 6376 // If there is an unconditional branch to the other successor, remove it. 6377 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 6378 6379 // Compare both parts that make up the double comparison separately for 6380 // equality. 6381 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 6382 6383 unsigned LHS1 = MI->getOperand(1).getReg(); 6384 unsigned LHS2 = MI->getOperand(2).getReg(); 6385 if (RHSisZero) { 6386 AddDefaultPred(BuildMI(BB, dl, 6387 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6388 .addReg(LHS1).addImm(0)); 6389 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6390 .addReg(LHS2).addImm(0) 6391 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 6392 } else { 6393 unsigned RHS1 = MI->getOperand(3).getReg(); 6394 unsigned RHS2 = MI->getOperand(4).getReg(); 6395 AddDefaultPred(BuildMI(BB, dl, 6396 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6397 .addReg(LHS1).addReg(RHS1)); 6398 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6399 .addReg(LHS2).addReg(RHS2) 6400 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 6401 } 6402 6403 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 6404 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 6405 if (MI->getOperand(0).getImm() == ARMCC::NE) 6406 std::swap(destMBB, exitMBB); 6407 6408 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6409 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 6410 if (isThumb2) 6411 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 6412 else 6413 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 6414 6415 MI->eraseFromParent(); // The pseudo instruction is gone now. 6416 return BB; 6417 } 6418 6419 case ARM::Int_eh_sjlj_setjmp: 6420 case ARM::Int_eh_sjlj_setjmp_nofp: 6421 case ARM::tInt_eh_sjlj_setjmp: 6422 case ARM::t2Int_eh_sjlj_setjmp: 6423 case ARM::t2Int_eh_sjlj_setjmp_nofp: 6424 EmitSjLjDispatchBlock(MI, BB); 6425 return BB; 6426 6427 case ARM::ABS: 6428 case ARM::t2ABS: { 6429 // To insert an ABS instruction, we have to insert the 6430 // diamond control-flow pattern. 
The incoming instruction knows the 6431 // source vreg to test against 0, the destination vreg to set, 6432 // the condition code register to branch on, the 6433 // true/false values to select between, and a branch opcode to use. 6434 // It transforms 6435 // V1 = ABS V0 6436 // into 6437 // V2 = MOVS V0 6438 // BCC (branch to SinkBB if V0 >= 0) 6439 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 6440 // SinkBB: V1 = PHI(V2, V3) 6441 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6442 MachineFunction::iterator BBI = BB; 6443 ++BBI; 6444 MachineFunction *Fn = BB->getParent(); 6445 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 6446 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 6447 Fn->insert(BBI, RSBBB); 6448 Fn->insert(BBI, SinkBB); 6449 6450 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 6451 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 6452 bool isThumb2 = Subtarget->isThumb2(); 6453 MachineRegisterInfo &MRI = Fn->getRegInfo(); 6454 // In Thumb mode S must not be specified if source register is the SP or 6455 // PC and if destination register is the SP, so restrict register class 6456 unsigned NewMovDstReg = MRI.createVirtualRegister( 6457 isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); 6458 unsigned NewRsbDstReg = MRI.createVirtualRegister( 6459 isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); 6460 6461 // Transfer the remainder of BB and its successor edges to sinkMBB. 6462 SinkBB->splice(SinkBB->begin(), BB, 6463 llvm::next(MachineBasicBlock::iterator(MI)), 6464 BB->end()); 6465 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 6466 6467 BB->addSuccessor(RSBBB); 6468 BB->addSuccessor(SinkBB); 6469 6470 // fall through to SinkMBB 6471 RSBBB->addSuccessor(SinkBB); 6472 6473 // insert a movs at the end of BB 6474 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), 6475 NewMovDstReg) 6476 .addReg(ABSSrcReg, RegState::Kill) 6477 .addImm((unsigned)ARMCC::AL).addReg(0) 6478 .addReg(ARM::CPSR, RegState::Define); 6479 6480 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 6481 BuildMI(BB, dl, 6482 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 6483 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 6484 6485 // insert rsbri in RSBBB 6486 // Note: BCC and rsbri will be converted into predicated rsbmi 6487 // by if-conversion pass 6488 BuildMI(*RSBBB, RSBBB->begin(), dl, 6489 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 6490 .addReg(NewMovDstReg, RegState::Kill) 6491 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 6492 6493 // insert PHI in SinkBB, 6494 // reuse ABSDstReg to not change uses of ABS instruction 6495 BuildMI(*SinkBB, SinkBB->begin(), dl, 6496 TII->get(ARM::PHI), ABSDstReg) 6497 .addReg(NewRsbDstReg).addMBB(RSBBB) 6498 .addReg(NewMovDstReg).addMBB(BB); 6499 6500 // remove ABS instruction 6501 MI->eraseFromParent(); 6502 6503 // return last added BB 6504 return SinkBB; 6505 } 6506 } 6507} 6508 6509void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 6510 SDNode *Node) const { 6511 if (!MI->hasPostISelHook()) { 6512 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 6513 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 6514 return; 6515 } 6516 6517 const MCInstrDesc *MCID = &MI->getDesc(); 6518 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 6519 // RSC. 
Coming out of isel, they have an implicit CPSR def, but the optional 6520 // operand is still set to noreg. If needed, set the optional operand's 6521 // register to CPSR, and remove the redundant implicit def. 6522 // 6523 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 6524 6525 // Rename pseudo opcodes. 6526 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 6527 if (NewOpc) { 6528 const ARMBaseInstrInfo *TII = 6529 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 6530 MCID = &TII->get(NewOpc); 6531 6532 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 6533 "converted opcode should be the same except for cc_out"); 6534 6535 MI->setDesc(*MCID); 6536 6537 // Add the optional cc_out operand 6538 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 6539 } 6540 unsigned ccOutIdx = MCID->getNumOperands() - 1; 6541 6542 // Any ARM instruction that sets the 's' bit should specify an optional 6543 // "cc_out" operand in the last operand position. 6544 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 6545 assert(!NewOpc && "Optional cc_out operand required"); 6546 return; 6547 } 6548 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 6549 // since we already have an optional CPSR def. 6550 bool definesCPSR = false; 6551 bool deadCPSR = false; 6552 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 6553 i != e; ++i) { 6554 const MachineOperand &MO = MI->getOperand(i); 6555 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 6556 definesCPSR = true; 6557 if (MO.isDead()) 6558 deadCPSR = true; 6559 MI->RemoveOperand(i); 6560 break; 6561 } 6562 } 6563 if (!definesCPSR) { 6564 assert(!NewOpc && "Optional cc_out operand required"); 6565 return; 6566 } 6567 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 6568 if (deadCPSR) { 6569 assert(!MI->getOperand(ccOutIdx).getReg() && 6570 "expect uninitialized optional cc_out operand"); 6571 return; 6572 } 6573 6574 // If this instruction was defined with an optional CPSR def and its dag node 6575 // had a live implicit CPSR def, then activate the optional CPSR def. 6576 MachineOperand &MO = MI->getOperand(ccOutIdx); 6577 MO.setReg(ARM::CPSR); 6578 MO.setIsDef(true); 6579} 6580 6581//===----------------------------------------------------------------------===// 6582// ARM Optimization Hooks 6583//===----------------------------------------------------------------------===// 6584 6585static 6586SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 6587 TargetLowering::DAGCombinerInfo &DCI) { 6588 SelectionDAG &DAG = DCI.DAG; 6589 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6590 EVT VT = N->getValueType(0); 6591 unsigned Opc = N->getOpcode(); 6592 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 6593 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 6594 SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); 6595 ISD::CondCode CC = ISD::SETCC_INVALID; 6596 6597 if (isSlctCC) { 6598 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 6599 } else { 6600 SDValue CCOp = Slct.getOperand(0); 6601 if (CCOp.getOpcode() == ISD::SETCC) 6602 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 6603 } 6604 6605 bool DoXform = false; 6606 bool InvCC = false; 6607 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 6608 "Bad input!"); 6609 6610 if (LHS.getOpcode() == ISD::Constant && 6611 cast<ConstantSDNode>(LHS)->isNullValue()) { 6612 DoXform = true; 6613 } else if (CC != ISD::SETCC_INVALID && 6614 RHS.getOpcode() == ISD::Constant && 6615 cast<ConstantSDNode>(RHS)->isNullValue()) { 6616 std::swap(LHS, RHS); 6617 SDValue Op0 = Slct.getOperand(0); 6618 EVT OpVT = isSlctCC ? Op0.getValueType() : 6619 Op0.getOperand(0).getValueType(); 6620 bool isInt = OpVT.isInteger(); 6621 CC = ISD::getSetCCInverse(CC, isInt); 6622 6623 if (!TLI.isCondCodeLegal(CC, OpVT)) 6624 return SDValue(); // Inverse operator isn't legal. 6625 6626 DoXform = true; 6627 InvCC = true; 6628 } 6629 6630 if (DoXform) { 6631 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 6632 if (isSlctCC) 6633 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 6634 Slct.getOperand(0), Slct.getOperand(1), CC); 6635 SDValue CCOp = Slct.getOperand(0); 6636 if (InvCC) 6637 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 6638 CCOp.getOperand(0), CCOp.getOperand(1), CC); 6639 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 6640 CCOp, OtherOp, Result); 6641 } 6642 return SDValue(); 6643} 6644 6645// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction 6646// (only after legalization). 6647static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, 6648 TargetLowering::DAGCombinerInfo &DCI, 6649 const ARMSubtarget *Subtarget) { 6650 6651 // Only perform optimization if after legalize, and if NEON is available. We 6652 // also expected both operands to be BUILD_VECTORs. 6653 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() 6654 || N0.getOpcode() != ISD::BUILD_VECTOR 6655 || N1.getOpcode() != ISD::BUILD_VECTOR) 6656 return SDValue(); 6657 6658 // Check output type since VPADDL operand elements can only be 8, 16, or 32. 6659 EVT VT = N->getValueType(0); 6660 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) 6661 return SDValue(); 6662 6663 // Check that the vector operands are of the right form. 6664 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR 6665 // operands, where N is the size of the formed vector. 6666 // Each EXTRACT_VECTOR should have the same input vector and odd or even 6667 // index such that we have a pair wise add pattern. 6668 6669 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing. 6670 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 6671 return SDValue(); 6672 SDValue Vec = N0->getOperand(0)->getOperand(0); 6673 SDNode *V = Vec.getNode(); 6674 unsigned nextIndex = 0; 6675 6676 // For each operands to the ADD which are BUILD_VECTORs, 6677 // check to see if each of their operands are an EXTRACT_VECTOR with 6678 // the same vector and appropriate index. 
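  // For example, with source vector V:
  //   N0 = <V[0], V[2], V[4], V[6]> and N1 = <V[1], V[3], V[5], V[7]>
  // is a pairwise add of adjacent lanes and maps onto a vpaddl of V.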
6679 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { 6680 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT 6681 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 6682 6683 SDValue ExtVec0 = N0->getOperand(i); 6684 SDValue ExtVec1 = N1->getOperand(i); 6685 6686 // First operand is the vector, verify its the same. 6687 if (V != ExtVec0->getOperand(0).getNode() || 6688 V != ExtVec1->getOperand(0).getNode()) 6689 return SDValue(); 6690 6691 // Second is the constant, verify its correct. 6692 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); 6693 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); 6694 6695 // For the constant, we want to see all the even or all the odd. 6696 if (!C0 || !C1 || C0->getZExtValue() != nextIndex 6697 || C1->getZExtValue() != nextIndex+1) 6698 return SDValue(); 6699 6700 // Increment index. 6701 nextIndex+=2; 6702 } else 6703 return SDValue(); 6704 } 6705 6706 // Create VPADDL node. 6707 SelectionDAG &DAG = DCI.DAG; 6708 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6709 6710 // Build operand list. 6711 SmallVector<SDValue, 8> Ops; 6712 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, 6713 TLI.getPointerTy())); 6714 6715 // Input is the vector. 6716 Ops.push_back(Vec); 6717 6718 // Get widened type and narrowed type. 6719 MVT widenType; 6720 unsigned numElem = VT.getVectorNumElements(); 6721 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { 6722 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; 6723 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; 6724 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; 6725 default: 6726 llvm_unreachable("Invalid vector element type for padd optimization."); 6727 } 6728 6729 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 6730 widenType, &Ops[0], Ops.size()); 6731 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); 6732} 6733 6734/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 6735/// operands N0 and N1. This is a helper for PerformADDCombine that is 6736/// called with the default operands, and if that fails, with commuted 6737/// operands. 6738static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 6739 TargetLowering::DAGCombinerInfo &DCI, 6740 const ARMSubtarget *Subtarget){ 6741 6742 // Attempt to create vpaddl for this add. 6743 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 6744 if (Result.getNode()) 6745 return Result; 6746 6747 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 6748 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { 6749 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 6750 if (Result.getNode()) return Result; 6751 } 6752 return SDValue(); 6753} 6754 6755/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 6756/// 6757static SDValue PerformADDCombine(SDNode *N, 6758 TargetLowering::DAGCombinerInfo &DCI, 6759 const ARMSubtarget *Subtarget) { 6760 SDValue N0 = N->getOperand(0); 6761 SDValue N1 = N->getOperand(1); 6762 6763 // First try with the default operand order. 6764 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 6765 if (Result.getNode()) 6766 return Result; 6767 6768 // If that didn't work, try again with the operands commuted. 
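  // (Both the vpaddl and the select folds above only match one operand order,
  //  so retry with the operands swapped.)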
6769 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 6770} 6771 6772/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 6773/// 6774static SDValue PerformSUBCombine(SDNode *N, 6775 TargetLowering::DAGCombinerInfo &DCI) { 6776 SDValue N0 = N->getOperand(0); 6777 SDValue N1 = N->getOperand(1); 6778 6779 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 6780 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 6781 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 6782 if (Result.getNode()) return Result; 6783 } 6784 6785 return SDValue(); 6786} 6787 6788/// PerformVMULCombine 6789/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 6790/// special multiplier accumulator forwarding. 6791/// vmul d3, d0, d2 6792/// vmla d3, d1, d2 6793/// is faster than 6794/// vadd d3, d0, d1 6795/// vmul d3, d3, d2 6796static SDValue PerformVMULCombine(SDNode *N, 6797 TargetLowering::DAGCombinerInfo &DCI, 6798 const ARMSubtarget *Subtarget) { 6799 if (!Subtarget->hasVMLxForwarding()) 6800 return SDValue(); 6801 6802 SelectionDAG &DAG = DCI.DAG; 6803 SDValue N0 = N->getOperand(0); 6804 SDValue N1 = N->getOperand(1); 6805 unsigned Opcode = N0.getOpcode(); 6806 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 6807 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 6808 Opcode = N1.getOpcode(); 6809 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 6810 Opcode != ISD::FADD && Opcode != ISD::FSUB) 6811 return SDValue(); 6812 std::swap(N0, N1); 6813 } 6814 6815 EVT VT = N->getValueType(0); 6816 DebugLoc DL = N->getDebugLoc(); 6817 SDValue N00 = N0->getOperand(0); 6818 SDValue N01 = N0->getOperand(1); 6819 return DAG.getNode(Opcode, DL, VT, 6820 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 6821 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 6822} 6823 6824static SDValue PerformMULCombine(SDNode *N, 6825 TargetLowering::DAGCombinerInfo &DCI, 6826 const ARMSubtarget *Subtarget) { 6827 SelectionDAG &DAG = DCI.DAG; 6828 6829 if (Subtarget->isThumb1Only()) 6830 return SDValue(); 6831 6832 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 6833 return SDValue(); 6834 6835 EVT VT = N->getValueType(0); 6836 if (VT.is64BitVector() || VT.is128BitVector()) 6837 return PerformVMULCombine(N, DCI, Subtarget); 6838 if (VT != MVT::i32) 6839 return SDValue(); 6840 6841 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6842 if (!C) 6843 return SDValue(); 6844 6845 uint64_t MulAmt = C->getZExtValue(); 6846 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 6847 ShiftAmt = ShiftAmt & (32 - 1); 6848 SDValue V = N->getOperand(0); 6849 DebugLoc DL = N->getDebugLoc(); 6850 6851 SDValue Res; 6852 MulAmt >>= ShiftAmt; 6853 if (isPowerOf2_32(MulAmt - 1)) { 6854 // (mul x, 2^N + 1) => (add (shl x, N), x) 6855 Res = DAG.getNode(ISD::ADD, DL, VT, 6856 V, DAG.getNode(ISD::SHL, DL, VT, 6857 V, DAG.getConstant(Log2_32(MulAmt-1), 6858 MVT::i32))); 6859 } else if (isPowerOf2_32(MulAmt + 1)) { 6860 // (mul x, 2^N - 1) => (sub (shl x, N), x) 6861 Res = DAG.getNode(ISD::SUB, DL, VT, 6862 DAG.getNode(ISD::SHL, DL, VT, 6863 V, DAG.getConstant(Log2_32(MulAmt+1), 6864 MVT::i32)), 6865 V); 6866 } else 6867 return SDValue(); 6868 6869 if (ShiftAmt != 0) 6870 Res = DAG.getNode(ISD::SHL, DL, VT, Res, 6871 DAG.getConstant(ShiftAmt, MVT::i32)); 6872 6873 // Do not add new nodes to DAG combiner worklist. 
6874 DCI.CombineTo(N, Res, false); 6875 return SDValue(); 6876} 6877 6878static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { 6879 if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) 6880 return false; 6881 6882 SDValue FalseVal = N.getOperand(0); 6883 ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); 6884 if (!C) 6885 return false; 6886 if (AllOnes) 6887 return C->isAllOnesValue(); 6888 return C->isNullValue(); 6889} 6890 6891/// formConditionalOp - Combine an operation with a conditional move operand 6892/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) 6893/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) 6894static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, 6895 bool Commutable) { 6896 SDValue N0 = N->getOperand(0); 6897 SDValue N1 = N->getOperand(1); 6898 6899 bool isAND = N->getOpcode() == ISD::AND; 6900 bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); 6901 if (!isCand && Commutable) { 6902 isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); 6903 if (isCand) 6904 std::swap(N0, N1); 6905 } 6906 if (!isCand) 6907 return SDValue(); 6908 6909 unsigned Opc = 0; 6910 switch (N->getOpcode()) { 6911 default: llvm_unreachable("Unexpected node"); 6912 case ISD::AND: Opc = ARMISD::CAND; break; 6913 case ISD::OR: Opc = ARMISD::COR; break; 6914 case ISD::XOR: Opc = ARMISD::CXOR; break; 6915 } 6916 return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, 6917 N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), 6918 N1.getOperand(4)); 6919} 6920 6921static SDValue PerformANDCombine(SDNode *N, 6922 TargetLowering::DAGCombinerInfo &DCI, 6923 const ARMSubtarget *Subtarget) { 6924 6925 // Attempt to use immediate-form VBIC 6926 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 6927 DebugLoc dl = N->getDebugLoc(); 6928 EVT VT = N->getValueType(0); 6929 SelectionDAG &DAG = DCI.DAG; 6930 6931 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 6932 return SDValue(); 6933 6934 APInt SplatBits, SplatUndef; 6935 unsigned SplatBitSize; 6936 bool HasAnyUndefs; 6937 if (BVN && 6938 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 6939 if (SplatBitSize <= 64) { 6940 EVT VbicVT; 6941 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 6942 SplatUndef.getZExtValue(), SplatBitSize, 6943 DAG, VbicVT, VT.is128BitVector(), 6944 OtherModImm); 6945 if (Val.getNode()) { 6946 SDValue Input = 6947 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 6948 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 6949 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 6950 } 6951 } 6952 } 6953 6954 if (!Subtarget->isThumb1Only()) { 6955 // (and x, (cmov -1, y, cond)) => (and.cond x, y) 6956 SDValue CAND = formConditionalOp(N, DAG, true); 6957 if (CAND.getNode()) 6958 return CAND; 6959 } 6960 6961 return SDValue(); 6962} 6963 6964/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 6965static SDValue PerformORCombine(SDNode *N, 6966 TargetLowering::DAGCombinerInfo &DCI, 6967 const ARMSubtarget *Subtarget) { 6968 // Attempt to use immediate-form VORR 6969 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 6970 DebugLoc dl = N->getDebugLoc(); 6971 EVT VT = N->getValueType(0); 6972 SelectionDAG &DAG = DCI.DAG; 6973 6974 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 6975 return SDValue(); 6976 6977 APInt SplatBits, SplatUndef; 6978 unsigned SplatBitSize; 6979 bool HasAnyUndefs; 6980 if (BVN && Subtarget->hasNEON() && 6981 
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 6982 if (SplatBitSize <= 64) { 6983 EVT VorrVT; 6984 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 6985 SplatUndef.getZExtValue(), SplatBitSize, 6986 DAG, VorrVT, VT.is128BitVector(), 6987 OtherModImm); 6988 if (Val.getNode()) { 6989 SDValue Input = 6990 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 6991 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 6992 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 6993 } 6994 } 6995 } 6996 6997 if (!Subtarget->isThumb1Only()) { 6998 // (or x, (cmov 0, y, cond)) => (or.cond x, y) 6999 SDValue COR = formConditionalOp(N, DAG, true); 7000 if (COR.getNode()) 7001 return COR; 7002 } 7003 7004 SDValue N0 = N->getOperand(0); 7005 if (N0.getOpcode() != ISD::AND) 7006 return SDValue(); 7007 SDValue N1 = N->getOperand(1); 7008 7009 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 7010 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 7011 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 7012 APInt SplatUndef; 7013 unsigned SplatBitSize; 7014 bool HasAnyUndefs; 7015 7016 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 7017 APInt SplatBits0; 7018 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 7019 HasAnyUndefs) && !HasAnyUndefs) { 7020 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 7021 APInt SplatBits1; 7022 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 7023 HasAnyUndefs) && !HasAnyUndefs && 7024 SplatBits0 == ~SplatBits1) { 7025 // Canonicalize the vector type to make instruction selection simpler. 7026 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 7027 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 7028 N0->getOperand(1), N0->getOperand(0), 7029 N1->getOperand(0)); 7030 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 7031 } 7032 } 7033 } 7034 7035 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 7036 // reasonable. 7037 7038 // BFI is only available on V6T2+ 7039 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 7040 return SDValue(); 7041 7042 DebugLoc DL = N->getDebugLoc(); 7043 // 1) or (and A, mask), val => ARMbfi A, val, mask 7044 // iff (val & mask) == val 7045 // 7046 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 7047 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 7048 // && mask == ~mask2 7049 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 7050 // && ~mask == mask2 7051 // (i.e., copy a bitfield value into another bitfield of the same width) 7052 7053 if (VT != MVT::i32) 7054 return SDValue(); 7055 7056 SDValue N00 = N0.getOperand(0); 7057 7058 // The value and the mask need to be constants so we can verify this is 7059 // actually a bitfield set. If the mask is 0xffff, we can do better 7060 // via a movt instruction, so don't use BFI in that case. 
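  // For example, in case (1) with mask = 0xffff00ff and val = 0x00002a00,
  // bits 8-15 of A are replaced by 0x2a with a single BFI.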
7061 SDValue MaskOp = N0.getOperand(1); 7062 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 7063 if (!MaskC) 7064 return SDValue(); 7065 unsigned Mask = MaskC->getZExtValue(); 7066 if (Mask == 0xffff) 7067 return SDValue(); 7068 SDValue Res; 7069 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 7070 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 7071 if (N1C) { 7072 unsigned Val = N1C->getZExtValue(); 7073 if ((Val & ~Mask) != Val) 7074 return SDValue(); 7075 7076 if (ARM::isBitFieldInvertedMask(Mask)) { 7077 Val >>= CountTrailingZeros_32(~Mask); 7078 7079 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 7080 DAG.getConstant(Val, MVT::i32), 7081 DAG.getConstant(Mask, MVT::i32)); 7082 7083 // Do not add new nodes to DAG combiner worklist. 7084 DCI.CombineTo(N, Res, false); 7085 return SDValue(); 7086 } 7087 } else if (N1.getOpcode() == ISD::AND) { 7088 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 7089 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 7090 if (!N11C) 7091 return SDValue(); 7092 unsigned Mask2 = N11C->getZExtValue(); 7093 7094 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 7095 // as is to match. 7096 if (ARM::isBitFieldInvertedMask(Mask) && 7097 (Mask == ~Mask2)) { 7098 // The pack halfword instruction works better for masks that fit it, 7099 // so use that when it's available. 7100 if (Subtarget->hasT2ExtractPack() && 7101 (Mask == 0xffff || Mask == 0xffff0000)) 7102 return SDValue(); 7103 // 2a 7104 unsigned amt = CountTrailingZeros_32(Mask2); 7105 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 7106 DAG.getConstant(amt, MVT::i32)); 7107 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 7108 DAG.getConstant(Mask, MVT::i32)); 7109 // Do not add new nodes to DAG combiner worklist. 7110 DCI.CombineTo(N, Res, false); 7111 return SDValue(); 7112 } else if (ARM::isBitFieldInvertedMask(~Mask) && 7113 (~Mask == Mask2)) { 7114 // The pack halfword instruction works better for masks that fit it, 7115 // so use that when it's available. 7116 if (Subtarget->hasT2ExtractPack() && 7117 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 7118 return SDValue(); 7119 // 2b 7120 unsigned lsb = CountTrailingZeros_32(Mask); 7121 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 7122 DAG.getConstant(lsb, MVT::i32)); 7123 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 7124 DAG.getConstant(Mask2, MVT::i32)); 7125 // Do not add new nodes to DAG combiner worklist. 7126 DCI.CombineTo(N, Res, false); 7127 return SDValue(); 7128 } 7129 } 7130 7131 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 7132 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 7133 ARM::isBitFieldInvertedMask(~Mask)) { 7134 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 7135 // where lsb(mask) == #shamt and masked bits of B are known zero. 7136 SDValue ShAmt = N00.getOperand(1); 7137 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 7138 unsigned LSB = CountTrailingZeros_32(Mask); 7139 if (ShAmtC != LSB) 7140 return SDValue(); 7141 7142 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 7143 DAG.getConstant(~Mask, MVT::i32)); 7144 7145 // Do not add new nodes to DAG combiner worklist. 
7146 DCI.CombineTo(N, Res, false); 7147 } 7148 7149 return SDValue(); 7150} 7151 7152static SDValue PerformXORCombine(SDNode *N, 7153 TargetLowering::DAGCombinerInfo &DCI, 7154 const ARMSubtarget *Subtarget) { 7155 EVT VT = N->getValueType(0); 7156 SelectionDAG &DAG = DCI.DAG; 7157 7158 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 7159 return SDValue(); 7160 7161 if (!Subtarget->isThumb1Only()) { 7162 // (xor x, (cmov 0, y, cond)) => (xor.cond x, y) 7163 SDValue CXOR = formConditionalOp(N, DAG, true); 7164 if (CXOR.getNode()) 7165 return CXOR; 7166 } 7167 7168 return SDValue(); 7169} 7170 7171/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 7172/// the bits being cleared by the AND are not demanded by the BFI. 7173static SDValue PerformBFICombine(SDNode *N, 7174 TargetLowering::DAGCombinerInfo &DCI) { 7175 SDValue N1 = N->getOperand(1); 7176 if (N1.getOpcode() == ISD::AND) { 7177 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 7178 if (!N11C) 7179 return SDValue(); 7180 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 7181 unsigned LSB = CountTrailingZeros_32(~InvMask); 7182 unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; 7183 unsigned Mask = (1 << Width)-1; 7184 unsigned Mask2 = N11C->getZExtValue(); 7185 if ((Mask & (~Mask2)) == 0) 7186 return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), 7187 N->getOperand(0), N1.getOperand(0), 7188 N->getOperand(2)); 7189 } 7190 return SDValue(); 7191} 7192 7193/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 7194/// ARMISD::VMOVRRD. 7195static SDValue PerformVMOVRRDCombine(SDNode *N, 7196 TargetLowering::DAGCombinerInfo &DCI) { 7197 // vmovrrd(vmovdrr x, y) -> x,y 7198 SDValue InDouble = N->getOperand(0); 7199 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 7200 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 7201 7202 // vmovrrd(load f64) -> (load i32), (load i32) 7203 SDNode *InNode = InDouble.getNode(); 7204 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 7205 InNode->getValueType(0) == MVT::f64 && 7206 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 7207 !cast<LoadSDNode>(InNode)->isVolatile()) { 7208 // TODO: Should this be done for non-FrameIndex operands? 7209 LoadSDNode *LD = cast<LoadSDNode>(InNode); 7210 7211 SelectionDAG &DAG = DCI.DAG; 7212 DebugLoc DL = LD->getDebugLoc(); 7213 SDValue BasePtr = LD->getBasePtr(); 7214 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 7215 LD->getPointerInfo(), LD->isVolatile(), 7216 LD->isNonTemporal(), LD->isInvariant(), 7217 LD->getAlignment()); 7218 7219 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 7220 DAG.getConstant(4, MVT::i32)); 7221 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 7222 LD->getPointerInfo(), LD->isVolatile(), 7223 LD->isNonTemporal(), LD->isInvariant(), 7224 std::min(4U, LD->getAlignment() / 2)); 7225 7226 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 7227 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 7228 DCI.RemoveFromWorklist(LD); 7229 DAG.DeleteNode(LD); 7230 return Result; 7231 } 7232 7233 return SDValue(); 7234} 7235 7236/// PerformVMOVDRRCombine - Target-specific dag combine xforms for 7237/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 
7238static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 7239 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 7240 SDValue Op0 = N->getOperand(0); 7241 SDValue Op1 = N->getOperand(1); 7242 if (Op0.getOpcode() == ISD::BITCAST) 7243 Op0 = Op0.getOperand(0); 7244 if (Op1.getOpcode() == ISD::BITCAST) 7245 Op1 = Op1.getOperand(0); 7246 if (Op0.getOpcode() == ARMISD::VMOVRRD && 7247 Op0.getNode() == Op1.getNode() && 7248 Op0.getResNo() == 0 && Op1.getResNo() == 1) 7249 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 7250 N->getValueType(0), Op0.getOperand(0)); 7251 return SDValue(); 7252} 7253 7254/// PerformSTORECombine - Target-specific dag combine xforms for 7255/// ISD::STORE. 7256static SDValue PerformSTORECombine(SDNode *N, 7257 TargetLowering::DAGCombinerInfo &DCI) { 7258 // Bitcast an i64 store extracted from a vector to f64. 7259 // Otherwise, the i64 value will be legalized to a pair of i32 values. 7260 StoreSDNode *St = cast<StoreSDNode>(N); 7261 SDValue StVal = St->getValue(); 7262 if (!ISD::isNormalStore(St) || St->isVolatile()) 7263 return SDValue(); 7264 7265 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 7266 StVal.getNode()->hasOneUse() && !St->isVolatile()) { 7267 SelectionDAG &DAG = DCI.DAG; 7268 DebugLoc DL = St->getDebugLoc(); 7269 SDValue BasePtr = St->getBasePtr(); 7270 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 7271 StVal.getNode()->getOperand(0), BasePtr, 7272 St->getPointerInfo(), St->isVolatile(), 7273 St->isNonTemporal(), St->getAlignment()); 7274 7275 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 7276 DAG.getConstant(4, MVT::i32)); 7277 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 7278 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 7279 St->isNonTemporal(), 7280 std::min(4U, St->getAlignment() / 2)); 7281 } 7282 7283 if (StVal.getValueType() != MVT::i64 || 7284 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 7285 return SDValue(); 7286 7287 SelectionDAG &DAG = DCI.DAG; 7288 DebugLoc dl = StVal.getDebugLoc(); 7289 SDValue IntVec = StVal.getOperand(0); 7290 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 7291 IntVec.getValueType().getVectorNumElements()); 7292 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 7293 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 7294 Vec, StVal.getOperand(1)); 7295 dl = N->getDebugLoc(); 7296 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 7297 // Make the DAGCombiner fold the bitcasts. 7298 DCI.AddToWorklist(Vec.getNode()); 7299 DCI.AddToWorklist(ExtElt.getNode()); 7300 DCI.AddToWorklist(V.getNode()); 7301 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 7302 St->getPointerInfo(), St->isVolatile(), 7303 St->isNonTemporal(), St->getAlignment(), 7304 St->getTBAAInfo()); 7305} 7306 7307/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 7308/// are normal, non-volatile loads. If so, it is profitable to bitcast an 7309/// i64 vector to have f64 elements, since the value can then be loaded 7310/// directly into a VFP register. 
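/// (A 64-bit VLDR fills a D register in one instruction, instead of the i64
/// being split into a pair of i32 GPR loads during type legalization.)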
7311static bool hasNormalLoadOperand(SDNode *N) { 7312 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 7313 for (unsigned i = 0; i < NumElts; ++i) { 7314 SDNode *Elt = N->getOperand(i).getNode(); 7315 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 7316 return true; 7317 } 7318 return false; 7319} 7320 7321/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 7322/// ISD::BUILD_VECTOR. 7323static SDValue PerformBUILD_VECTORCombine(SDNode *N, 7324 TargetLowering::DAGCombinerInfo &DCI){ 7325 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 7326 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 7327 // into a pair of GPRs, which is fine when the value is used as a scalar, 7328 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 7329 SelectionDAG &DAG = DCI.DAG; 7330 if (N->getNumOperands() == 2) { 7331 SDValue RV = PerformVMOVDRRCombine(N, DAG); 7332 if (RV.getNode()) 7333 return RV; 7334 } 7335 7336 // Load i64 elements as f64 values so that type legalization does not split 7337 // them up into i32 values. 7338 EVT VT = N->getValueType(0); 7339 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 7340 return SDValue(); 7341 DebugLoc dl = N->getDebugLoc(); 7342 SmallVector<SDValue, 8> Ops; 7343 unsigned NumElts = VT.getVectorNumElements(); 7344 for (unsigned i = 0; i < NumElts; ++i) { 7345 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 7346 Ops.push_back(V); 7347 // Make the DAGCombiner fold the bitcast. 7348 DCI.AddToWorklist(V.getNode()); 7349 } 7350 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 7351 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); 7352 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 7353} 7354 7355/// PerformInsertEltCombine - Target-specific dag combine xforms for 7356/// ISD::INSERT_VECTOR_ELT. 7357static SDValue PerformInsertEltCombine(SDNode *N, 7358 TargetLowering::DAGCombinerInfo &DCI) { 7359 // Bitcast an i64 load inserted into a vector to f64. 7360 // Otherwise, the i64 value will be legalized to a pair of i32 values. 7361 EVT VT = N->getValueType(0); 7362 SDNode *Elt = N->getOperand(1).getNode(); 7363 if (VT.getVectorElementType() != MVT::i64 || 7364 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 7365 return SDValue(); 7366 7367 SelectionDAG &DAG = DCI.DAG; 7368 DebugLoc dl = N->getDebugLoc(); 7369 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 7370 VT.getVectorNumElements()); 7371 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 7372 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 7373 // Make the DAGCombiner fold the bitcasts. 7374 DCI.AddToWorklist(Vec.getNode()); 7375 DCI.AddToWorklist(V.getNode()); 7376 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 7377 Vec, V, N->getOperand(2)); 7378 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 7379} 7380 7381/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 7382/// ISD::VECTOR_SHUFFLE. 7383static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 7384 // The LLVM shufflevector instruction does not require the shuffle mask 7385 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 7386 // have that requirement. 
When translating to ISD::VECTOR_SHUFFLE, if the 7387 // operands do not match the mask length, they are extended by concatenating 7388 // them with undef vectors. That is probably the right thing for other 7389 // targets, but for NEON it is better to concatenate two double-register 7390 // size vector operands into a single quad-register size vector. Do that 7391 // transformation here: 7392 // shuffle(concat(v1, undef), concat(v2, undef)) -> 7393 // shuffle(concat(v1, v2), undef) 7394 SDValue Op0 = N->getOperand(0); 7395 SDValue Op1 = N->getOperand(1); 7396 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 7397 Op1.getOpcode() != ISD::CONCAT_VECTORS || 7398 Op0.getNumOperands() != 2 || 7399 Op1.getNumOperands() != 2) 7400 return SDValue(); 7401 SDValue Concat0Op1 = Op0.getOperand(1); 7402 SDValue Concat1Op1 = Op1.getOperand(1); 7403 if (Concat0Op1.getOpcode() != ISD::UNDEF || 7404 Concat1Op1.getOpcode() != ISD::UNDEF) 7405 return SDValue(); 7406 // Skip the transformation if any of the types are illegal. 7407 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7408 EVT VT = N->getValueType(0); 7409 if (!TLI.isTypeLegal(VT) || 7410 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 7411 !TLI.isTypeLegal(Concat1Op1.getValueType())) 7412 return SDValue(); 7413 7414 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 7415 Op0.getOperand(0), Op1.getOperand(0)); 7416 // Translate the shuffle mask. 7417 SmallVector<int, 16> NewMask; 7418 unsigned NumElts = VT.getVectorNumElements(); 7419 unsigned HalfElts = NumElts/2; 7420 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7421 for (unsigned n = 0; n < NumElts; ++n) { 7422 int MaskElt = SVN->getMaskElt(n); 7423 int NewElt = -1; 7424 if (MaskElt < (int)HalfElts) 7425 NewElt = MaskElt; 7426 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 7427 NewElt = HalfElts + MaskElt - NumElts; 7428 NewMask.push_back(NewElt); 7429 } 7430 return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, 7431 DAG.getUNDEF(VT), NewMask.data()); 7432} 7433 7434/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 7435/// NEON load/store intrinsics to merge base address updates. 7436static SDValue CombineBaseUpdate(SDNode *N, 7437 TargetLowering::DAGCombinerInfo &DCI) { 7438 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 7439 return SDValue(); 7440 7441 SelectionDAG &DAG = DCI.DAG; 7442 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 7443 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 7444 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 7445 SDValue Addr = N->getOperand(AddrOpIdx); 7446 7447 // Search for a use of the address operand that is an increment. 7448 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 7449 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 7450 SDNode *User = *UI; 7451 if (User->getOpcode() != ISD::ADD || 7452 UI.getUse().getResNo() != Addr.getResNo()) 7453 continue; 7454 7455 // Check that the add is independent of the load/store. Otherwise, folding 7456 // it would create a cycle. 7457 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 7458 continue; 7459 7460 // Find the new opcode for the updating load/store. 
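    // For example (register assignment illustrative only), a vld2 of two
    // 64-bit vectors (16 bytes) whose address is also incremented by 16 maps
    // to ARMISD::VLD2_UPD below; the updating node yields the two vectors,
    // the post-incremented address and the chain, and can be selected to the
    // write-back form  vld2.8 {d16, d17}, [r0]!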
7461 bool isLoad = true; 7462 bool isLaneOp = false; 7463 unsigned NewOpc = 0; 7464 unsigned NumVecs = 0; 7465 if (isIntrinsic) { 7466 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 7467 switch (IntNo) { 7468 default: llvm_unreachable("unexpected intrinsic for Neon base update"); 7469 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 7470 NumVecs = 1; break; 7471 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 7472 NumVecs = 2; break; 7473 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 7474 NumVecs = 3; break; 7475 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 7476 NumVecs = 4; break; 7477 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 7478 NumVecs = 2; isLaneOp = true; break; 7479 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 7480 NumVecs = 3; isLaneOp = true; break; 7481 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 7482 NumVecs = 4; isLaneOp = true; break; 7483 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 7484 NumVecs = 1; isLoad = false; break; 7485 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 7486 NumVecs = 2; isLoad = false; break; 7487 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 7488 NumVecs = 3; isLoad = false; break; 7489 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 7490 NumVecs = 4; isLoad = false; break; 7491 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 7492 NumVecs = 2; isLoad = false; isLaneOp = true; break; 7493 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 7494 NumVecs = 3; isLoad = false; isLaneOp = true; break; 7495 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 7496 NumVecs = 4; isLoad = false; isLaneOp = true; break; 7497 } 7498 } else { 7499 isLaneOp = true; 7500 switch (N->getOpcode()) { 7501 default: llvm_unreachable("unexpected opcode for Neon base update"); 7502 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 7503 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 7504 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 7505 } 7506 } 7507 7508 // Find the size of memory referenced by the load/store. 7509 EVT VecTy; 7510 if (isLoad) 7511 VecTy = N->getValueType(0); 7512 else 7513 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 7514 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 7515 if (isLaneOp) 7516 NumBytes /= VecTy.getVectorNumElements(); 7517 7518 // If the increment is a constant, it must match the memory ref size. 7519 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 7520 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 7521 uint64_t IncVal = CInc->getZExtValue(); 7522 if (IncVal != NumBytes) 7523 continue; 7524 } else if (NumBytes >= 3 * 16) { 7525 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 7526 // separate instructions that make it harder to use a non-constant update. 7527 continue; 7528 } 7529 7530 // Create the new updating load/store node. 7531 EVT Tys[6]; 7532 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 7533 unsigned n; 7534 for (n = 0; n < NumResultVecs; ++n) 7535 Tys[n] = VecTy; 7536 Tys[n++] = MVT::i32; 7537 Tys[n] = MVT::Other; 7538 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 7539 SmallVector<SDValue, 8> Ops; 7540 Ops.push_back(N->getOperand(0)); // incoming chain 7541 Ops.push_back(N->getOperand(AddrOpIdx)); 7542 Ops.push_back(Inc); 7543 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 7544 Ops.push_back(N->getOperand(i)); 7545 } 7546 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 7547 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, 7548 Ops.data(), Ops.size(), 7549 MemInt->getMemoryVT(), 7550 MemInt->getMemOperand()); 7551 7552 // Update the uses. 7553 std::vector<SDValue> NewResults; 7554 for (unsigned i = 0; i < NumResultVecs; ++i) { 7555 NewResults.push_back(SDValue(UpdN.getNode(), i)); 7556 } 7557 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 7558 DCI.CombineTo(N, NewResults); 7559 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 7560 7561 break; 7562 } 7563 return SDValue(); 7564} 7565 7566/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 7567/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 7568/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 7569/// return true. 7570static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 7571 SelectionDAG &DAG = DCI.DAG; 7572 EVT VT = N->getValueType(0); 7573 // vldN-dup instructions only support 64-bit vectors for N > 1. 7574 if (!VT.is64BitVector()) 7575 return false; 7576 7577 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 7578 SDNode *VLD = N->getOperand(0).getNode(); 7579 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 7580 return false; 7581 unsigned NumVecs = 0; 7582 unsigned NewOpc = 0; 7583 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 7584 if (IntNo == Intrinsic::arm_neon_vld2lane) { 7585 NumVecs = 2; 7586 NewOpc = ARMISD::VLD2DUP; 7587 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 7588 NumVecs = 3; 7589 NewOpc = ARMISD::VLD3DUP; 7590 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 7591 NumVecs = 4; 7592 NewOpc = ARMISD::VLD4DUP; 7593 } else { 7594 return false; 7595 } 7596 7597 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 7598 // numbers match the load. 7599 unsigned VLDLaneNo = 7600 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 7601 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 7602 UI != UE; ++UI) { 7603 // Ignore uses of the chain result. 7604 if (UI.getUse().getResNo() == NumVecs) 7605 continue; 7606 SDNode *User = *UI; 7607 if (User->getOpcode() != ARMISD::VDUPLANE || 7608 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 7609 return false; 7610 } 7611 7612 // Create the vldN-dup node. 7613 EVT Tys[5]; 7614 unsigned n; 7615 for (n = 0; n < NumVecs; ++n) 7616 Tys[n] = VT; 7617 Tys[n] = MVT::Other; 7618 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 7619 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 7620 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 7621 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, 7622 Ops, 2, VLDMemInt->getMemoryVT(), 7623 VLDMemInt->getMemOperand()); 7624 7625 // Update the uses. 
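  // The result numbering of VLDDup mirrors the original intrinsic: for a
  // vld2lane source (NumVecs == 2), results 0 and 1 are the duplicated
  // vectors and result 2 is the chain, so each VDUPLANE user of result i can
  // simply be replaced by result i of the new node.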
7626 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 7627 UI != UE; ++UI) { 7628 unsigned ResNo = UI.getUse().getResNo(); 7629 // Ignore uses of the chain result. 7630 if (ResNo == NumVecs) 7631 continue; 7632 SDNode *User = *UI; 7633 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 7634 } 7635 7636 // Now the vldN-lane intrinsic is dead except for its chain result. 7637 // Update uses of the chain. 7638 std::vector<SDValue> VLDDupResults; 7639 for (unsigned n = 0; n < NumVecs; ++n) 7640 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 7641 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 7642 DCI.CombineTo(VLD, VLDDupResults); 7643 7644 return true; 7645} 7646 7647/// PerformVDUPLANECombine - Target-specific dag combine xforms for 7648/// ARMISD::VDUPLANE. 7649static SDValue PerformVDUPLANECombine(SDNode *N, 7650 TargetLowering::DAGCombinerInfo &DCI) { 7651 SDValue Op = N->getOperand(0); 7652 7653 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 7654 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 7655 if (CombineVLDDUP(N, DCI)) 7656 return SDValue(N, 0); 7657 7658 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 7659 // redundant. Ignore bit_converts for now; element sizes are checked below. 7660 while (Op.getOpcode() == ISD::BITCAST) 7661 Op = Op.getOperand(0); 7662 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 7663 return SDValue(); 7664 7665 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 7666 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 7667 // The canonical VMOV for a zero vector uses a 32-bit element size. 7668 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 7669 unsigned EltBits; 7670 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 7671 EltSize = 8; 7672 EVT VT = N->getValueType(0); 7673 if (EltSize > VT.getVectorElementType().getSizeInBits()) 7674 return SDValue(); 7675 7676 return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); 7677} 7678 7679// isConstVecPow2 - Return true if each vector element is a power of 2, all 7680// elements are the same constant, C, and Log2(C) ranges from 1 to 32. 7681static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 7682{ 7683 integerPart cN; 7684 integerPart c0 = 0; 7685 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 7686 I != E; I++) { 7687 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 7688 if (!C) 7689 return false; 7690 7691 bool isExact; 7692 APFloat APF = C->getValueAPF(); 7693 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 7694 != APFloat::opOK || !isExact) 7695 return false; 7696 7697 c0 = (I == 0) ? cN : c0; 7698 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 7699 return false; 7700 } 7701 C = c0; 7702 return true; 7703} 7704 7705/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 7706/// can replace combinations of VMUL and VCVT (floating-point to integer) 7707/// when the VMUL has a constant operand that is a power of 2. 
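/// The multiplier's log2 becomes the fixed-point fraction width: multiplying
/// by 2^n before the conversion is equivalent to requesting n fractional bits
/// from VCVT, which is how the splat of 8.0 (2^3) turns into the #3 immediate
/// in the example below.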
7708/// 7709/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 7710/// vmul.f32 d16, d17, d16 7711/// vcvt.s32.f32 d16, d16 7712/// becomes: 7713/// vcvt.s32.f32 d16, d16, #3 7714static SDValue PerformVCVTCombine(SDNode *N, 7715 TargetLowering::DAGCombinerInfo &DCI, 7716 const ARMSubtarget *Subtarget) { 7717 SelectionDAG &DAG = DCI.DAG; 7718 SDValue Op = N->getOperand(0); 7719 7720 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 7721 Op.getOpcode() != ISD::FMUL) 7722 return SDValue(); 7723 7724 uint64_t C; 7725 SDValue N0 = Op->getOperand(0); 7726 SDValue ConstVec = Op->getOperand(1); 7727 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 7728 7729 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 7730 !isConstVecPow2(ConstVec, isSigned, C)) 7731 return SDValue(); 7732 7733 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 7734 Intrinsic::arm_neon_vcvtfp2fxu; 7735 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 7736 N->getValueType(0), 7737 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 7738 DAG.getConstant(Log2_64(C), MVT::i32)); 7739} 7740 7741/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 7742/// can replace combinations of VCVT (integer to floating-point) and VDIV 7743/// when the VDIV has a constant operand that is a power of 2. 7744/// 7745/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 7746/// vcvt.f32.s32 d16, d16 7747/// vdiv.f32 d16, d17, d16 7748/// becomes: 7749/// vcvt.f32.s32 d16, d16, #3 7750static SDValue PerformVDIVCombine(SDNode *N, 7751 TargetLowering::DAGCombinerInfo &DCI, 7752 const ARMSubtarget *Subtarget) { 7753 SelectionDAG &DAG = DCI.DAG; 7754 SDValue Op = N->getOperand(0); 7755 unsigned OpOpcode = Op.getNode()->getOpcode(); 7756 7757 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 7758 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 7759 return SDValue(); 7760 7761 uint64_t C; 7762 SDValue ConstVec = N->getOperand(1); 7763 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 7764 7765 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 7766 !isConstVecPow2(ConstVec, isSigned, C)) 7767 return SDValue(); 7768 7769 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : 7770 Intrinsic::arm_neon_vcvtfxu2fp; 7771 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 7772 Op.getValueType(), 7773 DAG.getConstant(IntrinsicOpcode, MVT::i32), 7774 Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); 7775} 7776 7777/// Getvshiftimm - Check if this is a valid build_vector for the immediate 7778/// operand of a vector shift operation, where all the elements of the 7779/// build_vector must have the same constant integer value. 7780static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 7781 // Ignore bit_converts. 7782 while (Op.getOpcode() == ISD::BITCAST) 7783 Op = Op.getOperand(0); 7784 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 7785 APInt SplatBits, SplatUndef; 7786 unsigned SplatBitSize; 7787 bool HasAnyUndefs; 7788 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 7789 HasAnyUndefs, ElementBits) || 7790 SplatBitSize > ElementBits) 7791 return false; 7792 Cnt = SplatBits.getSExtValue(); 7793 return true; 7794} 7795 7796/// isVShiftLImm - Check if this is a valid build_vector for the immediate 7797/// operand of a vector shift left operation. 
That value must be in the range: 7798/// 0 <= Value < ElementBits for a left shift; or 7799/// 0 <= Value <= ElementBits for a long left shift. 7800static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 7801 assert(VT.isVector() && "vector shift count is not a vector type"); 7802 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 7803 if (! getVShiftImm(Op, ElementBits, Cnt)) 7804 return false; 7805 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 7806} 7807 7808/// isVShiftRImm - Check if this is a valid build_vector for the immediate 7809/// operand of a vector shift right operation. For a shift opcode, the value 7810/// is positive, but for an intrinsic the value count must be negative. The 7811/// absolute value must be in the range: 7812/// 1 <= |Value| <= ElementBits for a right shift; or 7813/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 7814static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 7815 int64_t &Cnt) { 7816 assert(VT.isVector() && "vector shift count is not a vector type"); 7817 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 7818 if (! getVShiftImm(Op, ElementBits, Cnt)) 7819 return false; 7820 if (isIntrinsic) 7821 Cnt = -Cnt; 7822 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 7823} 7824 7825/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 7826static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 7827 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 7828 switch (IntNo) { 7829 default: 7830 // Don't do anything for most intrinsics. 7831 break; 7832 7833 // Vector shifts: check for immediate versions and lower them. 7834 // Note: This is done during DAG combining instead of DAG legalizing because 7835 // the build_vectors for 64-bit vector element shift counts are generally 7836 // not legal, and it is hard to see their values after they get legalized to 7837 // loads from a constant pool. 7838 case Intrinsic::arm_neon_vshifts: 7839 case Intrinsic::arm_neon_vshiftu: 7840 case Intrinsic::arm_neon_vshiftls: 7841 case Intrinsic::arm_neon_vshiftlu: 7842 case Intrinsic::arm_neon_vshiftn: 7843 case Intrinsic::arm_neon_vrshifts: 7844 case Intrinsic::arm_neon_vrshiftu: 7845 case Intrinsic::arm_neon_vrshiftn: 7846 case Intrinsic::arm_neon_vqshifts: 7847 case Intrinsic::arm_neon_vqshiftu: 7848 case Intrinsic::arm_neon_vqshiftsu: 7849 case Intrinsic::arm_neon_vqshiftns: 7850 case Intrinsic::arm_neon_vqshiftnu: 7851 case Intrinsic::arm_neon_vqshiftnsu: 7852 case Intrinsic::arm_neon_vqrshiftns: 7853 case Intrinsic::arm_neon_vqrshiftnu: 7854 case Intrinsic::arm_neon_vqrshiftnsu: { 7855 EVT VT = N->getOperand(1).getValueType(); 7856 int64_t Cnt; 7857 unsigned VShiftOpc = 0; 7858 7859 switch (IntNo) { 7860 case Intrinsic::arm_neon_vshifts: 7861 case Intrinsic::arm_neon_vshiftu: 7862 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 7863 VShiftOpc = ARMISD::VSHL; 7864 break; 7865 } 7866 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 7867 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
7868 ARMISD::VSHRs : ARMISD::VSHRu); 7869 break; 7870 } 7871 return SDValue(); 7872 7873 case Intrinsic::arm_neon_vshiftls: 7874 case Intrinsic::arm_neon_vshiftlu: 7875 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 7876 break; 7877 llvm_unreachable("invalid shift count for vshll intrinsic"); 7878 7879 case Intrinsic::arm_neon_vrshifts: 7880 case Intrinsic::arm_neon_vrshiftu: 7881 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 7882 break; 7883 return SDValue(); 7884 7885 case Intrinsic::arm_neon_vqshifts: 7886 case Intrinsic::arm_neon_vqshiftu: 7887 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 7888 break; 7889 return SDValue(); 7890 7891 case Intrinsic::arm_neon_vqshiftsu: 7892 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 7893 break; 7894 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 7895 7896 case Intrinsic::arm_neon_vshiftn: 7897 case Intrinsic::arm_neon_vrshiftn: 7898 case Intrinsic::arm_neon_vqshiftns: 7899 case Intrinsic::arm_neon_vqshiftnu: 7900 case Intrinsic::arm_neon_vqshiftnsu: 7901 case Intrinsic::arm_neon_vqrshiftns: 7902 case Intrinsic::arm_neon_vqrshiftnu: 7903 case Intrinsic::arm_neon_vqrshiftnsu: 7904 // Narrowing shifts require an immediate right shift. 7905 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 7906 break; 7907 llvm_unreachable("invalid shift count for narrowing vector shift " 7908 "intrinsic"); 7909 7910 default: 7911 llvm_unreachable("unhandled vector shift"); 7912 } 7913 7914 switch (IntNo) { 7915 case Intrinsic::arm_neon_vshifts: 7916 case Intrinsic::arm_neon_vshiftu: 7917 // Opcode already set above. 7918 break; 7919 case Intrinsic::arm_neon_vshiftls: 7920 case Intrinsic::arm_neon_vshiftlu: 7921 if (Cnt == VT.getVectorElementType().getSizeInBits()) 7922 VShiftOpc = ARMISD::VSHLLi; 7923 else 7924 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
7925 ARMISD::VSHLLs : ARMISD::VSHLLu); 7926 break; 7927 case Intrinsic::arm_neon_vshiftn: 7928 VShiftOpc = ARMISD::VSHRN; break; 7929 case Intrinsic::arm_neon_vrshifts: 7930 VShiftOpc = ARMISD::VRSHRs; break; 7931 case Intrinsic::arm_neon_vrshiftu: 7932 VShiftOpc = ARMISD::VRSHRu; break; 7933 case Intrinsic::arm_neon_vrshiftn: 7934 VShiftOpc = ARMISD::VRSHRN; break; 7935 case Intrinsic::arm_neon_vqshifts: 7936 VShiftOpc = ARMISD::VQSHLs; break; 7937 case Intrinsic::arm_neon_vqshiftu: 7938 VShiftOpc = ARMISD::VQSHLu; break; 7939 case Intrinsic::arm_neon_vqshiftsu: 7940 VShiftOpc = ARMISD::VQSHLsu; break; 7941 case Intrinsic::arm_neon_vqshiftns: 7942 VShiftOpc = ARMISD::VQSHRNs; break; 7943 case Intrinsic::arm_neon_vqshiftnu: 7944 VShiftOpc = ARMISD::VQSHRNu; break; 7945 case Intrinsic::arm_neon_vqshiftnsu: 7946 VShiftOpc = ARMISD::VQSHRNsu; break; 7947 case Intrinsic::arm_neon_vqrshiftns: 7948 VShiftOpc = ARMISD::VQRSHRNs; break; 7949 case Intrinsic::arm_neon_vqrshiftnu: 7950 VShiftOpc = ARMISD::VQRSHRNu; break; 7951 case Intrinsic::arm_neon_vqrshiftnsu: 7952 VShiftOpc = ARMISD::VQRSHRNsu; break; 7953 } 7954 7955 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 7956 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 7957 } 7958 7959 case Intrinsic::arm_neon_vshiftins: { 7960 EVT VT = N->getOperand(1).getValueType(); 7961 int64_t Cnt; 7962 unsigned VShiftOpc = 0; 7963 7964 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 7965 VShiftOpc = ARMISD::VSLI; 7966 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 7967 VShiftOpc = ARMISD::VSRI; 7968 else { 7969 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 7970 } 7971 7972 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 7973 N->getOperand(1), N->getOperand(2), 7974 DAG.getConstant(Cnt, MVT::i32)); 7975 } 7976 7977 case Intrinsic::arm_neon_vqrshifts: 7978 case Intrinsic::arm_neon_vqrshiftu: 7979 // No immediate versions of these to check for. 7980 break; 7981 } 7982 7983 return SDValue(); 7984} 7985 7986/// PerformShiftCombine - Checks for immediate versions of vector shifts and 7987/// lowers them. As with the vector shift intrinsics, this is done during DAG 7988/// combining instead of DAG legalizing because the build_vectors for 64-bit 7989/// vector element shift counts are generally not legal, and it is hard to see 7990/// their values after they get legalized to loads from a constant pool. 7991static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 7992 const ARMSubtarget *ST) { 7993 EVT VT = N->getValueType(0); 7994 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 7995 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 7996 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 7997 SDValue N1 = N->getOperand(1); 7998 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 7999 SDValue N0 = N->getOperand(0); 8000 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 8001 DAG.MaskedValueIsZero(N0.getOperand(0), 8002 APInt::getHighBitsSet(32, 16))) 8003 return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); 8004 } 8005 } 8006 8007 // Nothing to be done for scalar shifts. 
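  // Vector shifts by a constant splat are handled below; for example
  // (shl v4i32 X, (build_vector 3, 3, 3, 3)) becomes ARMISD::VSHL X, #3 and
  // selects to  vshl.i32 q0, q0, #3  (the register choice is illustrative).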
8008 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8009 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 8010 return SDValue(); 8011 8012 assert(ST->hasNEON() && "unexpected vector shift"); 8013 int64_t Cnt; 8014 8015 switch (N->getOpcode()) { 8016 default: llvm_unreachable("unexpected shift opcode"); 8017 8018 case ISD::SHL: 8019 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 8020 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 8021 DAG.getConstant(Cnt, MVT::i32)); 8022 break; 8023 8024 case ISD::SRA: 8025 case ISD::SRL: 8026 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 8027 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 8028 ARMISD::VSHRs : ARMISD::VSHRu); 8029 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 8030 DAG.getConstant(Cnt, MVT::i32)); 8031 } 8032 } 8033 return SDValue(); 8034} 8035 8036/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 8037/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 8038static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 8039 const ARMSubtarget *ST) { 8040 SDValue N0 = N->getOperand(0); 8041 8042 // Check for sign- and zero-extensions of vector extract operations of 8- 8043 // and 16-bit vector elements. NEON supports these directly. They are 8044 // handled during DAG combining because type legalization will promote them 8045 // to 32-bit types and it is messy to recognize the operations after that. 8046 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 8047 SDValue Vec = N0.getOperand(0); 8048 SDValue Lane = N0.getOperand(1); 8049 EVT VT = N->getValueType(0); 8050 EVT EltVT = N0.getValueType(); 8051 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8052 8053 if (VT == MVT::i32 && 8054 (EltVT == MVT::i8 || EltVT == MVT::i16) && 8055 TLI.isTypeLegal(Vec.getValueType()) && 8056 isa<ConstantSDNode>(Lane)) { 8057 8058 unsigned Opc = 0; 8059 switch (N->getOpcode()) { 8060 default: llvm_unreachable("unexpected opcode"); 8061 case ISD::SIGN_EXTEND: 8062 Opc = ARMISD::VGETLANEs; 8063 break; 8064 case ISD::ZERO_EXTEND: 8065 case ISD::ANY_EXTEND: 8066 Opc = ARMISD::VGETLANEu; 8067 break; 8068 } 8069 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 8070 } 8071 } 8072 8073 return SDValue(); 8074} 8075 8076/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 8077/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 8078static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 8079 const ARMSubtarget *ST) { 8080 // If the target supports NEON, try to use vmax/vmin instructions for f32 8081 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 8082 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 8083 // a NaN; only do the transformation when it matches that behavior. 8084 8085 // For now only do this when using NEON for FP operations; if using VFP, it 8086 // is not obvious that the benefit outweighs the cost of switching to the 8087 // NEON pipeline. 
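  // Shape of the match (value names illustrative): for
  //   (select_cc setolt %x, %y, %x, %y)
  // with %x known never to be NaN, the node becomes (ARMISD::FMIN %x, %y),
  // i.e. a single vmin.f32 instead of a VFP compare plus conditional moves.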
8088 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 8089 N->getValueType(0) != MVT::f32) 8090 return SDValue(); 8091 8092 SDValue CondLHS = N->getOperand(0); 8093 SDValue CondRHS = N->getOperand(1); 8094 SDValue LHS = N->getOperand(2); 8095 SDValue RHS = N->getOperand(3); 8096 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 8097 8098 unsigned Opcode = 0; 8099 bool IsReversed; 8100 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 8101 IsReversed = false; // x CC y ? x : y 8102 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 8103 IsReversed = true ; // x CC y ? y : x 8104 } else { 8105 return SDValue(); 8106 } 8107 8108 bool IsUnordered; 8109 switch (CC) { 8110 default: break; 8111 case ISD::SETOLT: 8112 case ISD::SETOLE: 8113 case ISD::SETLT: 8114 case ISD::SETLE: 8115 case ISD::SETULT: 8116 case ISD::SETULE: 8117 // If LHS is NaN, an ordered comparison will be false and the result will 8118 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 8119 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 8120 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 8121 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 8122 break; 8123 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 8124 // will return -0, so vmin can only be used for unsafe math or if one of 8125 // the operands is known to be nonzero. 8126 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 8127 !DAG.getTarget().Options.UnsafeFPMath && 8128 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 8129 break; 8130 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 8131 break; 8132 8133 case ISD::SETOGT: 8134 case ISD::SETOGE: 8135 case ISD::SETGT: 8136 case ISD::SETGE: 8137 case ISD::SETUGT: 8138 case ISD::SETUGE: 8139 // If LHS is NaN, an ordered comparison will be false and the result will 8140 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 8141 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 8142 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 8143 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 8144 break; 8145 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 8146 // will return +0, so vmax can only be used for unsafe math or if one of 8147 // the operands is known to be nonzero. 8148 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 8149 !DAG.getTarget().Options.UnsafeFPMath && 8150 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 8151 break; 8152 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 8153 break; 8154 } 8155 8156 if (!Opcode) 8157 return SDValue(); 8158 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 8159} 8160 8161/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 8162SDValue 8163ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 8164 SDValue Cmp = N->getOperand(4); 8165 if (Cmp.getOpcode() != ARMISD::CMPZ) 8166 // Only looking at EQ and NE cases. 
8167 return SDValue(); 8168 8169 EVT VT = N->getValueType(0); 8170 DebugLoc dl = N->getDebugLoc(); 8171 SDValue LHS = Cmp.getOperand(0); 8172 SDValue RHS = Cmp.getOperand(1); 8173 SDValue FalseVal = N->getOperand(0); 8174 SDValue TrueVal = N->getOperand(1); 8175 SDValue ARMcc = N->getOperand(2); 8176 ARMCC::CondCodes CC = 8177 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 8178 8179 // Simplify 8180 // mov r1, r0 8181 // cmp r1, x 8182 // mov r0, y 8183 // moveq r0, x 8184 // to 8185 // cmp r0, x 8186 // movne r0, y 8187 // 8188 // mov r1, r0 8189 // cmp r1, x 8190 // mov r0, x 8191 // movne r0, y 8192 // to 8193 // cmp r0, x 8194 // movne r0, y 8195 /// FIXME: Turn this into a target neutral optimization? 8196 SDValue Res; 8197 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 8198 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 8199 N->getOperand(3), Cmp); 8200 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 8201 SDValue ARMcc; 8202 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 8203 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 8204 N->getOperand(3), NewCmp); 8205 } 8206 8207 if (Res.getNode()) { 8208 APInt KnownZero, KnownOne; 8209 APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); 8210 DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); 8211 // Capture demanded bits information that would be otherwise lost. 8212 if (KnownZero == 0xfffffffe) 8213 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8214 DAG.getValueType(MVT::i1)); 8215 else if (KnownZero == 0xffffff00) 8216 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8217 DAG.getValueType(MVT::i8)); 8218 else if (KnownZero == 0xffff0000) 8219 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 8220 DAG.getValueType(MVT::i16)); 8221 } 8222 8223 return Res; 8224} 8225 8226SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 8227 DAGCombinerInfo &DCI) const { 8228 switch (N->getOpcode()) { 8229 default: break; 8230 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 8231 case ISD::SUB: return PerformSUBCombine(N, DCI); 8232 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 8233 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 8234 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); 8235 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); 8236 case ARMISD::BFI: return PerformBFICombine(N, DCI); 8237 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 8238 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 8239 case ISD::STORE: return PerformSTORECombine(N, DCI); 8240 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 8241 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 8242 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 8243 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 8244 case ISD::FP_TO_SINT: 8245 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 8246 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 8247 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 8248 case ISD::SHL: 8249 case ISD::SRA: 8250 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 8251 case ISD::SIGN_EXTEND: 8252 case ISD::ZERO_EXTEND: 8253 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 8254 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 8255 case ARMISD::CMOV: return 
PerformCMOVCombine(N, DCI.DAG); 8256 case ARMISD::VLD2DUP: 8257 case ARMISD::VLD3DUP: 8258 case ARMISD::VLD4DUP: 8259 return CombineBaseUpdate(N, DCI); 8260 case ISD::INTRINSIC_VOID: 8261 case ISD::INTRINSIC_W_CHAIN: 8262 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 8263 case Intrinsic::arm_neon_vld1: 8264 case Intrinsic::arm_neon_vld2: 8265 case Intrinsic::arm_neon_vld3: 8266 case Intrinsic::arm_neon_vld4: 8267 case Intrinsic::arm_neon_vld2lane: 8268 case Intrinsic::arm_neon_vld3lane: 8269 case Intrinsic::arm_neon_vld4lane: 8270 case Intrinsic::arm_neon_vst1: 8271 case Intrinsic::arm_neon_vst2: 8272 case Intrinsic::arm_neon_vst3: 8273 case Intrinsic::arm_neon_vst4: 8274 case Intrinsic::arm_neon_vst2lane: 8275 case Intrinsic::arm_neon_vst3lane: 8276 case Intrinsic::arm_neon_vst4lane: 8277 return CombineBaseUpdate(N, DCI); 8278 default: break; 8279 } 8280 break; 8281 } 8282 return SDValue(); 8283} 8284 8285bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 8286 EVT VT) const { 8287 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 8288} 8289 8290bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 8291 if (!Subtarget->allowsUnalignedMem()) 8292 return false; 8293 8294 switch (VT.getSimpleVT().SimpleTy) { 8295 default: 8296 return false; 8297 case MVT::i8: 8298 case MVT::i16: 8299 case MVT::i32: 8300 return true; 8301 // FIXME: VLD1 etc with standard alignment is legal. 8302 } 8303} 8304 8305static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, 8306 unsigned AlignCheck) { 8307 return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && 8308 (DstAlign == 0 || DstAlign % AlignCheck == 0)); 8309} 8310 8311EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, 8312 unsigned DstAlign, unsigned SrcAlign, 8313 bool IsZeroVal, 8314 bool MemcpyStrSrc, 8315 MachineFunction &MF) const { 8316 const Function *F = MF.getFunction(); 8317 8318 // See if we can use NEON instructions for this... 8319 if (IsZeroVal && 8320 !F->hasFnAttr(Attribute::NoImplicitFloat) && 8321 Subtarget->hasNEON()) { 8322 if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { 8323 return MVT::v4i32; 8324 } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { 8325 return MVT::v2i32; 8326 } 8327 } 8328 8329 // Lowering to i32/i16 if the size permits. 8330 if (Size >= 4) { 8331 return MVT::i32; 8332 } else if (Size >= 2) { 8333 return MVT::i16; 8334 } 8335 8336 // Let the target-independent logic figure it out. 
8337 return MVT::Other; 8338} 8339 8340static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 8341 if (V < 0) 8342 return false; 8343 8344 unsigned Scale = 1; 8345 switch (VT.getSimpleVT().SimpleTy) { 8346 default: return false; 8347 case MVT::i1: 8348 case MVT::i8: 8349 // Scale == 1; 8350 break; 8351 case MVT::i16: 8352 // Scale == 2; 8353 Scale = 2; 8354 break; 8355 case MVT::i32: 8356 // Scale == 4; 8357 Scale = 4; 8358 break; 8359 } 8360 8361 if ((V & (Scale - 1)) != 0) 8362 return false; 8363 V /= Scale; 8364 return V == (V & ((1LL << 5) - 1)); 8365} 8366 8367static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 8368 const ARMSubtarget *Subtarget) { 8369 bool isNeg = false; 8370 if (V < 0) { 8371 isNeg = true; 8372 V = - V; 8373 } 8374 8375 switch (VT.getSimpleVT().SimpleTy) { 8376 default: return false; 8377 case MVT::i1: 8378 case MVT::i8: 8379 case MVT::i16: 8380 case MVT::i32: 8381 // + imm12 or - imm8 8382 if (isNeg) 8383 return V == (V & ((1LL << 8) - 1)); 8384 return V == (V & ((1LL << 12) - 1)); 8385 case MVT::f32: 8386 case MVT::f64: 8387 // Same as ARM mode. FIXME: NEON? 8388 if (!Subtarget->hasVFP2()) 8389 return false; 8390 if ((V & 3) != 0) 8391 return false; 8392 V >>= 2; 8393 return V == (V & ((1LL << 8) - 1)); 8394 } 8395} 8396 8397/// isLegalAddressImmediate - Return true if the integer value can be used 8398/// as the offset of the target addressing mode for load / store of the 8399/// given type. 8400static bool isLegalAddressImmediate(int64_t V, EVT VT, 8401 const ARMSubtarget *Subtarget) { 8402 if (V == 0) 8403 return true; 8404 8405 if (!VT.isSimple()) 8406 return false; 8407 8408 if (Subtarget->isThumb1Only()) 8409 return isLegalT1AddressImmediate(V, VT); 8410 else if (Subtarget->isThumb2()) 8411 return isLegalT2AddressImmediate(V, VT, Subtarget); 8412 8413 // ARM mode. 8414 if (V < 0) 8415 V = - V; 8416 switch (VT.getSimpleVT().SimpleTy) { 8417 default: return false; 8418 case MVT::i1: 8419 case MVT::i8: 8420 case MVT::i32: 8421 // +- imm12 8422 return V == (V & ((1LL << 12) - 1)); 8423 case MVT::i16: 8424 // +- imm8 8425 return V == (V & ((1LL << 8) - 1)); 8426 case MVT::f32: 8427 case MVT::f64: 8428 if (!Subtarget->hasVFP2()) // FIXME: NEON? 8429 return false; 8430 if ((V & 3) != 0) 8431 return false; 8432 V >>= 2; 8433 return V == (V & ((1LL << 8) - 1)); 8434 } 8435} 8436 8437bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 8438 EVT VT) const { 8439 int Scale = AM.Scale; 8440 if (Scale < 0) 8441 return false; 8442 8443 switch (VT.getSimpleVT().SimpleTy) { 8444 default: return false; 8445 case MVT::i1: 8446 case MVT::i8: 8447 case MVT::i16: 8448 case MVT::i32: 8449 if (Scale == 1) 8450 return true; 8451 // r + r << imm 8452 Scale = Scale & ~1; 8453 return Scale == 2 || Scale == 4 || Scale == 8; 8454 case MVT::i64: 8455 // r + r 8456 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 8457 return true; 8458 return false; 8459 case MVT::isVoid: 8460 // Note, we allow "void" uses (basically, uses that aren't loads or 8461 // stores), because arm allows folding a scale into many arithmetic 8462 // operations. This should be made more precise and revisited later. 8463 8464 // Allow r << imm, but the imm has to be a multiple of two. 8465 if (Scale & 1) return false; 8466 return isPowerOf2_32(Scale); 8467 } 8468} 8469 8470/// isLegalAddressingMode - Return true if the addressing mode represented 8471/// by AM is legal for this target, for a load/store of the specified type. 
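/// For example, in ARM mode an i32 access may use [r, #+/-imm12] or a scaled
/// register form such as [r, r, lsl #2], but a base register combined with
/// both a scaled index and a nonzero immediate offset is rejected below.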
8472bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 8473 Type *Ty) const { 8474 EVT VT = getValueType(Ty, true); 8475 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 8476 return false; 8477 8478 // Can never fold addr of global into load/store. 8479 if (AM.BaseGV) 8480 return false; 8481 8482 switch (AM.Scale) { 8483 case 0: // no scale reg, must be "r+i" or "r", or "i". 8484 break; 8485 case 1: 8486 if (Subtarget->isThumb1Only()) 8487 return false; 8488 // FALL THROUGH. 8489 default: 8490 // ARM doesn't support any R+R*scale+imm addr modes. 8491 if (AM.BaseOffs) 8492 return false; 8493 8494 if (!VT.isSimple()) 8495 return false; 8496 8497 if (Subtarget->isThumb2()) 8498 return isLegalT2ScaledAddressingMode(AM, VT); 8499 8500 int Scale = AM.Scale; 8501 switch (VT.getSimpleVT().SimpleTy) { 8502 default: return false; 8503 case MVT::i1: 8504 case MVT::i8: 8505 case MVT::i32: 8506 if (Scale < 0) Scale = -Scale; 8507 if (Scale == 1) 8508 return true; 8509 // r + r << imm 8510 return isPowerOf2_32(Scale & ~1); 8511 case MVT::i16: 8512 case MVT::i64: 8513 // r + r 8514 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 8515 return true; 8516 return false; 8517 8518 case MVT::isVoid: 8519 // Note, we allow "void" uses (basically, uses that aren't loads or 8520 // stores), because arm allows folding a scale into many arithmetic 8521 // operations. This should be made more precise and revisited later. 8522 8523 // Allow r << imm, but the imm has to be a multiple of two. 8524 if (Scale & 1) return false; 8525 return isPowerOf2_32(Scale); 8526 } 8527 } 8528 return true; 8529} 8530 8531/// isLegalICmpImmediate - Return true if the specified immediate is legal 8532/// icmp immediate, that is the target has icmp instructions which can compare 8533/// a register against the immediate without having to materialize the 8534/// immediate into a register. 8535bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 8536 if (!Subtarget->isThumb()) 8537 return ARM_AM::getSOImmVal(Imm) != -1; 8538 if (Subtarget->isThumb2()) 8539 return ARM_AM::getT2SOImmVal(Imm) != -1; 8540 return Imm >= 0 && Imm <= 255; 8541} 8542 8543/// isLegalAddImmediate - Return true if the specified immediate is legal 8544/// add immediate, that is the target has add instructions which can add 8545/// a register with the immediate without having to materialize the 8546/// immediate into a register. 
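/// ARM data-processing immediates are an 8-bit value rotated right by an even
/// amount, so for example 255, 0xff00 and 0xff0000 are legal add immediates
/// here, while 257 (0x101) is not and must first be materialized in a
/// register.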
8547bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 8548 return ARM_AM::getSOImmVal(Imm) != -1; 8549} 8550 8551static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 8552 bool isSEXTLoad, SDValue &Base, 8553 SDValue &Offset, bool &isInc, 8554 SelectionDAG &DAG) { 8555 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 8556 return false; 8557 8558 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 8559 // AddressingMode 3 8560 Base = Ptr->getOperand(0); 8561 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 8562 int RHSC = (int)RHS->getZExtValue(); 8563 if (RHSC < 0 && RHSC > -256) { 8564 assert(Ptr->getOpcode() == ISD::ADD); 8565 isInc = false; 8566 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 8567 return true; 8568 } 8569 } 8570 isInc = (Ptr->getOpcode() == ISD::ADD); 8571 Offset = Ptr->getOperand(1); 8572 return true; 8573 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 8574 // AddressingMode 2 8575 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 8576 int RHSC = (int)RHS->getZExtValue(); 8577 if (RHSC < 0 && RHSC > -0x1000) { 8578 assert(Ptr->getOpcode() == ISD::ADD); 8579 isInc = false; 8580 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 8581 Base = Ptr->getOperand(0); 8582 return true; 8583 } 8584 } 8585 8586 if (Ptr->getOpcode() == ISD::ADD) { 8587 isInc = true; 8588 ARM_AM::ShiftOpc ShOpcVal= 8589 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 8590 if (ShOpcVal != ARM_AM::no_shift) { 8591 Base = Ptr->getOperand(1); 8592 Offset = Ptr->getOperand(0); 8593 } else { 8594 Base = Ptr->getOperand(0); 8595 Offset = Ptr->getOperand(1); 8596 } 8597 return true; 8598 } 8599 8600 isInc = (Ptr->getOpcode() == ISD::ADD); 8601 Base = Ptr->getOperand(0); 8602 Offset = Ptr->getOperand(1); 8603 return true; 8604 } 8605 8606 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 8607 return false; 8608} 8609 8610static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 8611 bool isSEXTLoad, SDValue &Base, 8612 SDValue &Offset, bool &isInc, 8613 SelectionDAG &DAG) { 8614 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 8615 return false; 8616 8617 Base = Ptr->getOperand(0); 8618 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 8619 int RHSC = (int)RHS->getZExtValue(); 8620 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 8621 assert(Ptr->getOpcode() == ISD::ADD); 8622 isInc = false; 8623 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 8624 return true; 8625 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 8626 isInc = Ptr->getOpcode() == ISD::ADD; 8627 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 8628 return true; 8629 } 8630 } 8631 8632 return false; 8633} 8634 8635/// getPreIndexedAddressParts - returns true by value, base pointer and 8636/// offset pointer and addressing mode by reference if the node's address 8637/// can be legally represented as pre-indexed load / store address. 
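/// For example (register names illustrative), an i32 load from (add r1, #4)
/// can report Base = r1, Offset = #4 and AM = ISD::PRE_INC, allowing the
/// load to be selected as the write-back form  ldr r0, [r1, #4]!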
8638bool 8639ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 8640 SDValue &Offset, 8641 ISD::MemIndexedMode &AM, 8642 SelectionDAG &DAG) const { 8643 if (Subtarget->isThumb1Only()) 8644 return false; 8645 8646 EVT VT; 8647 SDValue Ptr; 8648 bool isSEXTLoad = false; 8649 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 8650 Ptr = LD->getBasePtr(); 8651 VT = LD->getMemoryVT(); 8652 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 8653 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 8654 Ptr = ST->getBasePtr(); 8655 VT = ST->getMemoryVT(); 8656 } else 8657 return false; 8658 8659 bool isInc; 8660 bool isLegal = false; 8661 if (Subtarget->isThumb2()) 8662 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 8663 Offset, isInc, DAG); 8664 else 8665 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 8666 Offset, isInc, DAG); 8667 if (!isLegal) 8668 return false; 8669 8670 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 8671 return true; 8672} 8673 8674/// getPostIndexedAddressParts - returns true by value, base pointer and 8675/// offset pointer and addressing mode by reference if this node can be 8676/// combined with a load / store to form a post-indexed load / store. 8677bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 8678 SDValue &Base, 8679 SDValue &Offset, 8680 ISD::MemIndexedMode &AM, 8681 SelectionDAG &DAG) const { 8682 if (Subtarget->isThumb1Only()) 8683 return false; 8684 8685 EVT VT; 8686 SDValue Ptr; 8687 bool isSEXTLoad = false; 8688 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 8689 VT = LD->getMemoryVT(); 8690 Ptr = LD->getBasePtr(); 8691 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 8692 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 8693 VT = ST->getMemoryVT(); 8694 Ptr = ST->getBasePtr(); 8695 } else 8696 return false; 8697 8698 bool isInc; 8699 bool isLegal = false; 8700 if (Subtarget->isThumb2()) 8701 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 8702 isInc, DAG); 8703 else 8704 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 8705 isInc, DAG); 8706 if (!isLegal) 8707 return false; 8708 8709 if (Ptr != Base) { 8710 // Swap base ptr and offset to catch more post-index load / store when 8711 // it's legal. In Thumb2 mode, offset must be an immediate. 8712 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 8713 !Subtarget->isThumb2()) 8714 std::swap(Base, Offset); 8715 8716 // Post-indexed load / store update the base pointer. 8717 if (Ptr != Base) 8718 return false; 8719 } 8720 8721 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 8722 return true; 8723} 8724 8725void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 8726 const APInt &Mask, 8727 APInt &KnownZero, 8728 APInt &KnownOne, 8729 const SelectionDAG &DAG, 8730 unsigned Depth) const { 8731 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 8732 switch (Op.getOpcode()) { 8733 default: break; 8734 case ARMISD::CMOV: { 8735 // Bits are known zero/one if known on the LHS and RHS. 
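    // For instance, (CMOV 0, 1, ...) has the top 31 bits clear on both arms,
    // so KnownZero comes back as 0xfffffffe; PerformCMOVCombine above relies
    // on exactly that result to re-attach an AssertZext i1 after rewriting
    // the CMOV.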
8736 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 8737 if (KnownZero == 0 && KnownOne == 0) return; 8738 8739 APInt KnownZeroRHS, KnownOneRHS; 8740 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 8741 KnownZeroRHS, KnownOneRHS, Depth+1); 8742 KnownZero &= KnownZeroRHS; 8743 KnownOne &= KnownOneRHS; 8744 return; 8745 } 8746 } 8747} 8748 8749//===----------------------------------------------------------------------===// 8750// ARM Inline Assembly Support 8751//===----------------------------------------------------------------------===// 8752 8753bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 8754 // Looking for "rev" which is V6+. 8755 if (!Subtarget->hasV6Ops()) 8756 return false; 8757 8758 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 8759 std::string AsmStr = IA->getAsmString(); 8760 SmallVector<StringRef, 4> AsmPieces; 8761 SplitString(AsmStr, AsmPieces, ";\n"); 8762 8763 switch (AsmPieces.size()) { 8764 default: return false; 8765 case 1: 8766 AsmStr = AsmPieces[0]; 8767 AsmPieces.clear(); 8768 SplitString(AsmStr, AsmPieces, " \t,"); 8769 8770 // rev $0, $1 8771 if (AsmPieces.size() == 3 && 8772 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 8773 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 8774 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 8775 if (Ty && Ty->getBitWidth() == 32) 8776 return IntrinsicLowering::LowerToByteSwap(CI); 8777 } 8778 break; 8779 } 8780 8781 return false; 8782} 8783 8784/// getConstraintType - Given a constraint letter, return the type of 8785/// constraint it is for this target. 8786ARMTargetLowering::ConstraintType 8787ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 8788 if (Constraint.size() == 1) { 8789 switch (Constraint[0]) { 8790 default: break; 8791 case 'l': return C_RegisterClass; 8792 case 'w': return C_RegisterClass; 8793 case 'h': return C_RegisterClass; 8794 case 'x': return C_RegisterClass; 8795 case 't': return C_RegisterClass; 8796 case 'j': return C_Other; // Constant for movw. 8797 // An address with a single base register. Due to the way we 8798 // currently handle addresses it is the same as an 'r' memory constraint. 8799 case 'Q': return C_Memory; 8800 } 8801 } else if (Constraint.size() == 2) { 8802 switch (Constraint[0]) { 8803 default: break; 8804 // All 'U+' constraints are addresses. 8805 case 'U': return C_Memory; 8806 } 8807 } 8808 return TargetLowering::getConstraintType(Constraint); 8809} 8810 8811/// Examine constraint type and operand type and determine a weight value. 8812/// This object must already have been set up with the operand type 8813/// and the current alternative constraint selected. 8814TargetLowering::ConstraintWeight 8815ARMTargetLowering::getSingleConstraintMatchWeight( 8816 AsmOperandInfo &info, const char *constraint) const { 8817 ConstraintWeight weight = CW_Invalid; 8818 Value *CallOperandVal = info.CallOperandVal; 8819 // If we don't have a value, we can't do a match, 8820 // but allow it at the lowest weight. 8821 if (CallOperandVal == NULL) 8822 return CW_Default; 8823 Type *type = CallOperandVal->getType(); 8824 // Look at the constraint type. 
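  // For example, an i32 operand under the 'l' constraint is weighted
  // CW_SpecificReg when targeting Thumb, since only the low registers r0-r7
  // qualify there, whereas in ARM mode it is an ordinary CW_Register match.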
8825 switch (*constraint) { 8826 default: 8827 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 8828 break; 8829 case 'l': 8830 if (type->isIntegerTy()) { 8831 if (Subtarget->isThumb()) 8832 weight = CW_SpecificReg; 8833 else 8834 weight = CW_Register; 8835 } 8836 break; 8837 case 'w': 8838 if (type->isFloatingPointTy()) 8839 weight = CW_Register; 8840 break; 8841 } 8842 return weight; 8843} 8844 8845typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 8846RCPair 8847ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 8848 EVT VT) const { 8849 if (Constraint.size() == 1) { 8850 // GCC ARM Constraint Letters 8851 switch (Constraint[0]) { 8852 case 'l': // Low regs or general regs. 8853 if (Subtarget->isThumb()) 8854 return RCPair(0U, ARM::tGPRRegisterClass); 8855 else 8856 return RCPair(0U, ARM::GPRRegisterClass); 8857 case 'h': // High regs or no regs. 8858 if (Subtarget->isThumb()) 8859 return RCPair(0U, ARM::hGPRRegisterClass); 8860 break; 8861 case 'r': 8862 return RCPair(0U, ARM::GPRRegisterClass); 8863 case 'w': 8864 if (VT == MVT::f32) 8865 return RCPair(0U, ARM::SPRRegisterClass); 8866 if (VT.getSizeInBits() == 64) 8867 return RCPair(0U, ARM::DPRRegisterClass); 8868 if (VT.getSizeInBits() == 128) 8869 return RCPair(0U, ARM::QPRRegisterClass); 8870 break; 8871 case 'x': 8872 if (VT == MVT::f32) 8873 return RCPair(0U, ARM::SPR_8RegisterClass); 8874 if (VT.getSizeInBits() == 64) 8875 return RCPair(0U, ARM::DPR_8RegisterClass); 8876 if (VT.getSizeInBits() == 128) 8877 return RCPair(0U, ARM::QPR_8RegisterClass); 8878 break; 8879 case 't': 8880 if (VT == MVT::f32) 8881 return RCPair(0U, ARM::SPRRegisterClass); 8882 break; 8883 } 8884 } 8885 if (StringRef("{cc}").equals_lower(Constraint)) 8886 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 8887 8888 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 8889} 8890 8891/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 8892/// vector. If it is invalid, don't add anything to Ops. 8893void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 8894 std::string &Constraint, 8895 std::vector<SDValue>&Ops, 8896 SelectionDAG &DAG) const { 8897 SDValue Result(0, 0); 8898 8899 // Currently only support length 1 constraints. 8900 if (Constraint.length() != 1) return; 8901 8902 char ConstraintLetter = Constraint[0]; 8903 switch (ConstraintLetter) { 8904 default: break; 8905 case 'j': 8906 case 'I': case 'J': case 'K': case 'L': 8907 case 'M': case 'N': case 'O': 8908 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 8909 if (!C) 8910 return; 8911 8912 int64_t CVal64 = C->getSExtValue(); 8913 int CVal = (int) CVal64; 8914 // None of these constraints allow values larger than 32 bits. Check 8915 // that the value fits in an int. 8916 if (CVal != CVal64) 8917 return; 8918 8919 switch (ConstraintLetter) { 8920 case 'j': 8921 // Constant suitable for movw, must be between 0 and 8922 // 65535. 8923 if (Subtarget->hasV6T2Ops()) 8924 if (CVal >= 0 && CVal <= 65535) 8925 break; 8926 return; 8927 case 'I': 8928 if (Subtarget->isThumb1Only()) { 8929 // This must be a constant between 0 and 255, for ADD 8930 // immediates. 8931 if (CVal >= 0 && CVal <= 255) 8932 break; 8933 } else if (Subtarget->isThumb2()) { 8934 // A constant that can be used as an immediate value in a 8935 // data-processing instruction. 
8936 if (ARM_AM::getT2SOImmVal(CVal) != -1) 8937 break; 8938 } else { 8939 // A constant that can be used as an immediate value in a 8940 // data-processing instruction. 8941 if (ARM_AM::getSOImmVal(CVal) != -1) 8942 break; 8943 } 8944 return; 8945 8946 case 'J': 8947 if (Subtarget->isThumb()) { // FIXME thumb2 8948 // This must be a constant between -255 and -1, for negated ADD 8949 // immediates. This can be used in GCC with an "n" modifier that 8950 // prints the negated value, for use with SUB instructions. It is 8951 // not useful otherwise but is implemented for compatibility. 8952 if (CVal >= -255 && CVal <= -1) 8953 break; 8954 } else { 8955 // This must be a constant between -4095 and 4095. It is not clear 8956 // what this constraint is intended for. Implemented for 8957 // compatibility with GCC. 8958 if (CVal >= -4095 && CVal <= 4095) 8959 break; 8960 } 8961 return; 8962 8963 case 'K': 8964 if (Subtarget->isThumb1Only()) { 8965 // A 32-bit value where only one byte has a nonzero value. Exclude 8966 // zero to match GCC. This constraint is used by GCC internally for 8967 // constants that can be loaded with a move/shift combination. 8968 // It is not useful otherwise but is implemented for compatibility. 8969 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 8970 break; 8971 } else if (Subtarget->isThumb2()) { 8972 // A constant whose bitwise inverse can be used as an immediate 8973 // value in a data-processing instruction. This can be used in GCC 8974 // with a "B" modifier that prints the inverted value, for use with 8975 // BIC and MVN instructions. It is not useful otherwise but is 8976 // implemented for compatibility. 8977 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 8978 break; 8979 } else { 8980 // A constant whose bitwise inverse can be used as an immediate 8981 // value in a data-processing instruction. This can be used in GCC 8982 // with a "B" modifier that prints the inverted value, for use with 8983 // BIC and MVN instructions. It is not useful otherwise but is 8984 // implemented for compatibility. 8985 if (ARM_AM::getSOImmVal(~CVal) != -1) 8986 break; 8987 } 8988 return; 8989 8990 case 'L': 8991 if (Subtarget->isThumb1Only()) { 8992 // This must be a constant between -7 and 7, 8993 // for 3-operand ADD/SUB immediate instructions. 8994 if (CVal >= -7 && CVal < 7) 8995 break; 8996 } else if (Subtarget->isThumb2()) { 8997 // A constant whose negation can be used as an immediate value in a 8998 // data-processing instruction. This can be used in GCC with an "n" 8999 // modifier that prints the negated value, for use with SUB 9000 // instructions. It is not useful otherwise but is implemented for 9001 // compatibility. 9002 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 9003 break; 9004 } else { 9005 // A constant whose negation can be used as an immediate value in a 9006 // data-processing instruction. This can be used in GCC with an "n" 9007 // modifier that prints the negated value, for use with SUB 9008 // instructions. It is not useful otherwise but is implemented for 9009 // compatibility. 9010 if (ARM_AM::getSOImmVal(-CVal) != -1) 9011 break; 9012 } 9013 return; 9014 9015 case 'M': 9016 if (Subtarget->isThumb()) { // FIXME thumb2 9017 // This must be a multiple of 4 between 0 and 1020, for 9018 // ADD sp + immediate. 9019 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 9020 break; 9021 } else { 9022 // A power of two or a constant between 0 and 32. 
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The ARM target isn't yet aware of offsets.
  return false;
}

bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;
  // There can be 1's on either or both "outsides"; all the "inside" bits
  // must be 0's, e.g. 0xf000000f qualifies but 0xff00ff00 does not.
  unsigned int lsb = 0, msb = 31;
  while (v & (1u << msb)) --msb;
  while (v & (1u << lsb)) ++lsb;
  for (unsigned int i = lsb; i <= msb; ++i) {
    if (v & (1u << i))
      return false;
  }
  return true;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM_AM::getFP32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM_AM::getFP64Imm(Imm) != -1;
  return false;
}
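// For example, with VFP3 the immediates +1.0 and +0.5 have an 8-bit VMOV
// encoding, so isFPImmLegal returns true for them; a constant such as 0.1
// has no such encoding and is instead loaded from the constant pool.
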
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile loads with NEON intrinsics not supported
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile stores with NEON intrinsics not supported
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_strexd: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_ldrexd: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  default:
    break;
  }

  return false;
}
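
// Note on the memVT computation in getTgtMemIntrinsic above: a call to
// llvm.arm.neon.vld2 returning { <4 x i32>, <4 x i32> } loads 32 bytes in
// total, so NumElts is 32 / 8 = 4 and memVT becomes v4i64, covering the
// whole region accessed by the load; the store cases sum the sizes of the
// vector arguments in the same way.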