ARMISelLowering.cpp revision 1e93df6f0b5ee6e36d7ec18e6035f0f5a53e5ec6
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions."),
  cl::init(false));

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(),
                       Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
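  // (The four flavors are the ISD pre-increment, pre-decrement, post-increment
  // and post-decrement addressing modes iterated over below.)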
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);

  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);

  setStackPointerRegisterToSaveRestore(ARM::SP);
  setSchedulingPreference(SchedulingForRegPressure);

  // FIXME: If-converter should use instruction latency to determine
  // profitability rather than relying on fixed limits.
  if (Subtarget->getCPUString() == "generic") {
    // Generic (and overly aggressive) if-conversion limits.
    setIfCvtBlockSizeLimit(10);
    setIfCvtDupBlockSizeLimit(2);
  } else if (Subtarget->hasV7Ops()) {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(1);
  } else if (Subtarget->hasV6Ops()) {
    setIfCvtBlockSizeLimit(2);
    setIfCvtDupBlockSizeLimit(1);
  } else {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(2);
  }

  maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
  // Do not enable CodePlacementOpt for now: it currently runs after the
  // ARMConstantIslandPass and messes up branch relaxation and placement
  // of constant islands.
  // benefitFromCodePlacementOpt = true;
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
  case ARMISD::CMOV: return "ARMISD::CMOV";
  case ARMISD::CNEG: return "ARMISD::CNEG";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
  case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
  case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
  case ARMISD::VSHRN: return "ARMISD::VSHRN";
  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:
return "ARMISD::VQSHRNs"; 549 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 550 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 551 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 552 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 553 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 554 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 555 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 556 case ARMISD::VDUP: return "ARMISD::VDUP"; 557 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 558 case ARMISD::VEXT: return "ARMISD::VEXT"; 559 case ARMISD::VREV64: return "ARMISD::VREV64"; 560 case ARMISD::VREV32: return "ARMISD::VREV32"; 561 case ARMISD::VREV16: return "ARMISD::VREV16"; 562 case ARMISD::VZIP: return "ARMISD::VZIP"; 563 case ARMISD::VUZP: return "ARMISD::VUZP"; 564 case ARMISD::VTRN: return "ARMISD::VTRN"; 565 case ARMISD::FMAX: return "ARMISD::FMAX"; 566 case ARMISD::FMIN: return "ARMISD::FMIN"; 567 } 568} 569 570/// getFunctionAlignment - Return the Log2 alignment of this function. 571unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { 572 return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1; 573} 574 575//===----------------------------------------------------------------------===// 576// Lowering Code 577//===----------------------------------------------------------------------===// 578 579/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 580static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 581 switch (CC) { 582 default: llvm_unreachable("Unknown condition code!"); 583 case ISD::SETNE: return ARMCC::NE; 584 case ISD::SETEQ: return ARMCC::EQ; 585 case ISD::SETGT: return ARMCC::GT; 586 case ISD::SETGE: return ARMCC::GE; 587 case ISD::SETLT: return ARMCC::LT; 588 case ISD::SETLE: return ARMCC::LE; 589 case ISD::SETUGT: return ARMCC::HI; 590 case ISD::SETUGE: return ARMCC::HS; 591 case ISD::SETULT: return ARMCC::LO; 592 case ISD::SETULE: return ARMCC::LS; 593 } 594} 595 596/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO: CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

// APCS f64 is in register pairs, possibly split to stack
static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          CCState &State, bool CanFail) {
  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // For the 2nd half of a v2f64, do not fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack.
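/// For byval arguments this emits a memcpy into the outgoing argument area
/// (via CreateCopyOfByValArgument) rather than a plain store.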
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset,
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {
  // ARM target does not yet support tail call optimization.
  isTailCall = false;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = Outs[realArgIdx].Val;
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && !isExt;
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }
  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32), InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = Outs[realRVLocIdx].Val;

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
                                                         ARMCP::CPBlockAddress,
                                                         PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               PseudoSourceValue::getConstantPool(), 0,
                               false, false, 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                             ARMCP::CPValue, PCAdj, "tlsgd", true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) {
  const GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                               ARMCP::CPValue, PCAdj, "gottpoff", true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  } else {
    // local exec model
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}

SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec" TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}

SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, UseGOTOFF ?
"GOTOFF" : "GOT"); 1417 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1418 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1419 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 1420 CPAddr, 1421 PseudoSourceValue::getConstantPool(), 0, 1422 false, false, 0); 1423 SDValue Chain = Result.getValue(1); 1424 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 1425 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 1426 if (!UseGOTOFF) 1427 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1428 PseudoSourceValue::getGOT(), 0, 1429 false, false, 0); 1430 return Result; 1431 } else { 1432 // If we have T2 ops, we can materialize the address directly via movt/movw 1433 // pair. This is always cheaper. 1434 if (Subtarget->useMovt()) { 1435 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 1436 DAG.getTargetGlobalAddress(GV, PtrVT)); 1437 } else { 1438 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1439 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1440 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1441 PseudoSourceValue::getConstantPool(), 0, 1442 false, false, 0); 1443 } 1444 } 1445} 1446 1447SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 1448 SelectionDAG &DAG) { 1449 MachineFunction &MF = DAG.getMachineFunction(); 1450 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1451 unsigned ARMPCLabelIndex = 0; 1452 EVT PtrVT = getPointerTy(); 1453 DebugLoc dl = Op.getDebugLoc(); 1454 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1455 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1456 SDValue CPAddr; 1457 if (RelocM == Reloc::Static) 1458 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1459 else { 1460 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1461 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 1462 ARMConstantPoolValue *CPV = 1463 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 1464 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1465 } 1466 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1467 1468 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1469 PseudoSourceValue::getConstantPool(), 0, 1470 false, false, 0); 1471 SDValue Chain = Result.getValue(1); 1472 1473 if (RelocM == Reloc::PIC_) { 1474 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1475 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1476 } 1477 1478 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 1479 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1480 PseudoSourceValue::getGOT(), 0, 1481 false, false, 0); 1482 1483 return Result; 1484} 1485 1486SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 1487 SelectionDAG &DAG){ 1488 assert(Subtarget->isTargetELF() && 1489 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 1490 MachineFunction &MF = DAG.getMachineFunction(); 1491 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1492 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1493 EVT PtrVT = getPointerTy(); 1494 DebugLoc dl = Op.getDebugLoc(); 1495 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1496 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1497 "_GLOBAL_OFFSET_TABLE_", 1498 ARMPCLabelIndex, PCAdj); 1499 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1500 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1501 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1502 PseudoSourceValue::getConstantPool(), 0, 1503 false, false, 0); 1504 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1505 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1506} 1507 1508SDValue 1509ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 1510 const ARMSubtarget *Subtarget) { 1511 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1512 DebugLoc dl = Op.getDebugLoc(); 1513 switch (IntNo) { 1514 default: return SDValue(); // Don't custom lower most intrinsics. 1515 case Intrinsic::arm_thread_pointer: { 1516 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1517 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1518 } 1519 case Intrinsic::eh_sjlj_lsda: { 1520 MachineFunction &MF = DAG.getMachineFunction(); 1521 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1522 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1523 EVT PtrVT = getPointerTy(); 1524 DebugLoc dl = Op.getDebugLoc(); 1525 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1526 SDValue CPAddr; 1527 unsigned PCAdj = (RelocM != Reloc::PIC_) 1528 ? 0 : (Subtarget->isThumb() ? 4 : 8); 1529 ARMConstantPoolValue *CPV = 1530 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 1531 ARMCP::CPLSDA, PCAdj); 1532 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1533 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1534 SDValue Result = 1535 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1536 PseudoSourceValue::getConstantPool(), 0, 1537 false, false, 0); 1538 SDValue Chain = Result.getValue(1); 1539 1540 if (RelocM == Reloc::PIC_) { 1541 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1542 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1543 } 1544 return Result; 1545 } 1546 case Intrinsic::eh_sjlj_setjmp: 1547 SDValue Val = Subtarget->isThumb() ? 
1548 DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : 1549 DAG.getConstant(0, MVT::i32); 1550 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1), 1551 Val); 1552 } 1553} 1554 1555static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 1556 const ARMSubtarget *Subtarget) { 1557 DebugLoc dl = Op.getDebugLoc(); 1558 SDValue Op5 = Op.getOperand(5); 1559 SDValue Res; 1560 unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); 1561 if (isDeviceBarrier) { 1562 if (Subtarget->hasV7Ops()) 1563 Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0)); 1564 else 1565 Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0), 1566 DAG.getConstant(0, MVT::i32)); 1567 } else { 1568 if (Subtarget->hasV7Ops()) 1569 Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 1570 else 1571 Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 1572 DAG.getConstant(0, MVT::i32)); 1573 } 1574 return Res; 1575} 1576 1577static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 1578 MachineFunction &MF = DAG.getMachineFunction(); 1579 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 1580 1581 // vastart just stores the address of the VarArgsFrameIndex slot into the 1582 // memory location argument. 1583 DebugLoc dl = Op.getDebugLoc(); 1584 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1585 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1586 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1587 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, 1588 false, false, 0); 1589} 1590 1591SDValue 1592ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { 1593 SDNode *Node = Op.getNode(); 1594 DebugLoc dl = Node->getDebugLoc(); 1595 EVT VT = Node->getValueType(0); 1596 SDValue Chain = Op.getOperand(0); 1597 SDValue Size = Op.getOperand(1); 1598 SDValue Align = Op.getOperand(2); 1599 1600 // Chain the dynamic stack allocation so that it doesn't modify the stack 1601 // pointer when other instructions are using the stack. 1602 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); 1603 1604 unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue(); 1605 unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); 1606 if (AlignVal > StackAlign) 1607 // Do this now since the selection pass cannot introduce new 1608 // target-independent nodes. 1609 Align = DAG.getConstant(-(uint64_t)AlignVal, VT); 1610 1611 // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, so we will end up 1612 // using an "add r, sp, r" instead. Negate the size now so we don't have to 1613 // do an even more horrible hack later.
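// For example (illustrative): a 1024-byte allocation does not fit the Thumb1
// "sub sp, #imm" form checked below (the immediate must be word-aligned and at
// most 508), so Size is rewritten as 0 - 1024 and the allocation can later be
// emitted with an add of the negated amount.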
1614 MachineFunction &MF = DAG.getMachineFunction(); 1615 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1616 if (AFI->isThumb1OnlyFunction()) { 1617 bool Negate = true; 1618 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size); 1619 if (C) { 1620 uint32_t Val = C->getZExtValue(); 1621 if (Val <= 508 && ((Val & 3) == 0)) 1622 Negate = false; 1623 } 1624 if (Negate) 1625 Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); 1626 } 1627 1628 SDVTList VTList = DAG.getVTList(VT, MVT::Other); 1629 SDValue Ops1[] = { Chain, Size, Align }; 1630 SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); 1631 Chain = Res.getValue(1); 1632 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), 1633 DAG.getIntPtrConstant(0, true), SDValue()); 1634 SDValue Ops2[] = { Res, Chain }; 1635 return DAG.getMergeValues(Ops2, 2, dl); 1636} 1637 1638SDValue 1639ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 1640 SDValue &Root, SelectionDAG &DAG, 1641 DebugLoc dl) { 1642 MachineFunction &MF = DAG.getMachineFunction(); 1643 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1644 1645 TargetRegisterClass *RC; 1646 if (AFI->isThumb1OnlyFunction()) 1647 RC = ARM::tGPRRegisterClass; 1648 else 1649 RC = ARM::GPRRegisterClass; 1650 1651 // Transform the arguments stored in physical registers into virtual ones. 1652 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1653 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 1654 1655 SDValue ArgValue2; 1656 if (NextVA.isMemLoc()) { 1657 MachineFrameInfo *MFI = MF.getFrameInfo(); 1658 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); 1659 1660 // Create load node to retrieve arguments from the stack. 1661 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1662 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 1663 PseudoSourceValue::getFixedStack(FI), 0, 1664 false, false, 0); 1665 } else { 1666 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 1667 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 1668 } 1669 1670 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 1671} 1672 1673SDValue 1674ARMTargetLowering::LowerFormalArguments(SDValue Chain, 1675 CallingConv::ID CallConv, bool isVarArg, 1676 const SmallVectorImpl<ISD::InputArg> 1677 &Ins, 1678 DebugLoc dl, SelectionDAG &DAG, 1679 SmallVectorImpl<SDValue> &InVals) { 1680 1681 MachineFunction &MF = DAG.getMachineFunction(); 1682 MachineFrameInfo *MFI = MF.getFrameInfo(); 1683 1684 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1685 1686 // Assign locations to all of the incoming arguments. 1687 SmallVector<CCValAssign, 16> ArgLocs; 1688 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 1689 *DAG.getContext()); 1690 CCInfo.AnalyzeFormalArguments(Ins, 1691 CCAssignFnForNode(CallConv, /* Return*/ false, 1692 isVarArg)); 1693 1694 SmallVector<SDValue, 16> ArgValues; 1695 1696 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1697 CCValAssign &VA = ArgLocs[i]; 1698 1699 // Arguments stored in registers. 1700 if (VA.isRegLoc()) { 1701 EVT RegVT = VA.getLocVT(); 1702 1703 SDValue ArgValue; 1704 if (VA.needsCustom()) { 1705 // f64 and vector types are split up into multiple registers or 1706 // combinations of registers and stack slots. 
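// For example (illustrative): a custom-lowered f64 may arrive split across
// {r0,r1}, or as one register plus a 4-byte stack slot; a v2f64 argument is
// rebuilt below from two such f64 pieces inserted into an undef v2f64 value.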
1707 if (VA.getLocVT() == MVT::v2f64) { 1708 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 1709 Chain, DAG, dl); 1710 VA = ArgLocs[++i]; // skip ahead to next loc 1711 SDValue ArgValue2; 1712 if (VA.isMemLoc()) { 1713 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), 1714 true, false); 1715 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1716 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 1717 PseudoSourceValue::getFixedStack(FI), 0, 1718 false, false, 0); 1719 } else { 1720 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 1721 Chain, DAG, dl); 1722 } 1723 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1724 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 1725 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 1726 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 1727 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 1728 } else 1729 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 1730 1731 } else { 1732 TargetRegisterClass *RC; 1733 1734 if (RegVT == MVT::f32) 1735 RC = ARM::SPRRegisterClass; 1736 else if (RegVT == MVT::f64) 1737 RC = ARM::DPRRegisterClass; 1738 else if (RegVT == MVT::v2f64) 1739 RC = ARM::QPRRegisterClass; 1740 else if (RegVT == MVT::i32) 1741 RC = (AFI->isThumb1OnlyFunction() ? 1742 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 1743 else 1744 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 1745 1746 // Transform the arguments in physical registers into virtual ones. 1747 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1748 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 1749 } 1750 1751 // If this is an 8 or 16-bit value, it is really passed promoted 1752 // to 32 bits. Insert an assert[sz]ext to capture this, then 1753 // truncate to the right size. 1754 switch (VA.getLocInfo()) { 1755 default: llvm_unreachable("Unknown loc info!"); 1756 case CCValAssign::Full: break; 1757 case CCValAssign::BCvt: 1758 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 1759 break; 1760 case CCValAssign::SExt: 1761 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 1762 DAG.getValueType(VA.getValVT())); 1763 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1764 break; 1765 case CCValAssign::ZExt: 1766 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 1767 DAG.getValueType(VA.getValVT())); 1768 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1769 break; 1770 } 1771 1772 InVals.push_back(ArgValue); 1773 1774 } else { // VA.isRegLoc() 1775 1776 // sanity check 1777 assert(VA.isMemLoc()); 1778 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 1779 1780 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; 1781 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 1782 true, false); 1783 1784 // Create load nodes to retrieve arguments from the stack. 
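// Each stack-passed argument gets a fixed frame object at its incoming offset;
// e.g. (illustrative) the first stack-passed i32 sits at [sp, #0] on function
// entry and is reloaded from that fixed object here.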
1785 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1786 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 1787 PseudoSourceValue::getFixedStack(FI), 0, 1788 false, false, 0)); 1789 } 1790 } 1791 1792 // varargs 1793 if (isVarArg) { 1794 static const unsigned GPRArgRegs[] = { 1795 ARM::R0, ARM::R1, ARM::R2, ARM::R3 1796 }; 1797 1798 unsigned NumGPRs = CCInfo.getFirstUnallocated 1799 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 1800 1801 unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); 1802 unsigned VARegSize = (4 - NumGPRs) * 4; 1803 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 1804 unsigned ArgOffset = CCInfo.getNextStackOffset(); 1805 if (VARegSaveSize) { 1806 // If this function is vararg, store any remaining integer argument regs 1807 // to their spots on the stack so that they may be loaded by dereferencing 1808 // the result of va_next. 1809 AFI->setVarArgsRegSaveSize(VARegSaveSize); 1810 AFI->setVarArgsFrameIndex( 1811 MFI->CreateFixedObject(VARegSaveSize, 1812 ArgOffset + VARegSaveSize - VARegSize, 1813 true, false)); 1814 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 1815 getPointerTy()); 1816 1817 SmallVector<SDValue, 4> MemOps; 1818 for (; NumGPRs < 4; ++NumGPRs) { 1819 TargetRegisterClass *RC; 1820 if (AFI->isThumb1OnlyFunction()) 1821 RC = ARM::tGPRRegisterClass; 1822 else 1823 RC = ARM::GPRRegisterClass; 1824 1825 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); 1826 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 1827 SDValue Store = 1828 DAG.getStore(Val.getValue(1), dl, Val, FIN, 1829 PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, 1830 false, false, 0); 1831 MemOps.push_back(Store); 1832 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 1833 DAG.getConstant(4, getPointerTy())); 1834 } 1835 if (!MemOps.empty()) 1836 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1837 &MemOps[0], MemOps.size()); 1838 } else 1839 // This will point to the next argument passed via stack. 1840 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, 1841 true, false)); 1842 } 1843 1844 return Chain; 1845} 1846 1847/// isFloatingPointZero - Return true if this is +0.0. 1848static bool isFloatingPointZero(SDValue Op) { 1849 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1850 return CFP->getValueAPF().isPosZero(); 1851 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1852 // Maybe this has already been legalized into the constant pool? 1853 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 1854 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 1855 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 1856 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1857 return CFP->getValueAPF().isPosZero(); 1858 } 1859 } 1860 return false; 1861} 1862 1863/// Returns an appropriate ARM CMP (cmp) and the corresponding condition code for 1864/// the given operands. 1865SDValue 1866ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 1867 SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { 1868 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 1869 unsigned C = RHSC->getZExtValue(); 1870 if (!isLegalICmpImmediate(C)) { 1871 // Constant does not fit, try adjusting it by one? 1872 switch (CC) { 1873 default: break; 1874 case ISD::SETLT: 1875 case ISD::SETGE: 1876 if (isLegalICmpImmediate(C-1)) { 1877 CC = (CC == ISD::SETLT) ?
ISD::SETLE : ISD::SETGT; 1878 RHS = DAG.getConstant(C-1, MVT::i32); 1879 } 1880 break; 1881 case ISD::SETULT: 1882 case ISD::SETUGE: 1883 if (C > 0 && isLegalICmpImmediate(C-1)) { 1884 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 1885 RHS = DAG.getConstant(C-1, MVT::i32); 1886 } 1887 break; 1888 case ISD::SETLE: 1889 case ISD::SETGT: 1890 if (isLegalICmpImmediate(C+1)) { 1891 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 1892 RHS = DAG.getConstant(C+1, MVT::i32); 1893 } 1894 break; 1895 case ISD::SETULE: 1896 case ISD::SETUGT: 1897 if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { 1898 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 1899 RHS = DAG.getConstant(C+1, MVT::i32); 1900 } 1901 break; 1902 } 1903 } 1904 } 1905 1906 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 1907 ARMISD::NodeType CompareType; 1908 switch (CondCode) { 1909 default: 1910 CompareType = ARMISD::CMP; 1911 break; 1912 case ARMCC::EQ: 1913 case ARMCC::NE: 1914 // Uses only Z Flag 1915 CompareType = ARMISD::CMPZ; 1916 break; 1917 } 1918 ARMCC = DAG.getConstant(CondCode, MVT::i32); 1919 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); 1920} 1921 1922/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 1923static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 1924 DebugLoc dl) { 1925 SDValue Cmp; 1926 if (!isFloatingPointZero(RHS)) 1927 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); 1928 else 1929 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); 1930 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); 1931} 1932 1933SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { 1934 EVT VT = Op.getValueType(); 1935 SDValue LHS = Op.getOperand(0); 1936 SDValue RHS = Op.getOperand(1); 1937 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 1938 SDValue TrueVal = Op.getOperand(2); 1939 SDValue FalseVal = Op.getOperand(3); 1940 DebugLoc dl = Op.getDebugLoc(); 1941 1942 if (LHS.getValueType() == MVT::i32) { 1943 SDValue ARMCC; 1944 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1945 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); 1946 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); 1947 } 1948 1949 ARMCC::CondCodes CondCode, CondCode2; 1950 FPCCToARMCC(CC, CondCode, CondCode2); 1951 1952 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); 1953 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1954 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 1955 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 1956 ARMCC, CCR, Cmp); 1957 if (CondCode2 != ARMCC::AL) { 1958 SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); 1959 // FIXME: Needs another CMP because flag can have but one use.
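// Illustrative example: SETONE ("ordered and not equal") has no single ARM
// condition code, so FPCCToARMCC returns a pair (e.g. MI and GT); the CMOV
// above handles the first condition and the CMOV below handles the second,
// each consuming its own compare.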
1960 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 1961 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 1962 Result, TrueVal, ARMCC2, CCR, Cmp2); 1963 } 1964 return Result; 1965} 1966 1967SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { 1968 SDValue Chain = Op.getOperand(0); 1969 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 1970 SDValue LHS = Op.getOperand(2); 1971 SDValue RHS = Op.getOperand(3); 1972 SDValue Dest = Op.getOperand(4); 1973 DebugLoc dl = Op.getDebugLoc(); 1974 1975 if (LHS.getValueType() == MVT::i32) { 1976 SDValue ARMCC; 1977 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1978 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); 1979 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 1980 Chain, Dest, ARMCC, CCR,Cmp); 1981 } 1982 1983 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 1984 ARMCC::CondCodes CondCode, CondCode2; 1985 FPCCToARMCC(CC, CondCode, CondCode2); 1986 1987 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 1988 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); 1989 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1990 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 1991 SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; 1992 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 1993 if (CondCode2 != ARMCC::AL) { 1994 ARMCC = DAG.getConstant(CondCode2, MVT::i32); 1995 SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; 1996 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 1997 } 1998 return Res; 1999} 2000 2001SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { 2002 SDValue Chain = Op.getOperand(0); 2003 SDValue Table = Op.getOperand(1); 2004 SDValue Index = Op.getOperand(2); 2005 DebugLoc dl = Op.getDebugLoc(); 2006 2007 EVT PTy = getPointerTy(); 2008 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2009 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2010 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2011 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2012 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2013 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2014 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2015 if (Subtarget->isThumb2()) { 2016 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2017 // which does another jump to the destination. This also makes it easier 2018 // to translate it to TBB / TBH later. 2019 // FIXME: This might not work if the function is extremely large. 
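// (Illustrative) TBB/TBH branch forward by twice a byte/halfword entry read
// from a table that immediately follows the instruction, so keeping the jump
// and its table together here makes that later conversion straightforward.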
2020 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2021 Addr, Op.getOperand(2), JTI, UId); 2022 } 2023 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2024 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2025 PseudoSourceValue::getJumpTable(), 0, 2026 false, false, 0); 2027 Chain = Addr.getValue(1); 2028 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2029 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2030 } else { 2031 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2032 PseudoSourceValue::getJumpTable(), 0, false, false, 0); 2033 Chain = Addr.getValue(1); 2034 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2035 } 2036} 2037 2038static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 2039 DebugLoc dl = Op.getDebugLoc(); 2040 unsigned Opc; 2041 2042 switch (Op.getOpcode()) { 2043 default: 2044 assert(0 && "Invalid opcode!"); 2045 case ISD::FP_TO_SINT: 2046 Opc = ARMISD::FTOSI; 2047 break; 2048 case ISD::FP_TO_UINT: 2049 Opc = ARMISD::FTOUI; 2050 break; 2051 } 2052 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2053 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); 2054} 2055 2056static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2057 EVT VT = Op.getValueType(); 2058 DebugLoc dl = Op.getDebugLoc(); 2059 unsigned Opc; 2060 2061 switch (Op.getOpcode()) { 2062 default: 2063 assert(0 && "Invalid opcode!"); 2064 case ISD::SINT_TO_FP: 2065 Opc = ARMISD::SITOF; 2066 break; 2067 case ISD::UINT_TO_FP: 2068 Opc = ARMISD::UITOF; 2069 break; 2070 } 2071 2072 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); 2073 return DAG.getNode(Opc, dl, VT, Op); 2074} 2075 2076static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { 2077 // Implement fcopysign with a fabs and a conditional fneg. 2078 SDValue Tmp0 = Op.getOperand(0); 2079 SDValue Tmp1 = Op.getOperand(1); 2080 DebugLoc dl = Op.getDebugLoc(); 2081 EVT VT = Op.getValueType(); 2082 EVT SrcVT = Tmp1.getValueType(); 2083 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); 2084 SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); 2085 SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); 2086 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2087 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); 2088} 2089 2090SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 2091 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 2092 MFI->setFrameAddressIsTaken(true); 2093 EVT VT = Op.getValueType(); 2094 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 2095 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2096 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 2097 ? ARM::R7 : ARM::R11; 2098 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 2099 while (Depth--) 2100 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, 2101 false, false, 0); 2102 return FrameAddr; 2103} 2104 2105SDValue 2106ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, 2107 SDValue Chain, 2108 SDValue Dst, SDValue Src, 2109 SDValue Size, unsigned Align, 2110 bool isVolatile, bool AlwaysInline, 2111 const Value *DstSV, uint64_t DstSVOff, 2112 const Value *SrcSV, uint64_t SrcSVOff){ 2113 // Do repeated 4-byte loads and stores. To be improved. 2114 // This requires 4-byte alignment. 
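// For example (illustrative): a 23-byte, word-aligned copy becomes five i32
// load/store pairs (grouped below so they can later be merged into ldm/stm)
// followed by an i16 and an i8 pair for the three trailing bytes.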
2115 if ((Align & 3) != 0) 2116 return SDValue(); 2117 // This requires the copy size to be a constant, preferably 2118 // within a subtarget-specific limit. 2119 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 2120 if (!ConstantSize) 2121 return SDValue(); 2122 uint64_t SizeVal = ConstantSize->getZExtValue(); 2123 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) 2124 return SDValue(); 2125 2126 unsigned BytesLeft = SizeVal & 3; 2127 unsigned NumMemOps = SizeVal >> 2; 2128 unsigned EmittedNumMemOps = 0; 2129 EVT VT = MVT::i32; 2130 unsigned VTSize = 4; 2131 unsigned i = 0; 2132 const unsigned MAX_LOADS_IN_LDM = 6; 2133 SDValue TFOps[MAX_LOADS_IN_LDM]; 2134 SDValue Loads[MAX_LOADS_IN_LDM]; 2135 uint64_t SrcOff = 0, DstOff = 0; 2136 2137 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the 2138 // same number of stores. The loads and stores will get combined into 2139 // ldm/stm later on. 2140 while (EmittedNumMemOps < NumMemOps) { 2141 for (i = 0; 2142 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 2143 Loads[i] = DAG.getLoad(VT, dl, Chain, 2144 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 2145 DAG.getConstant(SrcOff, MVT::i32)), 2146 SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); 2147 TFOps[i] = Loads[i].getValue(1); 2148 SrcOff += VTSize; 2149 } 2150 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2151 2152 for (i = 0; 2153 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 2154 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 2155 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 2156 DAG.getConstant(DstOff, MVT::i32)), 2157 DstSV, DstSVOff + DstOff, isVolatile, false, 0); 2158 DstOff += VTSize; 2159 } 2160 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2161 2162 EmittedNumMemOps += i; 2163 } 2164 2165 if (BytesLeft == 0) 2166 return Chain; 2167 2168 // Issue loads / stores for the trailing (1 - 3) bytes. 2169 unsigned BytesLeftSave = BytesLeft; 2170 i = 0; 2171 while (BytesLeft) { 2172 if (BytesLeft >= 2) { 2173 VT = MVT::i16; 2174 VTSize = 2; 2175 } else { 2176 VT = MVT::i8; 2177 VTSize = 1; 2178 } 2179 2180 Loads[i] = DAG.getLoad(VT, dl, Chain, 2181 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 2182 DAG.getConstant(SrcOff, MVT::i32)), 2183 SrcSV, SrcSVOff + SrcOff, false, false, 0); 2184 TFOps[i] = Loads[i].getValue(1); 2185 ++i; 2186 SrcOff += VTSize; 2187 BytesLeft -= VTSize; 2188 } 2189 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2190 2191 i = 0; 2192 BytesLeft = BytesLeftSave; 2193 while (BytesLeft) { 2194 if (BytesLeft >= 2) { 2195 VT = MVT::i16; 2196 VTSize = 2; 2197 } else { 2198 VT = MVT::i8; 2199 VTSize = 1; 2200 } 2201 2202 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 2203 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 2204 DAG.getConstant(DstOff, MVT::i32)), 2205 DstSV, DstSVOff + DstOff, false, false, 0); 2206 ++i; 2207 DstOff += VTSize; 2208 BytesLeft -= VTSize; 2209 } 2210 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2211} 2212 2213/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to 2214/// expand a bit convert where either the source or destination type is i64 to 2215/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 2216/// operand type is illegal (e.g., v2f32 for a target that doesn't support 2217/// vectors), since the legalizer won't know what to do with that.
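// For example (illustrative): "i64 = bit_convert f64 %x" becomes a VMOVRRD
// that produces the two i32 halves, rejoined with BUILD_PAIR; the opposite
// direction extracts the two i32 halves and rebuilds the f64 with VMOVDRR.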
2218static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { 2219 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2220 DebugLoc dl = N->getDebugLoc(); 2221 SDValue Op = N->getOperand(0); 2222 2223 // This function is only supposed to be called for i64 types, either as the 2224 // source or destination of the bit convert. 2225 EVT SrcVT = Op.getValueType(); 2226 EVT DstVT = N->getValueType(0); 2227 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 2228 "ExpandBIT_CONVERT called for non-i64 type"); 2229 2230 // Turn i64->f64 into VMOVDRR. 2231 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 2232 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2233 DAG.getConstant(0, MVT::i32)); 2234 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2235 DAG.getConstant(1, MVT::i32)); 2236 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 2237 } 2238 2239 // Turn f64->i64 into VMOVRRD. 2240 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 2241 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 2242 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 2243 // Merge the pieces into a single i64 value. 2244 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 2245 } 2246 2247 return SDValue(); 2248} 2249 2250/// getZeroVector - Returns a vector of specified type with all zero elements. 2251/// 2252static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2253 assert(VT.isVector() && "Expected a vector type"); 2254 2255 // Zero vectors are used to represent vector negation and in those cases 2256 // will be implemented with the NEON VNEG instruction. However, VNEG does 2257 // not support i64 elements, so sometimes the zero vectors will need to be 2258 // explicitly constructed. For those cases, and potentially other uses in 2259 // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted 2260 // to their dest type. This ensures they get CSE'd. 2261 SDValue Vec; 2262 SDValue Cst = DAG.getTargetConstant(0, MVT::i8); 2263 SmallVector<SDValue, 8> Ops; 2264 MVT TVT; 2265 2266 if (VT.getSizeInBits() == 64) { 2267 Ops.assign(8, Cst); TVT = MVT::v8i8; 2268 } else { 2269 Ops.assign(16, Cst); TVT = MVT::v16i8; 2270 } 2271 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); 2272 2273 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); 2274} 2275 2276/// getOnesVector - Returns a vector of specified type with all bits set. 2277/// 2278static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2279 assert(VT.isVector() && "Expected a vector type"); 2280 2281 // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their 2282 // dest type. This ensures they get CSE'd. 2283 SDValue Vec; 2284 SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); 2285 SmallVector<SDValue, 8> Ops; 2286 MVT TVT; 2287 2288 if (VT.getSizeInBits() == 64) { 2289 Ops.assign(8, Cst); TVT = MVT::v8i8; 2290 } else { 2291 Ops.assign(16, Cst); TVT = MVT::v16i8; 2292 } 2293 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); 2294 2295 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); 2296} 2297 2298/// LowerShiftRightParts - Lower SRA_PARTS, which returns two 2299/// i32 values and take a 2 x i32 value to shift plus a shift amount. 
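// A sketch of the logic implemented below: for a right shift of the pair
// {hi,lo} by n, the low result is (lo >> n) | (hi << (32-n)) when n < 32 and
// hi >> (n-32) otherwise; the compare on (n-32) >= 0 feeds a CMOV selecting
// between those two values, while the high result is simply hi shifted by n.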
2300SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { 2301 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2302 EVT VT = Op.getValueType(); 2303 unsigned VTBits = VT.getSizeInBits(); 2304 DebugLoc dl = Op.getDebugLoc(); 2305 SDValue ShOpLo = Op.getOperand(0); 2306 SDValue ShOpHi = Op.getOperand(1); 2307 SDValue ShAmt = Op.getOperand(2); 2308 SDValue ARMCC; 2309 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 2310 2311 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 2312 2313 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2314 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2315 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 2316 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2317 DAG.getConstant(VTBits, MVT::i32)); 2318 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 2319 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2320 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 2321 2322 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2323 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2324 ARMCC, DAG, dl); 2325 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 2326 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, 2327 CCR, Cmp); 2328 2329 SDValue Ops[2] = { Lo, Hi }; 2330 return DAG.getMergeValues(Ops, 2, dl); 2331} 2332 2333/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 2334/// i32 values and take a 2 x i32 value to shift plus a shift amount. 2335SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { 2336 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2337 EVT VT = Op.getValueType(); 2338 unsigned VTBits = VT.getSizeInBits(); 2339 DebugLoc dl = Op.getDebugLoc(); 2340 SDValue ShOpLo = Op.getOperand(0); 2341 SDValue ShOpHi = Op.getOperand(1); 2342 SDValue ShAmt = Op.getOperand(2); 2343 SDValue ARMCC; 2344 2345 assert(Op.getOpcode() == ISD::SHL_PARTS); 2346 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2347 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2348 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 2349 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2350 DAG.getConstant(VTBits, MVT::i32)); 2351 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 2352 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 2353 2354 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2355 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2356 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2357 ARMCC, DAG, dl); 2358 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 2359 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, 2360 CCR, Cmp); 2361 2362 SDValue Ops[2] = { Lo, Hi }; 2363 return DAG.getMergeValues(Ops, 2, dl); 2364} 2365 2366static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 2367 const ARMSubtarget *ST) { 2368 EVT VT = N->getValueType(0); 2369 DebugLoc dl = N->getDebugLoc(); 2370 2371 if (!ST->hasV6T2Ops()) 2372 return SDValue(); 2373 2374 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 2375 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 2376} 2377 2378static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 2379 const ARMSubtarget *ST) { 2380 EVT VT = N->getValueType(0); 2381 DebugLoc dl = N->getDebugLoc(); 2382 2383 // Lower vector shifts on NEON to use VSHL. 
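// NEON only has a "shift left by a (possibly negative) vector" form, so a
// right shift such as "x >> y" on v4i32 is emitted below as the vshifts or
// vshiftu intrinsic with a negated shift-amount vector (illustrative).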
2384 if (VT.isVector()) { 2385 assert(ST->hasNEON() && "unexpected vector shift"); 2386 2387 // Left shifts translate directly to the vshiftu intrinsic. 2388 if (N->getOpcode() == ISD::SHL) 2389 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2390 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 2391 N->getOperand(0), N->getOperand(1)); 2392 2393 assert((N->getOpcode() == ISD::SRA || 2394 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 2395 2396 // NEON uses the same intrinsics for both left and right shifts. For 2397 // right shifts, the shift amounts are negative, so negate the vector of 2398 // shift amounts. 2399 EVT ShiftVT = N->getOperand(1).getValueType(); 2400 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 2401 getZeroVector(ShiftVT, DAG, dl), 2402 N->getOperand(1)); 2403 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 2404 Intrinsic::arm_neon_vshifts : 2405 Intrinsic::arm_neon_vshiftu); 2406 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2407 DAG.getConstant(vshiftInt, MVT::i32), 2408 N->getOperand(0), NegatedCount); 2409 } 2410 2411 // We can get here for a node like i32 = ISD::SHL i32, i64 2412 if (VT != MVT::i64) 2413 return SDValue(); 2414 2415 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 2416 "Unknown shift to lower!"); 2417 2418 // We only lower SRA, SRL of 1 here, all others use generic lowering. 2419 if (!isa<ConstantSDNode>(N->getOperand(1)) || 2420 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 2421 return SDValue(); 2422 2423 // If we are in thumb mode, we don't have RRX. 2424 if (ST->isThumb1Only()) return SDValue(); 2425 2426 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 2427 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2428 DAG.getConstant(0, MVT::i32)); 2429 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2430 DAG.getConstant(1, MVT::i32)); 2431 2432 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 2433 // captures the result into a carry flag. 2434 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 2435 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); 2436 2437 // The low part is an ARMISD::RRX operand, which shifts the carry in. 2438 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 2439 2440 // Merge the pieces into a single i64 value. 
2441 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 2442} 2443 2444static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 2445 SDValue TmpOp0, TmpOp1; 2446 bool Invert = false; 2447 bool Swap = false; 2448 unsigned Opc = 0; 2449 2450 SDValue Op0 = Op.getOperand(0); 2451 SDValue Op1 = Op.getOperand(1); 2452 SDValue CC = Op.getOperand(2); 2453 EVT VT = Op.getValueType(); 2454 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2455 DebugLoc dl = Op.getDebugLoc(); 2456 2457 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 2458 switch (SetCCOpcode) { 2459 default: llvm_unreachable("Illegal FP comparison"); break; 2460 case ISD::SETUNE: 2461 case ISD::SETNE: Invert = true; // Fallthrough 2462 case ISD::SETOEQ: 2463 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2464 case ISD::SETOLT: 2465 case ISD::SETLT: Swap = true; // Fallthrough 2466 case ISD::SETOGT: 2467 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2468 case ISD::SETOLE: 2469 case ISD::SETLE: Swap = true; // Fallthrough 2470 case ISD::SETOGE: 2471 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2472 case ISD::SETUGE: Swap = true; // Fallthrough 2473 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 2474 case ISD::SETUGT: Swap = true; // Fallthrough 2475 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 2476 case ISD::SETUEQ: Invert = true; // Fallthrough 2477 case ISD::SETONE: 2478 // Expand this to (OLT | OGT). 2479 TmpOp0 = Op0; 2480 TmpOp1 = Op1; 2481 Opc = ISD::OR; 2482 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2483 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 2484 break; 2485 case ISD::SETUO: Invert = true; // Fallthrough 2486 case ISD::SETO: 2487 // Expand this to (OLT | OGE). 2488 TmpOp0 = Op0; 2489 TmpOp1 = Op1; 2490 Opc = ISD::OR; 2491 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2492 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 2493 break; 2494 } 2495 } else { 2496 // Integer comparisons. 2497 switch (SetCCOpcode) { 2498 default: llvm_unreachable("Illegal integer comparison"); break; 2499 case ISD::SETNE: Invert = true; 2500 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2501 case ISD::SETLT: Swap = true; 2502 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2503 case ISD::SETLE: Swap = true; 2504 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2505 case ISD::SETULT: Swap = true; 2506 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 2507 case ISD::SETULE: Swap = true; 2508 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 2509 } 2510 2511 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 2512 if (Opc == ARMISD::VCEQ) { 2513 2514 SDValue AndOp; 2515 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 2516 AndOp = Op0; 2517 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 2518 AndOp = Op1; 2519 2520 // Ignore bitconvert. 
2521 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) 2522 AndOp = AndOp.getOperand(0); 2523 2524 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 2525 Opc = ARMISD::VTST; 2526 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); 2527 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); 2528 Invert = !Invert; 2529 } 2530 } 2531 } 2532 2533 if (Swap) 2534 std::swap(Op0, Op1); 2535 2536 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 2537 2538 if (Invert) 2539 Result = DAG.getNOT(dl, Result, VT); 2540 2541 return Result; 2542} 2543 2544/// isVMOVSplat - Check if the specified splat value corresponds to an immediate 2545/// VMOV instruction, and if so, return the constant being splatted. 2546static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, 2547 unsigned SplatBitSize, SelectionDAG &DAG) { 2548 switch (SplatBitSize) { 2549 case 8: 2550 // Any 1-byte value is OK. 2551 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 2552 return DAG.getTargetConstant(SplatBits, MVT::i8); 2553 2554 case 16: 2555 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 2556 if ((SplatBits & ~0xff) == 0 || 2557 (SplatBits & ~0xff00) == 0) 2558 return DAG.getTargetConstant(SplatBits, MVT::i16); 2559 break; 2560 2561 case 32: 2562 // NEON's 32-bit VMOV supports splat values where: 2563 // * only one byte is nonzero, or 2564 // * the least significant byte is 0xff and the second byte is nonzero, or 2565 // * the least significant 2 bytes are 0xff and the third is nonzero. 2566 if ((SplatBits & ~0xff) == 0 || 2567 (SplatBits & ~0xff00) == 0 || 2568 (SplatBits & ~0xff0000) == 0 || 2569 (SplatBits & ~0xff000000) == 0) 2570 return DAG.getTargetConstant(SplatBits, MVT::i32); 2571 2572 if ((SplatBits & ~0xffff) == 0 && 2573 ((SplatBits | SplatUndef) & 0xff) == 0xff) 2574 return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); 2575 2576 if ((SplatBits & ~0xffffff) == 0 && 2577 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) 2578 return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); 2579 2580 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 2581 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 2582 // VMOV.I32. A (very) minor optimization would be to replicate the value 2583 // and fall through here to test for a valid 64-bit splat. But, then the 2584 // caller would also need to check and handle the change in size. 2585 break; 2586 2587 case 64: { 2588 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 2589 uint64_t BitMask = 0xff; 2590 uint64_t Val = 0; 2591 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 2592 if (((SplatBits | SplatUndef) & BitMask) == BitMask) 2593 Val |= BitMask; 2594 else if ((SplatBits & BitMask) != 0) 2595 return SDValue(); 2596 BitMask <<= 8; 2597 } 2598 return DAG.getTargetConstant(Val, MVT::i64); 2599 } 2600 2601 default: 2602 llvm_unreachable("unexpected size for isVMOVSplat"); 2603 break; 2604 } 2605 2606 return SDValue(); 2607} 2608 2609/// getVMOVImm - If this is a build_vector of constants which can be 2610/// formed by using a VMOV instruction of the specified element size, 2611/// return the constant being splatted. The ByteSize field indicates the 2612/// number of bytes of each element [1248]. 
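// For example (illustrative): a v4i32 splat of 0x00ff0000 has only one nonzero
// byte and can therefore be materialized with a single VMOV.I32, whereas a
// splat of 0x12345678 fails every test above and is left for other lowering
// (e.g. a constant-pool load).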
2613SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 2614 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 2615 APInt SplatBits, SplatUndef; 2616 unsigned SplatBitSize; 2617 bool HasAnyUndefs; 2618 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 2619 HasAnyUndefs, ByteSize * 8)) 2620 return SDValue(); 2621 2622 if (SplatBitSize > ByteSize * 8) 2623 return SDValue(); 2624 2625 return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), 2626 SplatBitSize, DAG); 2627} 2628 2629static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 2630 bool &ReverseVEXT, unsigned &Imm) { 2631 unsigned NumElts = VT.getVectorNumElements(); 2632 ReverseVEXT = false; 2633 Imm = M[0]; 2634 2635 // If this is a VEXT shuffle, the immediate value is the index of the first 2636 // element. The other shuffle indices must be the successive elements after 2637 // the first one. 2638 unsigned ExpectedElt = Imm; 2639 for (unsigned i = 1; i < NumElts; ++i) { 2640 // Increment the expected index. If it wraps around, it may still be 2641 // a VEXT but the source vectors must be swapped. 2642 ExpectedElt += 1; 2643 if (ExpectedElt == NumElts * 2) { 2644 ExpectedElt = 0; 2645 ReverseVEXT = true; 2646 } 2647 2648 if (ExpectedElt != static_cast<unsigned>(M[i])) 2649 return false; 2650 } 2651 2652 // Adjust the index value if the source operands will be swapped. 2653 if (ReverseVEXT) 2654 Imm -= NumElts; 2655 2656 return true; 2657} 2658 2659/// isVREVMask - Check if a vector shuffle corresponds to a VREV 2660/// instruction with the specified blocksize. (The order of the elements 2661/// within each block of the vector is reversed.) 2662static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 2663 unsigned BlockSize) { 2664 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 2665 "Only possible block sizes for VREV are: 16, 32, 64"); 2666 2667 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2668 if (EltSz == 64) 2669 return false; 2670 2671 unsigned NumElts = VT.getVectorNumElements(); 2672 unsigned BlockElts = M[0] + 1; 2673 2674 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 2675 return false; 2676 2677 for (unsigned i = 0; i < NumElts; ++i) { 2678 if ((unsigned) M[i] != 2679 (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 2680 return false; 2681 } 2682 2683 return true; 2684} 2685 2686static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 2687 unsigned &WhichResult) { 2688 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2689 if (EltSz == 64) 2690 return false; 2691 2692 unsigned NumElts = VT.getVectorNumElements(); 2693 WhichResult = (M[0] == 0 ? 0 : 1); 2694 for (unsigned i = 0; i < NumElts; i += 2) { 2695 if ((unsigned) M[i] != i + WhichResult || 2696 (unsigned) M[i+1] != i + NumElts + WhichResult) 2697 return false; 2698 } 2699 return true; 2700} 2701 2702/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 2703/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2704/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 2705static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2706 unsigned &WhichResult) { 2707 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2708 if (EltSz == 64) 2709 return false; 2710 2711 unsigned NumElts = VT.getVectorNumElements(); 2712 WhichResult = (M[0] == 0 ? 
0 : 1); 2713 for (unsigned i = 0; i < NumElts; i += 2) { 2714 if ((unsigned) M[i] != i + WhichResult || 2715 (unsigned) M[i+1] != i + WhichResult) 2716 return false; 2717 } 2718 return true; 2719} 2720 2721static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 2722 unsigned &WhichResult) { 2723 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2724 if (EltSz == 64) 2725 return false; 2726 2727 unsigned NumElts = VT.getVectorNumElements(); 2728 WhichResult = (M[0] == 0 ? 0 : 1); 2729 for (unsigned i = 0; i != NumElts; ++i) { 2730 if ((unsigned) M[i] != 2 * i + WhichResult) 2731 return false; 2732 } 2733 2734 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2735 if (VT.is64BitVector() && EltSz == 32) 2736 return false; 2737 2738 return true; 2739} 2740 2741/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 2742/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2743/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 2744static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2745 unsigned &WhichResult) { 2746 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2747 if (EltSz == 64) 2748 return false; 2749 2750 unsigned Half = VT.getVectorNumElements() / 2; 2751 WhichResult = (M[0] == 0 ? 0 : 1); 2752 for (unsigned j = 0; j != 2; ++j) { 2753 unsigned Idx = WhichResult; 2754 for (unsigned i = 0; i != Half; ++i) { 2755 if ((unsigned) M[i + j * Half] != Idx) 2756 return false; 2757 Idx += 2; 2758 } 2759 } 2760 2761 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2762 if (VT.is64BitVector() && EltSz == 32) 2763 return false; 2764 2765 return true; 2766} 2767 2768static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 2769 unsigned &WhichResult) { 2770 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2771 if (EltSz == 64) 2772 return false; 2773 2774 unsigned NumElts = VT.getVectorNumElements(); 2775 WhichResult = (M[0] == 0 ? 0 : 1); 2776 unsigned Idx = WhichResult * NumElts / 2; 2777 for (unsigned i = 0; i != NumElts; i += 2) { 2778 if ((unsigned) M[i] != Idx || 2779 (unsigned) M[i+1] != Idx + NumElts) 2780 return false; 2781 Idx += 1; 2782 } 2783 2784 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2785 if (VT.is64BitVector() && EltSz == 32) 2786 return false; 2787 2788 return true; 2789} 2790 2791/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 2792/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2793/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 2794static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2795 unsigned &WhichResult) { 2796 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2797 if (EltSz == 64) 2798 return false; 2799 2800 unsigned NumElts = VT.getVectorNumElements(); 2801 WhichResult = (M[0] == 0 ? 0 : 1); 2802 unsigned Idx = WhichResult * NumElts / 2; 2803 for (unsigned i = 0; i != NumElts; i += 2) { 2804 if ((unsigned) M[i] != Idx || 2805 (unsigned) M[i+1] != Idx) 2806 return false; 2807 Idx += 1; 2808 } 2809 2810 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2811 if (VT.is64BitVector() && EltSz == 32) 2812 return false; 2813 2814 return true; 2815} 2816 2817 2818static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2819 // Canonicalize all-zeros and all-ones vectors. 
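// (Illustrative) A splat of the constant 0 or all-ones is redirected below to
// getZeroVector / getOnesVector, which always build the value as a v8i8 or
// v16i8 BUILD_VECTOR bitcast to the requested type so that equivalent
// zero/ones vectors CSE to a single node.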
2820 ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode()); 2821 if (ConstVal->isNullValue()) 2822 return getZeroVector(VT, DAG, dl); 2823 if (ConstVal->isAllOnesValue()) 2824 return getOnesVector(VT, DAG, dl); 2825 2826 EVT CanonicalVT; 2827 if (VT.is64BitVector()) { 2828 switch (Val.getValueType().getSizeInBits()) { 2829 case 8: CanonicalVT = MVT::v8i8; break; 2830 case 16: CanonicalVT = MVT::v4i16; break; 2831 case 32: CanonicalVT = MVT::v2i32; break; 2832 case 64: CanonicalVT = MVT::v1i64; break; 2833 default: llvm_unreachable("unexpected splat element type"); break; 2834 } 2835 } else { 2836 assert(VT.is128BitVector() && "unknown splat vector size"); 2837 switch (Val.getValueType().getSizeInBits()) { 2838 case 8: CanonicalVT = MVT::v16i8; break; 2839 case 16: CanonicalVT = MVT::v8i16; break; 2840 case 32: CanonicalVT = MVT::v4i32; break; 2841 case 64: CanonicalVT = MVT::v2i64; break; 2842 default: llvm_unreachable("unexpected splat element type"); break; 2843 } 2844 } 2845 2846 // Build a canonical splat for this value. 2847 SmallVector<SDValue, 8> Ops; 2848 Ops.assign(CanonicalVT.getVectorNumElements(), Val); 2849 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], 2850 Ops.size()); 2851 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res); 2852} 2853 2854// If this is a case we can't handle, return null and let the default 2855// expansion code take care of it. 2856static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 2857 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 2858 DebugLoc dl = Op.getDebugLoc(); 2859 EVT VT = Op.getValueType(); 2860 2861 APInt SplatBits, SplatUndef; 2862 unsigned SplatBitSize; 2863 bool HasAnyUndefs; 2864 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 2865 if (SplatBitSize <= 64) { 2866 SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), 2867 SplatUndef.getZExtValue(), SplatBitSize, DAG); 2868 if (Val.getNode()) 2869 return BuildSplat(Val, VT, DAG, dl); 2870 } 2871 } 2872 2873 // If there are only 2 elements in a 128-bit vector, insert them into an 2874 // undef vector. This handles the common case for 128-bit vector argument 2875 // passing, where the insertions should be translated to subreg accesses 2876 // with no real instructions. 2877 if (VT.is128BitVector() && Op.getNumOperands() == 2) { 2878 SDValue Val = DAG.getUNDEF(VT); 2879 SDValue Op0 = Op.getOperand(0); 2880 SDValue Op1 = Op.getOperand(1); 2881 if (Op0.getOpcode() != ISD::UNDEF) 2882 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0, 2883 DAG.getIntPtrConstant(0)); 2884 if (Op1.getOpcode() != ISD::UNDEF) 2885 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1, 2886 DAG.getIntPtrConstant(1)); 2887 return Val; 2888 } 2889 2890 return SDValue(); 2891} 2892 2893/// isShuffleMaskLegal - Targets can use this to indicate that they only 2894/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 2895/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 2896/// are assumed to be legal. 2897bool 2898ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 2899 EVT VT) const { 2900 if (VT.getVectorNumElements() == 4 && 2901 (VT.is128BitVector() || VT.is64BitVector())) { 2902 unsigned PFIndexes[4]; 2903 for (unsigned i = 0; i != 4; ++i) { 2904 if (M[i] < 0) 2905 PFIndexes[i] = 8; 2906 else 2907 PFIndexes[i] = M[i]; 2908 } 2909 2910 // Compute the index in the perfect shuffle table. 
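// (Illustrative) Each of the four mask entries becomes a base-9 digit, where
// 0-7 selects an element of the two input vectors and 8 means "don't care",
// so the index is PF[0]*729 + PF[1]*81 + PF[2]*9 + PF[3]; the top two bits of
// the table entry encode the cost tested below.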
2911 unsigned PFTableIndex = 2912 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 2913 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 2914 unsigned Cost = (PFEntry >> 30); 2915 2916 if (Cost <= 4) 2917 return true; 2918 } 2919 2920 bool ReverseVEXT; 2921 unsigned Imm, WhichResult; 2922 2923 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 2924 isVREVMask(M, VT, 64) || 2925 isVREVMask(M, VT, 32) || 2926 isVREVMask(M, VT, 16) || 2927 isVEXTMask(M, VT, ReverseVEXT, Imm) || 2928 isVTRNMask(M, VT, WhichResult) || 2929 isVUZPMask(M, VT, WhichResult) || 2930 isVZIPMask(M, VT, WhichResult) || 2931 isVTRN_v_undef_Mask(M, VT, WhichResult) || 2932 isVUZP_v_undef_Mask(M, VT, WhichResult) || 2933 isVZIP_v_undef_Mask(M, VT, WhichResult)); 2934} 2935 2936/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 2937/// the specified operations to build the shuffle. 2938static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 2939 SDValue RHS, SelectionDAG &DAG, 2940 DebugLoc dl) { 2941 unsigned OpNum = (PFEntry >> 26) & 0x0F; 2942 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 2943 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 2944 2945 enum { 2946 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 2947 OP_VREV, 2948 OP_VDUP0, 2949 OP_VDUP1, 2950 OP_VDUP2, 2951 OP_VDUP3, 2952 OP_VEXT1, 2953 OP_VEXT2, 2954 OP_VEXT3, 2955 OP_VUZPL, // VUZP, left result 2956 OP_VUZPR, // VUZP, right result 2957 OP_VZIPL, // VZIP, left result 2958 OP_VZIPR, // VZIP, right result 2959 OP_VTRNL, // VTRN, left result 2960 OP_VTRNR // VTRN, right result 2961 }; 2962 2963 if (OpNum == OP_COPY) { 2964 if (LHSID == (1*9+2)*9+3) return LHS; 2965 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 2966 return RHS; 2967 } 2968 2969 SDValue OpLHS, OpRHS; 2970 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 2971 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 2972 EVT VT = OpLHS.getValueType(); 2973 2974 switch (OpNum) { 2975 default: llvm_unreachable("Unknown shuffle opcode!"); 2976 case OP_VREV: 2977 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 2978 case OP_VDUP0: 2979 case OP_VDUP1: 2980 case OP_VDUP2: 2981 case OP_VDUP3: 2982 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 2983 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 2984 case OP_VEXT1: 2985 case OP_VEXT2: 2986 case OP_VEXT3: 2987 return DAG.getNode(ARMISD::VEXT, dl, VT, 2988 OpLHS, OpRHS, 2989 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 2990 case OP_VUZPL: 2991 case OP_VUZPR: 2992 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 2993 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 2994 case OP_VZIPL: 2995 case OP_VZIPR: 2996 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 2997 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 2998 case OP_VTRNL: 2999 case OP_VTRNR: 3000 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3001 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 3002 } 3003} 3004 3005static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 3006 SDValue V1 = Op.getOperand(0); 3007 SDValue V2 = Op.getOperand(1); 3008 DebugLoc dl = Op.getDebugLoc(); 3009 EVT VT = Op.getValueType(); 3010 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 3011 SmallVector<int, 8> ShuffleMask; 3012 3013 // Convert shuffles that are directly supported on NEON to target-specific 3014 // DAG nodes, instead of keeping them as shuffles and matching them again 3015 // during code selection. 
This is more efficient and avoids the possibility 3016 // of inconsistencies between legalization and selection. 3017 // FIXME: floating-point vectors should be canonicalized to integer vectors 3018 // of the same type so that they get CSEd properly. 3019 SVN->getMask(ShuffleMask); 3020 3021 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 3022 int Lane = SVN->getSplatIndex(); 3023 // If this is an undef splat, generate it via "just" vdup, if possible. 3024 if (Lane == -1) Lane = 0; 3025 3026 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 3027 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 3028 } 3029 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 3030 DAG.getConstant(Lane, MVT::i32)); 3031 } 3032 3033 bool ReverseVEXT; 3034 unsigned Imm; 3035 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 3036 if (ReverseVEXT) 3037 std::swap(V1, V2); 3038 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 3039 DAG.getConstant(Imm, MVT::i32)); 3040 } 3041 3042 if (isVREVMask(ShuffleMask, VT, 64)) 3043 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 3044 if (isVREVMask(ShuffleMask, VT, 32)) 3045 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 3046 if (isVREVMask(ShuffleMask, VT, 16)) 3047 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 3048 3049 // Check for Neon shuffles that modify both input vectors in place. 3050 // If both results are used, i.e., if there are two shuffles with the same 3051 // source operands and with masks corresponding to both results of one of 3052 // these operations, DAG memoization will ensure that a single node is 3053 // used for both shuffles. 3054 unsigned WhichResult; 3055 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 3056 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3057 V1, V2).getValue(WhichResult); 3058 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 3059 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3060 V1, V2).getValue(WhichResult); 3061 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 3062 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3063 V1, V2).getValue(WhichResult); 3064 3065 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3066 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3067 V1, V1).getValue(WhichResult); 3068 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3069 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3070 V1, V1).getValue(WhichResult); 3071 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3072 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3073 V1, V1).getValue(WhichResult); 3074 3075 // If the shuffle is not directly supported and it has 4 elements, use 3076 // the PerfectShuffle-generated table to synthesize it from other shuffles. 3077 if (VT.getVectorNumElements() == 4 && 3078 (VT.is128BitVector() || VT.is64BitVector())) { 3079 unsigned PFIndexes[4]; 3080 for (unsigned i = 0; i != 4; ++i) { 3081 if (ShuffleMask[i] < 0) 3082 PFIndexes[i] = 8; 3083 else 3084 PFIndexes[i] = ShuffleMask[i]; 3085 } 3086 3087 // Compute the index in the perfect shuffle table.
3088 unsigned PFTableIndex = 3089 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3090 3091 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3092 unsigned Cost = (PFEntry >> 30); 3093 3094 if (Cost <= 4) 3095 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 3096 } 3097 3098 return SDValue(); 3099} 3100 3101static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 3102 EVT VT = Op.getValueType(); 3103 DebugLoc dl = Op.getDebugLoc(); 3104 SDValue Vec = Op.getOperand(0); 3105 SDValue Lane = Op.getOperand(1); 3106 assert(VT == MVT::i32 && 3107 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && 3108 "unexpected type for custom-lowering vector extract"); 3109 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 3110} 3111 3112static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 3113 // The only time a CONCAT_VECTORS operation can have legal types is when 3114 // two 64-bit vectors are concatenated to a 128-bit vector. 3115 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 3116 "unexpected CONCAT_VECTORS"); 3117 DebugLoc dl = Op.getDebugLoc(); 3118 SDValue Val = DAG.getUNDEF(MVT::v2f64); 3119 SDValue Op0 = Op.getOperand(0); 3120 SDValue Op1 = Op.getOperand(1); 3121 if (Op0.getOpcode() != ISD::UNDEF) 3122 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3123 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), 3124 DAG.getIntPtrConstant(0)); 3125 if (Op1.getOpcode() != ISD::UNDEF) 3126 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3127 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), 3128 DAG.getIntPtrConstant(1)); 3129 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); 3130} 3131 3132SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { 3133 switch (Op.getOpcode()) { 3134 default: llvm_unreachable("Don't know how to custom lower this!"); 3135 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3136 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 3137 case ISD::GlobalAddress: 3138 return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : 3139 LowerGlobalAddressELF(Op, DAG); 3140 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 3141 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 3142 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 3143 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 3144 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 3145 case ISD::VASTART: return LowerVASTART(Op, DAG); 3146 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 3147 case ISD::SINT_TO_FP: 3148 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 3149 case ISD::FP_TO_SINT: 3150 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 3151 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 3152 case ISD::RETURNADDR: break; 3153 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 3154 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 3155 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 3156 Subtarget); 3157 case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); 3158 case ISD::SHL: 3159 case ISD::SRL: 3160 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 3161 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 3162 case ISD::SRL_PARTS: 3163 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 3164 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 3165 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 3166 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 3167 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3168 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 3169 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 3170 } 3171 return SDValue(); 3172} 3173 3174/// ReplaceNodeResults - Replace the results of a node with an illegal result 3175/// type with new values built out of custom code. 3176void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 3177 SmallVectorImpl<SDValue>&Results, 3178 SelectionDAG &DAG) { 3179 SDValue Res; 3180 switch (N->getOpcode()) { 3181 default: 3182 llvm_unreachable("Don't know how to custom expand this!"); 3183 break; 3184 case ISD::BIT_CONVERT: 3185 Res = ExpandBIT_CONVERT(N, DAG); 3186 break; 3187 case ISD::SRL: 3188 case ISD::SRA: 3189 Res = LowerShift(N, DAG, Subtarget); 3190 break; 3191 } 3192 if (Res.getNode()) 3193 Results.push_back(Res); 3194} 3195 3196//===----------------------------------------------------------------------===// 3197// ARM Scheduler Hooks 3198//===----------------------------------------------------------------------===// 3199 3200MachineBasicBlock * 3201ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 3202 MachineBasicBlock *BB, 3203 unsigned Size) const { 3204 unsigned dest = MI->getOperand(0).getReg(); 3205 unsigned ptr = MI->getOperand(1).getReg(); 3206 unsigned oldval = MI->getOperand(2).getReg(); 3207 unsigned newval = MI->getOperand(3).getReg(); 3208 unsigned scratch = BB->getParent()->getRegInfo() 3209 .createVirtualRegister(ARM::GPRRegisterClass); 3210 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3211 DebugLoc dl = MI->getDebugLoc(); 3212 bool isThumb2 = Subtarget->isThumb2(); 3213 3214 unsigned ldrOpc, strOpc; 3215 switch (Size) { 3216 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 3217 case 1: 3218 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3219 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3220 break; 3221 case 2: 3222 ldrOpc = isThumb2 ?
ARM::t2LDREXH : ARM::LDREXH; 3223 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3224 break; 3225 case 4: 3226 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3227 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 3228 break; 3229 } 3230 3231 MachineFunction *MF = BB->getParent(); 3232 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3233 MachineFunction::iterator It = BB; 3234 ++It; // insert the new blocks after the current block 3235 3236 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3237 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3238 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3239 MF->insert(It, loop1MBB); 3240 MF->insert(It, loop2MBB); 3241 MF->insert(It, exitMBB); 3242 exitMBB->transferSuccessors(BB); 3243 3244 // thisMBB: 3245 // ... 3246 // fallthrough --> loop1MBB 3247 BB->addSuccessor(loop1MBB); 3248 3249 // loop1MBB: 3250 // ldrex dest, [ptr] 3251 // cmp dest, oldval 3252 // bne exitMBB 3253 BB = loop1MBB; 3254 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3255 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 3256 .addReg(dest).addReg(oldval)); 3257 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3258 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3259 BB->addSuccessor(loop2MBB); 3260 BB->addSuccessor(exitMBB); 3261 3262 // loop2MBB: 3263 // strex scratch, newval, [ptr] 3264 // cmp scratch, #0 3265 // bne loop1MBB 3266 BB = loop2MBB; 3267 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) 3268 .addReg(ptr)); 3269 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3270 .addReg(scratch).addImm(0)); 3271 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3272 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3273 BB->addSuccessor(loop1MBB); 3274 BB->addSuccessor(exitMBB); 3275 3276 // exitMBB: 3277 // ... 3278 BB = exitMBB; 3279 3280 MF->DeleteMachineInstr(MI); // The instruction is gone now. 3281 3282 return BB; 3283} 3284 3285MachineBasicBlock * 3286ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 3287 unsigned Size, unsigned BinOpcode) const { 3288 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 3289 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3290 3291 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3292 MachineFunction *MF = BB->getParent(); 3293 MachineFunction::iterator It = BB; 3294 ++It; 3295 3296 unsigned dest = MI->getOperand(0).getReg(); 3297 unsigned ptr = MI->getOperand(1).getReg(); 3298 unsigned incr = MI->getOperand(2).getReg(); 3299 DebugLoc dl = MI->getDebugLoc(); 3300 3301 bool isThumb2 = Subtarget->isThumb2(); 3302 unsigned ldrOpc, strOpc; 3303 switch (Size) { 3304 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 3305 case 1: 3306 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3307 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3308 break; 3309 case 2: 3310 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3311 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3312 break; 3313 case 4: 3314 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3315 strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; 3316 break; 3317 } 3318 3319 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3320 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3321 MF->insert(It, loopMBB); 3322 MF->insert(It, exitMBB); 3323 exitMBB->transferSuccessors(BB); 3324 3325 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3326 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3327 unsigned scratch2 = (!BinOpcode) ? incr : 3328 RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3329 3330 // thisMBB: 3331 // ... 3332 // fallthrough --> loopMBB 3333 BB->addSuccessor(loopMBB); 3334 3335 // loopMBB: 3336 // ldrex dest, ptr 3337 // <binop> scratch2, dest, incr 3338 // strex scratch, scratch2, ptr 3339 // cmp scratch, #0 3340 // bne- loopMBB 3341 // fallthrough --> exitMBB 3342 BB = loopMBB; 3343 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3344 if (BinOpcode) { 3345 // operand order needs to go the other way for NAND 3346 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 3347 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3348 addReg(incr).addReg(dest)).addReg(0); 3349 else 3350 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3351 addReg(dest).addReg(incr)).addReg(0); 3352 } 3353 3354 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 3355 .addReg(ptr)); 3356 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3357 .addReg(scratch).addImm(0)); 3358 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3359 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3360 3361 BB->addSuccessor(loopMBB); 3362 BB->addSuccessor(exitMBB); 3363 3364 // exitMBB: 3365 // ... 3366 BB = exitMBB; 3367 3368 MF->DeleteMachineInstr(MI); // The instruction is gone now. 3369 3370 return BB; 3371} 3372 3373MachineBasicBlock * 3374ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 3375 MachineBasicBlock *BB, 3376 DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { 3377 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3378 DebugLoc dl = MI->getDebugLoc(); 3379 bool isThumb2 = Subtarget->isThumb2(); 3380 switch (MI->getOpcode()) { 3381 default: 3382 MI->dump(); 3383 llvm_unreachable("Unexpected instr type to insert"); 3384 3385 case ARM::ATOMIC_LOAD_ADD_I8: 3386 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3387 case ARM::ATOMIC_LOAD_ADD_I16: 3388 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3389 case ARM::ATOMIC_LOAD_ADD_I32: 3390 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3391 3392 case ARM::ATOMIC_LOAD_AND_I8: 3393 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3394 case ARM::ATOMIC_LOAD_AND_I16: 3395 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3396 case ARM::ATOMIC_LOAD_AND_I32: 3397 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3398 3399 case ARM::ATOMIC_LOAD_OR_I8: 3400 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3401 case ARM::ATOMIC_LOAD_OR_I16: 3402 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3403 case ARM::ATOMIC_LOAD_OR_I32: 3404 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3405 3406 case ARM::ATOMIC_LOAD_XOR_I8: 3407 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? 
ARM::t2EORrr : ARM::EORrr); 3408 case ARM::ATOMIC_LOAD_XOR_I16: 3409 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3410 case ARM::ATOMIC_LOAD_XOR_I32: 3411 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3412 3413 case ARM::ATOMIC_LOAD_NAND_I8: 3414 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3415 case ARM::ATOMIC_LOAD_NAND_I16: 3416 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3417 case ARM::ATOMIC_LOAD_NAND_I32: 3418 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3419 3420 case ARM::ATOMIC_LOAD_SUB_I8: 3421 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3422 case ARM::ATOMIC_LOAD_SUB_I16: 3423 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3424 case ARM::ATOMIC_LOAD_SUB_I32: 3425 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3426 3427 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 3428 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 3429 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 3430 3431 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 3432 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 3433 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 3434 3435 case ARM::tMOVCCr_pseudo: { 3436 // To "insert" a SELECT_CC instruction, we actually have to insert the 3437 // diamond control-flow pattern. The incoming instruction knows the 3438 // destination vreg to set, the condition code register to branch on, the 3439 // true/false values to select between, and a branch opcode to use. 3440 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3441 MachineFunction::iterator It = BB; 3442 ++It; 3443 3444 // thisMBB: 3445 // ... 3446 // TrueVal = ... 3447 // cmpTY ccX, r1, r2 3448 // bCC copy1MBB 3449 // fallthrough --> copy0MBB 3450 MachineBasicBlock *thisMBB = BB; 3451 MachineFunction *F = BB->getParent(); 3452 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 3453 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 3454 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 3455 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 3456 F->insert(It, copy0MBB); 3457 F->insert(It, sinkMBB); 3458 // Update machine-CFG edges by first adding all successors of the current 3459 // block to the new block which will contain the Phi node for the select. 3460 // Also inform sdisel of the edge changes. 3461 for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), 3462 E = BB->succ_end(); I != E; ++I) { 3463 EM->insert(std::make_pair(*I, sinkMBB)); 3464 sinkMBB->addSuccessor(*I); 3465 } 3466 // Next, remove all successors of the current block, and add the true 3467 // and fallthrough blocks as its successors. 3468 while (!BB->succ_empty()) 3469 BB->removeSuccessor(BB->succ_begin()); 3470 BB->addSuccessor(copy0MBB); 3471 BB->addSuccessor(sinkMBB); 3472 3473 // copy0MBB: 3474 // %FalseValue = ... 3475 // # fallthrough to sinkMBB 3476 BB = copy0MBB; 3477 3478 // Update machine-CFG edges 3479 BB->addSuccessor(sinkMBB); 3480 3481 // sinkMBB: 3482 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 3483 // ... 
3484 BB = sinkMBB; 3485 BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) 3486 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 3487 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 3488 3489 F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 3490 return BB; 3491 } 3492 3493 case ARM::tANDsp: 3494 case ARM::tADDspr_: 3495 case ARM::tSUBspi_: 3496 case ARM::t2SUBrSPi_: 3497 case ARM::t2SUBrSPi12_: 3498 case ARM::t2SUBrSPs_: { 3499 MachineFunction *MF = BB->getParent(); 3500 unsigned DstReg = MI->getOperand(0).getReg(); 3501 unsigned SrcReg = MI->getOperand(1).getReg(); 3502 bool DstIsDead = MI->getOperand(0).isDead(); 3503 bool SrcIsKill = MI->getOperand(1).isKill(); 3504 3505 if (SrcReg != ARM::SP) { 3506 // Copy the source to SP from virtual register. 3507 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); 3508 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 3509 ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; 3510 BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) 3511 .addReg(SrcReg, getKillRegState(SrcIsKill)); 3512 } 3513 3514 unsigned OpOpc = 0; 3515 bool NeedPred = false, NeedCC = false, NeedOp3 = false; 3516 switch (MI->getOpcode()) { 3517 default: 3518 llvm_unreachable("Unexpected pseudo instruction!"); 3519 case ARM::tANDsp: 3520 OpOpc = ARM::tAND; 3521 NeedPred = true; 3522 break; 3523 case ARM::tADDspr_: 3524 OpOpc = ARM::tADDspr; 3525 break; 3526 case ARM::tSUBspi_: 3527 OpOpc = ARM::tSUBspi; 3528 break; 3529 case ARM::t2SUBrSPi_: 3530 OpOpc = ARM::t2SUBrSPi; 3531 NeedPred = true; NeedCC = true; 3532 break; 3533 case ARM::t2SUBrSPi12_: 3534 OpOpc = ARM::t2SUBrSPi12; 3535 NeedPred = true; 3536 break; 3537 case ARM::t2SUBrSPs_: 3538 OpOpc = ARM::t2SUBrSPs; 3539 NeedPred = true; NeedCC = true; NeedOp3 = true; 3540 break; 3541 } 3542 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); 3543 if (OpOpc == ARM::tAND) 3544 AddDefaultT1CC(MIB); 3545 MIB.addReg(ARM::SP); 3546 MIB.addOperand(MI->getOperand(2)); 3547 if (NeedOp3) 3548 MIB.addOperand(MI->getOperand(3)); 3549 if (NeedPred) 3550 AddDefaultPred(MIB); 3551 if (NeedCC) 3552 AddDefaultCC(MIB); 3553 3554 // Copy the result from SP to virtual register. 3555 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); 3556 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 3557 ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; 3558 BuildMI(BB, dl, TII->get(CopyOpc)) 3559 .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) 3560 .addReg(ARM::SP); 3561 MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 3562 return BB; 3563 } 3564 } 3565} 3566 3567//===----------------------------------------------------------------------===// 3568// ARM Optimization Hooks 3569//===----------------------------------------------------------------------===// 3570 3571static 3572SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 3573 TargetLowering::DAGCombinerInfo &DCI) { 3574 SelectionDAG &DAG = DCI.DAG; 3575 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3576 EVT VT = N->getValueType(0); 3577 unsigned Opc = N->getOpcode(); 3578 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 3579 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 3580 SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); 3581 ISD::CondCode CC = ISD::SETCC_INVALID; 3582 3583 if (isSlctCC) { 3584 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 3585 } else { 3586 SDValue CCOp = Slct.getOperand(0); 3587 if (CCOp.getOpcode() == ISD::SETCC) 3588 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 3589 } 3590 3591 bool DoXform = false; 3592 bool InvCC = false; 3593 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 3594 "Bad input!"); 3595 3596 if (LHS.getOpcode() == ISD::Constant && 3597 cast<ConstantSDNode>(LHS)->isNullValue()) { 3598 DoXform = true; 3599 } else if (CC != ISD::SETCC_INVALID && 3600 RHS.getOpcode() == ISD::Constant && 3601 cast<ConstantSDNode>(RHS)->isNullValue()) { 3602 std::swap(LHS, RHS); 3603 SDValue Op0 = Slct.getOperand(0); 3604 EVT OpVT = isSlctCC ? Op0.getValueType() : 3605 Op0.getOperand(0).getValueType(); 3606 bool isInt = OpVT.isInteger(); 3607 CC = ISD::getSetCCInverse(CC, isInt); 3608 3609 if (!TLI.isCondCodeLegal(CC, OpVT)) 3610 return SDValue(); // Inverse operator isn't legal. 3611 3612 DoXform = true; 3613 InvCC = true; 3614 } 3615 3616 if (DoXform) { 3617 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 3618 if (isSlctCC) 3619 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 3620 Slct.getOperand(0), Slct.getOperand(1), CC); 3621 SDValue CCOp = Slct.getOperand(0); 3622 if (InvCC) 3623 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 3624 CCOp.getOperand(0), CCOp.getOperand(1), CC); 3625 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 3626 CCOp, OtherOp, Result); 3627 } 3628 return SDValue(); 3629} 3630 3631/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 3632static SDValue PerformADDCombine(SDNode *N, 3633 TargetLowering::DAGCombinerInfo &DCI) { 3634 // added by evan in r37685 with no testcase. 3635 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3636 3637 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 3638 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { 3639 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 3640 if (Result.getNode()) return Result; 3641 } 3642 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 3643 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 3644 if (Result.getNode()) return Result; 3645 } 3646 3647 return SDValue(); 3648} 3649 3650/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 3651static SDValue PerformSUBCombine(SDNode *N, 3652 TargetLowering::DAGCombinerInfo &DCI) { 3653 // added by evan in r37685 with no testcase. 3654 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3655 3656 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 3657 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 3658 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 3659 if (Result.getNode()) return Result; 3660 } 3661 3662 return SDValue(); 3663} 3664 3665/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 3666/// ARMISD::VMOVRRD. 
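/// VMOVRRD is the fmrrd-style move that splits a double-precision value into a GPR pair; when its input was just assembled by the matching VMOVDRR (fmdrr), the combine below simply forwards the original two registers instead.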
3667static SDValue PerformVMOVRRDCombine(SDNode *N, 3668 TargetLowering::DAGCombinerInfo &DCI) { 3669 // fmrrd(fmdrr x, y) -> x,y 3670 SDValue InDouble = N->getOperand(0); 3671 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 3672 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 3673 return SDValue(); 3674} 3675 3676/// getVShiftImm - Check if this is a valid build_vector for the immediate 3677/// operand of a vector shift operation, where all the elements of the 3678/// build_vector must have the same constant integer value. 3679static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 3680 // Ignore bit_converts. 3681 while (Op.getOpcode() == ISD::BIT_CONVERT) 3682 Op = Op.getOperand(0); 3683 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 3684 APInt SplatBits, SplatUndef; 3685 unsigned SplatBitSize; 3686 bool HasAnyUndefs; 3687 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 3688 HasAnyUndefs, ElementBits) || 3689 SplatBitSize > ElementBits) 3690 return false; 3691 Cnt = SplatBits.getSExtValue(); 3692 return true; 3693} 3694 3695/// isVShiftLImm - Check if this is a valid build_vector for the immediate 3696/// operand of a vector shift left operation. That value must be in the range: 3697/// 0 <= Value < ElementBits for a left shift; or 3698/// 0 <= Value <= ElementBits for a long left shift. 3699static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 3700 assert(VT.isVector() && "vector shift count is not a vector type"); 3701 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 3702 if (! getVShiftImm(Op, ElementBits, Cnt)) 3703 return false; 3704 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 3705} 3706 3707/// isVShiftRImm - Check if this is a valid build_vector for the immediate 3708/// operand of a vector shift right operation. For a shift opcode, the value 3709/// is positive, but for an intrinsic the value count must be negative. The 3710/// absolute value must be in the range: 3711/// 1 <= |Value| <= ElementBits for a right shift; or 3712/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 3713static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 3714 int64_t &Cnt) { 3715 assert(VT.isVector() && "vector shift count is not a vector type"); 3716 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 3717 if (! getVShiftImm(Op, ElementBits, Cnt)) 3718 return false; 3719 if (isIntrinsic) 3720 Cnt = -Cnt; 3721 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 3722} 3723 3724/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 3725static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 3726 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3727 switch (IntNo) { 3728 default: 3729 // Don't do anything for most intrinsics. 3730 break; 3731 3732 // Vector shifts: check for immediate versions and lower them. 3733 // Note: This is done during DAG combining instead of DAG legalizing because 3734 // the build_vectors for 64-bit vector element shift counts are generally 3735 // not legal, and it is hard to see their values after they get legalized to 3736 // loads from a constant pool. 
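 // For example (illustrative): a vshifts/vshiftu intrinsic whose shift operand is the constant splat <-3, -3, -3, -3> is an immediate right shift by 3 and is rewritten to an ARMISD::VSHRs/VSHRu node below; non-constant shift amounts are left untouched for the regular instruction patterns.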
3737 case Intrinsic::arm_neon_vshifts: 3738 case Intrinsic::arm_neon_vshiftu: 3739 case Intrinsic::arm_neon_vshiftls: 3740 case Intrinsic::arm_neon_vshiftlu: 3741 case Intrinsic::arm_neon_vshiftn: 3742 case Intrinsic::arm_neon_vrshifts: 3743 case Intrinsic::arm_neon_vrshiftu: 3744 case Intrinsic::arm_neon_vrshiftn: 3745 case Intrinsic::arm_neon_vqshifts: 3746 case Intrinsic::arm_neon_vqshiftu: 3747 case Intrinsic::arm_neon_vqshiftsu: 3748 case Intrinsic::arm_neon_vqshiftns: 3749 case Intrinsic::arm_neon_vqshiftnu: 3750 case Intrinsic::arm_neon_vqshiftnsu: 3751 case Intrinsic::arm_neon_vqrshiftns: 3752 case Intrinsic::arm_neon_vqrshiftnu: 3753 case Intrinsic::arm_neon_vqrshiftnsu: { 3754 EVT VT = N->getOperand(1).getValueType(); 3755 int64_t Cnt; 3756 unsigned VShiftOpc = 0; 3757 3758 switch (IntNo) { 3759 case Intrinsic::arm_neon_vshifts: 3760 case Intrinsic::arm_neon_vshiftu: 3761 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 3762 VShiftOpc = ARMISD::VSHL; 3763 break; 3764 } 3765 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 3766 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 3767 ARMISD::VSHRs : ARMISD::VSHRu); 3768 break; 3769 } 3770 return SDValue(); 3771 3772 case Intrinsic::arm_neon_vshiftls: 3773 case Intrinsic::arm_neon_vshiftlu: 3774 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 3775 break; 3776 llvm_unreachable("invalid shift count for vshll intrinsic"); 3777 3778 case Intrinsic::arm_neon_vrshifts: 3779 case Intrinsic::arm_neon_vrshiftu: 3780 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 3781 break; 3782 return SDValue(); 3783 3784 case Intrinsic::arm_neon_vqshifts: 3785 case Intrinsic::arm_neon_vqshiftu: 3786 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 3787 break; 3788 return SDValue(); 3789 3790 case Intrinsic::arm_neon_vqshiftsu: 3791 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 3792 break; 3793 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 3794 3795 case Intrinsic::arm_neon_vshiftn: 3796 case Intrinsic::arm_neon_vrshiftn: 3797 case Intrinsic::arm_neon_vqshiftns: 3798 case Intrinsic::arm_neon_vqshiftnu: 3799 case Intrinsic::arm_neon_vqshiftnsu: 3800 case Intrinsic::arm_neon_vqrshiftns: 3801 case Intrinsic::arm_neon_vqrshiftnu: 3802 case Intrinsic::arm_neon_vqrshiftnsu: 3803 // Narrowing shifts require an immediate right shift. 3804 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 3805 break; 3806 llvm_unreachable("invalid shift count for narrowing vector shift intrinsic"); 3807 3808 default: 3809 llvm_unreachable("unhandled vector shift"); 3810 } 3811 3812 switch (IntNo) { 3813 case Intrinsic::arm_neon_vshifts: 3814 case Intrinsic::arm_neon_vshiftu: 3815 // Opcode already set above. 3816 break; 3817 case Intrinsic::arm_neon_vshiftls: 3818 case Intrinsic::arm_neon_vshiftlu: 3819 if (Cnt == VT.getVectorElementType().getSizeInBits()) 3820 VShiftOpc = ARMISD::VSHLLi; 3821 else 3822 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
3823 ARMISD::VSHLLs : ARMISD::VSHLLu); 3824 break; 3825 case Intrinsic::arm_neon_vshiftn: 3826 VShiftOpc = ARMISD::VSHRN; break; 3827 case Intrinsic::arm_neon_vrshifts: 3828 VShiftOpc = ARMISD::VRSHRs; break; 3829 case Intrinsic::arm_neon_vrshiftu: 3830 VShiftOpc = ARMISD::VRSHRu; break; 3831 case Intrinsic::arm_neon_vrshiftn: 3832 VShiftOpc = ARMISD::VRSHRN; break; 3833 case Intrinsic::arm_neon_vqshifts: 3834 VShiftOpc = ARMISD::VQSHLs; break; 3835 case Intrinsic::arm_neon_vqshiftu: 3836 VShiftOpc = ARMISD::VQSHLu; break; 3837 case Intrinsic::arm_neon_vqshiftsu: 3838 VShiftOpc = ARMISD::VQSHLsu; break; 3839 case Intrinsic::arm_neon_vqshiftns: 3840 VShiftOpc = ARMISD::VQSHRNs; break; 3841 case Intrinsic::arm_neon_vqshiftnu: 3842 VShiftOpc = ARMISD::VQSHRNu; break; 3843 case Intrinsic::arm_neon_vqshiftnsu: 3844 VShiftOpc = ARMISD::VQSHRNsu; break; 3845 case Intrinsic::arm_neon_vqrshiftns: 3846 VShiftOpc = ARMISD::VQRSHRNs; break; 3847 case Intrinsic::arm_neon_vqrshiftnu: 3848 VShiftOpc = ARMISD::VQRSHRNu; break; 3849 case Intrinsic::arm_neon_vqrshiftnsu: 3850 VShiftOpc = ARMISD::VQRSHRNsu; break; 3851 } 3852 3853 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 3854 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 3855 } 3856 3857 case Intrinsic::arm_neon_vshiftins: { 3858 EVT VT = N->getOperand(1).getValueType(); 3859 int64_t Cnt; 3860 unsigned VShiftOpc = 0; 3861 3862 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 3863 VShiftOpc = ARMISD::VSLI; 3864 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 3865 VShiftOpc = ARMISD::VSRI; 3866 else { 3867 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 3868 } 3869 3870 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 3871 N->getOperand(1), N->getOperand(2), 3872 DAG.getConstant(Cnt, MVT::i32)); 3873 } 3874 3875 case Intrinsic::arm_neon_vqrshifts: 3876 case Intrinsic::arm_neon_vqrshiftu: 3877 // No immediate versions of these to check for. 3878 break; 3879 } 3880 3881 return SDValue(); 3882} 3883 3884/// PerformShiftCombine - Checks for immediate versions of vector shifts and 3885/// lowers them. As with the vector shift intrinsics, this is done during DAG 3886/// combining instead of DAG legalizing because the build_vectors for 64-bit 3887/// vector element shift counts are generally not legal, and it is hard to see 3888/// their values after they get legalized to loads from a constant pool. 3889static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 3890 const ARMSubtarget *ST) { 3891 EVT VT = N->getValueType(0); 3892 3893 // Nothing to be done for scalar shifts. 3894 if (! VT.isVector()) 3895 return SDValue(); 3896 3897 assert(ST->hasNEON() && "unexpected vector shift"); 3898 int64_t Cnt; 3899 3900 switch (N->getOpcode()) { 3901 default: llvm_unreachable("unexpected shift opcode"); 3902 3903 case ISD::SHL: 3904 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 3905 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 3906 DAG.getConstant(Cnt, MVT::i32)); 3907 break; 3908 3909 case ISD::SRA: 3910 case ISD::SRL: 3911 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 3912 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
3913 ARMISD::VSHRs : ARMISD::VSHRu); 3914 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 3915 DAG.getConstant(Cnt, MVT::i32)); 3916 } 3917 } 3918 return SDValue(); 3919} 3920 3921/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 3922/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 3923static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 3924 const ARMSubtarget *ST) { 3925 SDValue N0 = N->getOperand(0); 3926 3927 // Check for sign- and zero-extensions of vector extract operations of 8- 3928 // and 16-bit vector elements. NEON supports these directly. They are 3929 // handled during DAG combining because type legalization will promote them 3930 // to 32-bit types and it is messy to recognize the operations after that. 3931 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 3932 SDValue Vec = N0.getOperand(0); 3933 SDValue Lane = N0.getOperand(1); 3934 EVT VT = N->getValueType(0); 3935 EVT EltVT = N0.getValueType(); 3936 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3937 3938 if (VT == MVT::i32 && 3939 (EltVT == MVT::i8 || EltVT == MVT::i16) && 3940 TLI.isTypeLegal(Vec.getValueType())) { 3941 3942 unsigned Opc = 0; 3943 switch (N->getOpcode()) { 3944 default: llvm_unreachable("unexpected opcode"); 3945 case ISD::SIGN_EXTEND: 3946 Opc = ARMISD::VGETLANEs; 3947 break; 3948 case ISD::ZERO_EXTEND: 3949 case ISD::ANY_EXTEND: 3950 Opc = ARMISD::VGETLANEu; 3951 break; 3952 } 3953 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 3954 } 3955 } 3956 3957 return SDValue(); 3958} 3959 3960/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 3961/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 3962static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 3963 const ARMSubtarget *ST) { 3964 // If the target supports NEON, try to use vmax/vmin instructions for f32 3965 // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, 3966 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 3967 // a NaN; only do the transformation when it matches that behavior. 3968 3969 // For now only do this when using NEON for FP operations; if using VFP, it 3970 // is not obvious that the benefit outweighs the cost of switching to the 3971 // NEON pipeline. 3972 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 3973 N->getValueType(0) != MVT::f32) 3974 return SDValue(); 3975 3976 SDValue CondLHS = N->getOperand(0); 3977 SDValue CondRHS = N->getOperand(1); 3978 SDValue LHS = N->getOperand(2); 3979 SDValue RHS = N->getOperand(3); 3980 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 3981 3982 unsigned Opcode = 0; 3983 bool IsReversed; 3984 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 3985 IsReversed = false; // x CC y ? x : y 3986 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 3987 IsReversed = true ; // x CC y ? y : x 3988 } else { 3989 return SDValue(); 3990 } 3991 3992 bool IsUnordered; 3993 switch (CC) { 3994 default: break; 3995 case ISD::SETOLT: 3996 case ISD::SETOLE: 3997 case ISD::SETLT: 3998 case ISD::SETLE: 3999 case ISD::SETULT: 4000 case ISD::SETULE: 4001 // If LHS is NaN, an ordered comparison will be false and the result will 4002 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 4003 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
4004 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 4005 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4006 break; 4007 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 4008 // will return -0, so vmin can only be used for unsafe math or if one of 4009 // the operands is known to be nonzero. 4010 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 4011 !UnsafeFPMath && 4012 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4013 break; 4014 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 4015 break; 4016 4017 case ISD::SETOGT: 4018 case ISD::SETOGE: 4019 case ISD::SETGT: 4020 case ISD::SETGE: 4021 case ISD::SETUGT: 4022 case ISD::SETUGE: 4023 // If LHS is NaN, an ordered comparison will be false and the result will 4024 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 4025 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 4026 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 4027 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4028 break; 4029 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 4030 // will return +0, so vmax can only be used for unsafe math or if one of 4031 // the operands is known to be nonzero. 4032 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 4033 !UnsafeFPMath && 4034 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4035 break; 4036 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 4037 break; 4038 } 4039 4040 if (!Opcode) 4041 return SDValue(); 4042 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 4043} 4044 4045SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 4046 DAGCombinerInfo &DCI) const { 4047 switch (N->getOpcode()) { 4048 default: break; 4049 case ISD::ADD: return PerformADDCombine(N, DCI); 4050 case ISD::SUB: return PerformSUBCombine(N, DCI); 4051 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 4052 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 4053 case ISD::SHL: 4054 case ISD::SRA: 4055 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 4056 case ISD::SIGN_EXTEND: 4057 case ISD::ZERO_EXTEND: 4058 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 4059 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 4060 } 4061 return SDValue(); 4062} 4063 4064bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 4065 if (!Subtarget->hasV6Ops()) 4066 // Pre-v6 does not support unaligned mem access. 4067 return false; 4068 else { 4069 // v6+ may or may not support unaligned mem access depending on the system 4070 // configuration. 4071 // FIXME: This is pretty conservative. Should we provide cmdline option to 4072 // control the behaviour? 4073 if (!Subtarget->isTargetDarwin()) 4074 return false; 4075 } 4076 4077 switch (VT.getSimpleVT().SimpleTy) { 4078 default: 4079 return false; 4080 case MVT::i8: 4081 case MVT::i16: 4082 case MVT::i32: 4083 return true; 4084 // FIXME: VLD1 etc with standard alignment is legal. 
4085 } 4086} 4087 4088static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 4089 if (V < 0) 4090 return false; 4091 4092 unsigned Scale = 1; 4093 switch (VT.getSimpleVT().SimpleTy) { 4094 default: return false; 4095 case MVT::i1: 4096 case MVT::i8: 4097 // Scale == 1; 4098 break; 4099 case MVT::i16: 4100 // Scale == 2; 4101 Scale = 2; 4102 break; 4103 case MVT::i32: 4104 // Scale == 4; 4105 Scale = 4; 4106 break; 4107 } 4108 4109 if ((V & (Scale - 1)) != 0) 4110 return false; 4111 V /= Scale; 4112 return V == (V & ((1LL << 5) - 1)); 4113} 4114 4115static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 4116 const ARMSubtarget *Subtarget) { 4117 bool isNeg = false; 4118 if (V < 0) { 4119 isNeg = true; 4120 V = - V; 4121 } 4122 4123 switch (VT.getSimpleVT().SimpleTy) { 4124 default: return false; 4125 case MVT::i1: 4126 case MVT::i8: 4127 case MVT::i16: 4128 case MVT::i32: 4129 // + imm12 or - imm8 4130 if (isNeg) 4131 return V == (V & ((1LL << 8) - 1)); 4132 return V == (V & ((1LL << 12) - 1)); 4133 case MVT::f32: 4134 case MVT::f64: 4135 // Same as ARM mode. FIXME: NEON? 4136 if (!Subtarget->hasVFP2()) 4137 return false; 4138 if ((V & 3) != 0) 4139 return false; 4140 V >>= 2; 4141 return V == (V & ((1LL << 8) - 1)); 4142 } 4143} 4144 4145/// isLegalAddressImmediate - Return true if the integer value can be used 4146/// as the offset of the target addressing mode for load / store of the 4147/// given type. 4148static bool isLegalAddressImmediate(int64_t V, EVT VT, 4149 const ARMSubtarget *Subtarget) { 4150 if (V == 0) 4151 return true; 4152 4153 if (!VT.isSimple()) 4154 return false; 4155 4156 if (Subtarget->isThumb1Only()) 4157 return isLegalT1AddressImmediate(V, VT); 4158 else if (Subtarget->isThumb2()) 4159 return isLegalT2AddressImmediate(V, VT, Subtarget); 4160 4161 // ARM mode. 4162 if (V < 0) 4163 V = - V; 4164 switch (VT.getSimpleVT().SimpleTy) { 4165 default: return false; 4166 case MVT::i1: 4167 case MVT::i8: 4168 case MVT::i32: 4169 // +- imm12 4170 return V == (V & ((1LL << 12) - 1)); 4171 case MVT::i16: 4172 // +- imm8 4173 return V == (V & ((1LL << 8) - 1)); 4174 case MVT::f32: 4175 case MVT::f64: 4176 if (!Subtarget->hasVFP2()) // FIXME: NEON? 4177 return false; 4178 if ((V & 3) != 0) 4179 return false; 4180 V >>= 2; 4181 return V == (V & ((1LL << 8) - 1)); 4182 } 4183} 4184 4185bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 4186 EVT VT) const { 4187 int Scale = AM.Scale; 4188 if (Scale < 0) 4189 return false; 4190 4191 switch (VT.getSimpleVT().SimpleTy) { 4192 default: return false; 4193 case MVT::i1: 4194 case MVT::i8: 4195 case MVT::i16: 4196 case MVT::i32: 4197 if (Scale == 1) 4198 return true; 4199 // r + r << imm 4200 Scale = Scale & ~1; 4201 return Scale == 2 || Scale == 4 || Scale == 8; 4202 case MVT::i64: 4203 // r + r 4204 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 4205 return true; 4206 return false; 4207 case MVT::isVoid: 4208 // Note, we allow "void" uses (basically, uses that aren't loads or 4209 // stores), because arm allows folding a scale into many arithmetic 4210 // operations. This should be made more precise and revisited later. 4211 4212 // Allow r << imm, but the imm has to be a multiple of two. 4213 if (Scale & 1) return false; 4214 return isPowerOf2_32(Scale); 4215 } 4216} 4217 4218/// isLegalAddressingMode - Return true if the addressing mode represented 4219/// by AM is legal for this target, for a load/store of the specified type. 
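/// As a rough sketch of the rules enforced below (illustrative): an ARM-mode i32 access such as [r0, r1, lsl #2] (base register plus power-of-two scaled index, zero offset) is accepted, while folding a global's address or combining a scaled index with a nonzero base offset is rejected.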
4220bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 4221 const Type *Ty) const { 4222 EVT VT = getValueType(Ty, true); 4223 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 4224 return false; 4225 4226 // Can never fold addr of global into load/store. 4227 if (AM.BaseGV) 4228 return false; 4229 4230 switch (AM.Scale) { 4231 case 0: // no scale reg, must be "r+i" or "r", or "i". 4232 break; 4233 case 1: 4234 if (Subtarget->isThumb1Only()) 4235 return false; 4236 // FALL THROUGH. 4237 default: 4238 // ARM doesn't support any R+R*scale+imm addr modes. 4239 if (AM.BaseOffs) 4240 return false; 4241 4242 if (!VT.isSimple()) 4243 return false; 4244 4245 if (Subtarget->isThumb2()) 4246 return isLegalT2ScaledAddressingMode(AM, VT); 4247 4248 int Scale = AM.Scale; 4249 switch (VT.getSimpleVT().SimpleTy) { 4250 default: return false; 4251 case MVT::i1: 4252 case MVT::i8: 4253 case MVT::i32: 4254 if (Scale < 0) Scale = -Scale; 4255 if (Scale == 1) 4256 return true; 4257 // r + r << imm 4258 return isPowerOf2_32(Scale & ~1); 4259 case MVT::i16: 4260 case MVT::i64: 4261 // r + r 4262 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 4263 return true; 4264 return false; 4265 4266 case MVT::isVoid: 4267 // Note, we allow "void" uses (basically, uses that aren't loads or 4268 // stores), because arm allows folding a scale into many arithmetic 4269 // operations. This should be made more precise and revisited later. 4270 4271 // Allow r << imm, but the imm has to be a multiple of two. 4272 if (Scale & 1) return false; 4273 return isPowerOf2_32(Scale); 4274 } 4275 break; 4276 } 4277 return true; 4278} 4279 4280/// isLegalICmpImmediate - Return true if the specified immediate is legal 4281/// icmp immediate, that is the target has icmp instructions which can compare 4282/// a register against the immediate without having to materialize the 4283/// immediate into a register. 
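/// For instance (illustrative), #255 is legal in every mode, and #256 is legal for ARM and Thumb-2 (a rotatable modified immediate) but not for Thumb-1, which only accepts 0-255.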
4284bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 4285 if (!Subtarget->isThumb()) 4286 return ARM_AM::getSOImmVal(Imm) != -1; 4287 if (Subtarget->isThumb2()) 4288 return ARM_AM::getT2SOImmVal(Imm) != -1; 4289 return Imm >= 0 && Imm <= 255; 4290} 4291 4292static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 4293 bool isSEXTLoad, SDValue &Base, 4294 SDValue &Offset, bool &isInc, 4295 SelectionDAG &DAG) { 4296 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 4297 return false; 4298 4299 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 4300 // AddressingMode 3 4301 Base = Ptr->getOperand(0); 4302 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 4303 int RHSC = (int)RHS->getZExtValue(); 4304 if (RHSC < 0 && RHSC > -256) { 4305 assert(Ptr->getOpcode() == ISD::ADD); 4306 isInc = false; 4307 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 4308 return true; 4309 } 4310 } 4311 isInc = (Ptr->getOpcode() == ISD::ADD); 4312 Offset = Ptr->getOperand(1); 4313 return true; 4314 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 4315 // AddressingMode 2 4316 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 4317 int RHSC = (int)RHS->getZExtValue(); 4318 if (RHSC < 0 && RHSC > -0x1000) { 4319 assert(Ptr->getOpcode() == ISD::ADD); 4320 isInc = false; 4321 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 4322 Base = Ptr->getOperand(0); 4323 return true; 4324 } 4325 } 4326 4327 if (Ptr->getOpcode() == ISD::ADD) { 4328 isInc = true; 4329 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); 4330 if (ShOpcVal != ARM_AM::no_shift) { 4331 Base = Ptr->getOperand(1); 4332 Offset = Ptr->getOperand(0); 4333 } else { 4334 Base = Ptr->getOperand(0); 4335 Offset = Ptr->getOperand(1); 4336 } 4337 return true; 4338 } 4339 4340 isInc = (Ptr->getOpcode() == ISD::ADD); 4341 Base = Ptr->getOperand(0); 4342 Offset = Ptr->getOperand(1); 4343 return true; 4344 } 4345 4346 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 4347 return false; 4348} 4349 4350static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 4351 bool isSEXTLoad, SDValue &Base, 4352 SDValue &Offset, bool &isInc, 4353 SelectionDAG &DAG) { 4354 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 4355 return false; 4356 4357 Base = Ptr->getOperand(0); 4358 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 4359 int RHSC = (int)RHS->getZExtValue(); 4360 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 4361 assert(Ptr->getOpcode() == ISD::ADD); 4362 isInc = false; 4363 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 4364 return true; 4365 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 4366 isInc = Ptr->getOpcode() == ISD::ADD; 4367 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 4368 return true; 4369 } 4370 } 4371 4372 return false; 4373} 4374 4375/// getPreIndexedAddressParts - returns true by value, base pointer and 4376/// offset pointer and addressing mode by reference if the node's address 4377/// can be legally represented as pre-indexed load / store address. 
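/// For example (illustrative), a load from (add r1, #4) may be selected as the pre-indexed form "ldr r0, [r1, #4]!", which updates the base register as a side effect, provided the offset fits the addressing mode checked below.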
4378bool 4379ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 4380 SDValue &Offset, 4381 ISD::MemIndexedMode &AM, 4382 SelectionDAG &DAG) const { 4383 if (Subtarget->isThumb1Only()) 4384 return false; 4385 4386 EVT VT; 4387 SDValue Ptr; 4388 bool isSEXTLoad = false; 4389 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 4390 Ptr = LD->getBasePtr(); 4391 VT = LD->getMemoryVT(); 4392 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 4393 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 4394 Ptr = ST->getBasePtr(); 4395 VT = ST->getMemoryVT(); 4396 } else 4397 return false; 4398 4399 bool isInc; 4400 bool isLegal = false; 4401 if (Subtarget->isThumb2()) 4402 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 4403 Offset, isInc, DAG); 4404 else 4405 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 4406 Offset, isInc, DAG); 4407 if (!isLegal) 4408 return false; 4409 4410 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 4411 return true; 4412} 4413 4414/// getPostIndexedAddressParts - returns true by value, base pointer and 4415/// offset pointer and addressing mode by reference if this node can be 4416/// combined with a load / store to form a post-indexed load / store. 4417bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 4418 SDValue &Base, 4419 SDValue &Offset, 4420 ISD::MemIndexedMode &AM, 4421 SelectionDAG &DAG) const { 4422 if (Subtarget->isThumb1Only()) 4423 return false; 4424 4425 EVT VT; 4426 SDValue Ptr; 4427 bool isSEXTLoad = false; 4428 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 4429 VT = LD->getMemoryVT(); 4430 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 4431 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 4432 VT = ST->getMemoryVT(); 4433 } else 4434 return false; 4435 4436 bool isInc; 4437 bool isLegal = false; 4438 if (Subtarget->isThumb2()) 4439 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 4440 isInc, DAG); 4441 else 4442 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 4443 isInc, DAG); 4444 if (!isLegal) 4445 return false; 4446 4447 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 4448 return true; 4449} 4450 4451void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 4452 const APInt &Mask, 4453 APInt &KnownZero, 4454 APInt &KnownOne, 4455 const SelectionDAG &DAG, 4456 unsigned Depth) const { 4457 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 4458 switch (Op.getOpcode()) { 4459 default: break; 4460 case ARMISD::CMOV: { 4461 // Bits are known zero/one if known on the LHS and RHS. 4462 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 4463 if (KnownZero == 0 && KnownOne == 0) return; 4464 4465 APInt KnownZeroRHS, KnownOneRHS; 4466 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 4467 KnownZeroRHS, KnownOneRHS, Depth+1); 4468 KnownZero &= KnownZeroRHS; 4469 KnownOne &= KnownOneRHS; 4470 return; 4471 } 4472 } 4473} 4474 4475//===----------------------------------------------------------------------===// 4476// ARM Inline Assembly Support 4477//===----------------------------------------------------------------------===// 4478 4479/// getConstraintType - Given a constraint letter, return the type of 4480/// constraint it is for this target. 
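/// In this port, 'l' and 'w' are register-class constraints: 'l' selects the Thumb-friendly GPRs (r0-r7 when compiling Thumb) and 'w' selects the VFP/NEON register files, as the tables further below spell out; anything else is deferred to the generic TargetLowering handling.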
4481ARMTargetLowering::ConstraintType 4482ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 4483 if (Constraint.size() == 1) { 4484 switch (Constraint[0]) { 4485 default: break; 4486 case 'l': return C_RegisterClass; 4487 case 'w': return C_RegisterClass; 4488 } 4489 } 4490 return TargetLowering::getConstraintType(Constraint); 4491} 4492 4493std::pair<unsigned, const TargetRegisterClass*> 4494ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 4495 EVT VT) const { 4496 if (Constraint.size() == 1) { 4497 // GCC ARM Constraint Letters 4498 switch (Constraint[0]) { 4499 case 'l': 4500 if (Subtarget->isThumb()) 4501 return std::make_pair(0U, ARM::tGPRRegisterClass); 4502 else 4503 return std::make_pair(0U, ARM::GPRRegisterClass); 4504 case 'r': 4505 return std::make_pair(0U, ARM::GPRRegisterClass); 4506 case 'w': 4507 if (VT == MVT::f32) 4508 return std::make_pair(0U, ARM::SPRRegisterClass); 4509 if (VT.getSizeInBits() == 64) 4510 return std::make_pair(0U, ARM::DPRRegisterClass); 4511 if (VT.getSizeInBits() == 128) 4512 return std::make_pair(0U, ARM::QPRRegisterClass); 4513 break; 4514 } 4515 } 4516 if (StringRef("{cc}").equals_lower(Constraint)) 4517 return std::make_pair(0U, ARM::CCRRegisterClass); 4518 4519 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 4520} 4521 4522std::vector<unsigned> ARMTargetLowering:: 4523getRegClassForInlineAsmConstraint(const std::string &Constraint, 4524 EVT VT) const { 4525 if (Constraint.size() != 1) 4526 return std::vector<unsigned>(); 4527 4528 switch (Constraint[0]) { // GCC ARM Constraint Letters 4529 default: break; 4530 case 'l': 4531 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 4532 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 4533 0); 4534 case 'r': 4535 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 4536 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 4537 ARM::R8, ARM::R9, ARM::R10, ARM::R11, 4538 ARM::R12, ARM::LR, 0); 4539 case 'w': 4540 if (VT == MVT::f32) 4541 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, 4542 ARM::S4, ARM::S5, ARM::S6, ARM::S7, 4543 ARM::S8, ARM::S9, ARM::S10, ARM::S11, 4544 ARM::S12,ARM::S13,ARM::S14,ARM::S15, 4545 ARM::S16,ARM::S17,ARM::S18,ARM::S19, 4546 ARM::S20,ARM::S21,ARM::S22,ARM::S23, 4547 ARM::S24,ARM::S25,ARM::S26,ARM::S27, 4548 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); 4549 if (VT.getSizeInBits() == 64) 4550 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, 4551 ARM::D4, ARM::D5, ARM::D6, ARM::D7, 4552 ARM::D8, ARM::D9, ARM::D10,ARM::D11, 4553 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); 4554 if (VT.getSizeInBits() == 128) 4555 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, 4556 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); 4557 break; 4558 } 4559 4560 return std::vector<unsigned>(); 4561} 4562 4563/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 4564/// vector. If it is invalid, don't add anything to Ops. 4565void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 4566 char Constraint, 4567 bool hasMemory, 4568 std::vector<SDValue>&Ops, 4569 SelectionDAG &DAG) const { 4570 SDValue Result(0, 0); 4571 4572 switch (Constraint) { 4573 default: break; 4574 case 'I': case 'J': case 'K': case 'L': 4575 case 'M': case 'N': case 'O': 4576 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4577 if (!C) 4578 return; 4579 4580 int64_t CVal64 = C->getSExtValue(); 4581 int CVal = (int) CVal64; 4582 // None of these constraints allow values larger than 32 bits. 
Check 4583 // that the value fits in an int. 4584 if (CVal != CVal64) 4585 return; 4586 4587 switch (Constraint) { 4588 case 'I': 4589 if (Subtarget->isThumb1Only()) { 4590 // This must be a constant between 0 and 255, for ADD 4591 // immediates. 4592 if (CVal >= 0 && CVal <= 255) 4593 break; 4594 } else if (Subtarget->isThumb2()) { 4595 // A constant that can be used as an immediate value in a 4596 // data-processing instruction. 4597 if (ARM_AM::getT2SOImmVal(CVal) != -1) 4598 break; 4599 } else { 4600 // A constant that can be used as an immediate value in a 4601 // data-processing instruction. 4602 if (ARM_AM::getSOImmVal(CVal) != -1) 4603 break; 4604 } 4605 return; 4606 4607 case 'J': 4608 if (Subtarget->isThumb()) { // FIXME thumb2 4609 // This must be a constant between -255 and -1, for negated ADD 4610 // immediates. This can be used in GCC with an "n" modifier that 4611 // prints the negated value, for use with SUB instructions. It is 4612 // not useful otherwise but is implemented for compatibility. 4613 if (CVal >= -255 && CVal <= -1) 4614 break; 4615 } else { 4616 // This must be a constant between -4095 and 4095. It is not clear 4617 // what this constraint is intended for. Implemented for 4618 // compatibility with GCC. 4619 if (CVal >= -4095 && CVal <= 4095) 4620 break; 4621 } 4622 return; 4623 4624 case 'K': 4625 if (Subtarget->isThumb1Only()) { 4626 // A 32-bit value where only one byte has a nonzero value. Exclude 4627 // zero to match GCC. This constraint is used by GCC internally for 4628 // constants that can be loaded with a move/shift combination. 4629 // It is not useful otherwise but is implemented for compatibility. 4630 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 4631 break; 4632 } else if (Subtarget->isThumb2()) { 4633 // A constant whose bitwise inverse can be used as an immediate 4634 // value in a data-processing instruction. This can be used in GCC 4635 // with a "B" modifier that prints the inverted value, for use with 4636 // BIC and MVN instructions. It is not useful otherwise but is 4637 // implemented for compatibility. 4638 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 4639 break; 4640 } else { 4641 // A constant whose bitwise inverse can be used as an immediate 4642 // value in a data-processing instruction. This can be used in GCC 4643 // with a "B" modifier that prints the inverted value, for use with 4644 // BIC and MVN instructions. It is not useful otherwise but is 4645 // implemented for compatibility. 4646 if (ARM_AM::getSOImmVal(~CVal) != -1) 4647 break; 4648 } 4649 return; 4650 4651 case 'L': 4652 if (Subtarget->isThumb1Only()) { 4653 // This must be a constant between -7 and 7, 4654 // for 3-operand ADD/SUB immediate instructions. 4655 if (CVal >= -7 && CVal < 7) 4656 break; 4657 } else if (Subtarget->isThumb2()) { 4658 // A constant whose negation can be used as an immediate value in a 4659 // data-processing instruction. This can be used in GCC with an "n" 4660 // modifier that prints the negated value, for use with SUB 4661 // instructions. It is not useful otherwise but is implemented for 4662 // compatibility. 4663 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 4664 break; 4665 } else { 4666 // A constant whose negation can be used as an immediate value in a 4667 // data-processing instruction. This can be used in GCC with an "n" 4668 // modifier that prints the negated value, for use with SUB 4669 // instructions. It is not useful otherwise but is implemented for 4670 // compatibility. 
4671 if (ARM_AM::getSOImmVal(-CVal) != -1) 4672 break; 4673 } 4674 return; 4675 4676 case 'M': 4677 if (Subtarget->isThumb()) { // FIXME thumb2 4678 // This must be a multiple of 4 between 0 and 1020, for 4679 // ADD sp + immediate. 4680 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 4681 break; 4682 } else { 4683 // A power of two or a constant between 0 and 32. This is used in 4684 // GCC for the shift amount on shifted register operands, but it is 4685 // useful in general for any shift amounts. 4686 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 4687 break; 4688 } 4689 return; 4690 4691 case 'N': 4692 if (Subtarget->isThumb()) { // FIXME thumb2 4693 // This must be a constant between 0 and 31, for shift amounts. 4694 if (CVal >= 0 && CVal <= 31) 4695 break; 4696 } 4697 return; 4698 4699 case 'O': 4700 if (Subtarget->isThumb()) { // FIXME thumb2 4701 // This must be a multiple of 4 between -508 and 508, for 4702 // ADD/SUB sp = sp + immediate. 4703 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 4704 break; 4705 } 4706 return; 4707 } 4708 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 4709 break; 4710 } 4711 4712 if (Result.getNode()) { 4713 Ops.push_back(Result); 4714 return; 4715 } 4716 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, 4717 Ops, DAG); 4718} 4719 4720bool 4721ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 4722 // The ARM target isn't yet aware of offsets. 4723 return false; 4724} 4725 4726int ARM::getVFPf32Imm(const APFloat &FPImm) { 4727 APInt Imm = FPImm.bitcastToAPInt(); 4728 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; 4729 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 4730 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits 4731 4732 // We can handle 4 bits of mantissa. 4733 // mantissa = (16+UInt(e:f:g:h))/16. 4734 if (Mantissa & 0x7ffff) 4735 return -1; 4736 Mantissa >>= 19; 4737 if ((Mantissa & 0xf) != Mantissa) 4738 return -1; 4739 4740 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 4741 if (Exp < -3 || Exp > 4) 4742 return -1; 4743 Exp = ((Exp+3) & 0x7) ^ 4; 4744 4745 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 4746} 4747 4748int ARM::getVFPf64Imm(const APFloat &FPImm) { 4749 APInt Imm = FPImm.bitcastToAPInt(); 4750 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; 4751 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 4752 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; 4753 4754 // We can handle 4 bits of mantissa. 4755 // mantissa = (16+UInt(e:f:g:h))/16. 4756 if (Mantissa & 0xffffffffffffLL) 4757 return -1; 4758 Mantissa >>= 48; 4759 if ((Mantissa & 0xf) != Mantissa) 4760 return -1; 4761 4762 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 4763 if (Exp < -3 || Exp > 4) 4764 return -1; 4765 Exp = ((Exp+3) & 0x7) ^ 4; 4766 4767 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 4768} 4769 4770/// isFPImmLegal - Returns true if the target can instruction select the 4771/// specified FP immediate natively. If false, the legalizer will 4772/// materialize the FP immediate as a load from a constant pool. 4773bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 4774 if (!Subtarget->hasVFP3()) 4775 return false; 4776 if (VT == MVT::f32) 4777 return ARM::getVFPf32Imm(Imm) != -1; 4778 if (VT == MVT::f64) 4779 return ARM::getVFPf64Imm(Imm) != -1; 4780 return false; 4781} 4782
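// Worked example (illustrative): 1.0f has sign 0, unbiased exponent 0 and a zero mantissa, so ARM::getVFPf32Imm encodes it as Exp = ((0+3) & 0x7) ^ 4 == 7 and returns (0 << 7) | (7 << 4) | 0 == 0x70, making it a legal VMOV.F32 immediate; a value such as 1.1f needs more than 4 mantissa bits and yields -1.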