ARMISelLowering.cpp revision 505ad8bed3321bc4b99af8fba4844efe2fe9e67a
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS,
                     VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  if (Subtarget->isThumb())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);

  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (Subtarget->hasVFP3() && Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);

  setStackPointerRegisterToSaveRestore(ARM::SP);
  setSchedulingPreference(SchedulingForRegPressure);

  // FIXME: If-converter should use instruction latency to determine
  // profitability rather than relying on fixed limits.
  if (Subtarget->getCPUString() == "generic") {
    // Generic (and overly aggressive) if-conversion limits.
    setIfCvtBlockSizeLimit(10);
    setIfCvtDupBlockSizeLimit(2);
  } else if (Subtarget->hasV6Ops()) {
    setIfCvtBlockSizeLimit(2);
    setIfCvtDupBlockSizeLimit(1);
  } else {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(2);
  }

  maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
  // Do not enable CodePlacementOpt for now: it currently runs after the
  // ARMConstantIslandPass and messes up branch relaxation and placement
  // of constant islands.
  // benefitFromCodePlacementOpt = true;
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
  case ARMISD::CMOV: return "ARMISD::CMOV";
  case ARMISD::CNEG: return "ARMISD::CNEG";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";
  case ARMISD::F16_TO_F32: return "ARMISD::F16_TO_F32";
  case ARMISD::F32_TO_F16: return "ARMISD::F32_TO_F16";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
  case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
  case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
  case ARMISD::VSHRN: return "ARMISD::VSHRN";
  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
  case ARMISD::VDUP: return "ARMISD::VDUP";
  case ARMISD::VDUPLANE: return
"ARMISD::VDUPLANE"; 552 case ARMISD::VEXT: return "ARMISD::VEXT"; 553 case ARMISD::VREV64: return "ARMISD::VREV64"; 554 case ARMISD::VREV32: return "ARMISD::VREV32"; 555 case ARMISD::VREV16: return "ARMISD::VREV16"; 556 case ARMISD::VZIP: return "ARMISD::VZIP"; 557 case ARMISD::VUZP: return "ARMISD::VUZP"; 558 case ARMISD::VTRN: return "ARMISD::VTRN"; 559 case ARMISD::FMAX: return "ARMISD::FMAX"; 560 case ARMISD::FMIN: return "ARMISD::FMIN"; 561 } 562} 563 564/// getFunctionAlignment - Return the Log2 alignment of this function. 565unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { 566 return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1; 567} 568 569//===----------------------------------------------------------------------===// 570// Lowering Code 571//===----------------------------------------------------------------------===// 572 573/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 574static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 575 switch (CC) { 576 default: llvm_unreachable("Unknown condition code!"); 577 case ISD::SETNE: return ARMCC::NE; 578 case ISD::SETEQ: return ARMCC::EQ; 579 case ISD::SETGT: return ARMCC::GT; 580 case ISD::SETGE: return ARMCC::GE; 581 case ISD::SETLT: return ARMCC::LT; 582 case ISD::SETLE: return ARMCC::LE; 583 case ISD::SETUGT: return ARMCC::HI; 584 case ISD::SETUGE: return ARMCC::HS; 585 case ISD::SETULT: return ARMCC::LO; 586 case ISD::SETULE: return ARMCC::LS; 587 } 588} 589 590/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 591static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 592 ARMCC::CondCodes &CondCode2) { 593 CondCode2 = ARMCC::AL; 594 switch (CC) { 595 default: llvm_unreachable("Unknown FP condition!"); 596 case ISD::SETEQ: 597 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 598 case ISD::SETGT: 599 case ISD::SETOGT: CondCode = ARMCC::GT; break; 600 case ISD::SETGE: 601 case ISD::SETOGE: CondCode = ARMCC::GE; break; 602 case ISD::SETOLT: CondCode = ARMCC::MI; break; 603 case ISD::SETOLE: CondCode = ARMCC::LS; break; 604 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 605 case ISD::SETO: CondCode = ARMCC::VC; break; 606 case ISD::SETUO: CondCode = ARMCC::VS; break; 607 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 608 case ISD::SETUGT: CondCode = ARMCC::HI; break; 609 case ISD::SETUGE: CondCode = ARMCC::PL; break; 610 case ISD::SETLT: 611 case ISD::SETULT: CondCode = ARMCC::LT; break; 612 case ISD::SETLE: 613 case ISD::SETULE: CondCode = ARMCC::LE; break; 614 case ISD::SETNE: 615 case ISD::SETUNE: CondCode = ARMCC::NE; break; 616 } 617} 618 619//===----------------------------------------------------------------------===// 620// Calling Convention Implementation 621//===----------------------------------------------------------------------===// 622 623#include "ARMGenCallingConv.inc" 624 625// APCS f64 is in register pairs, possibly split to stack 626static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, 627 CCValAssign::LocInfo &LocInfo, 628 CCState &State, bool CanFail) { 629 static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; 630 631 // Try to get the first register. 632 if (unsigned Reg = State.AllocateReg(RegList, 4)) 633 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 634 else { 635 // For the 2nd half of a v2f64, do not fail. 636 if (CanFail) 637 return false; 638 639 // Put the whole thing on the stack. 
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}

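// Summary of the custom f64/v2f64 assignment helpers above: under APCS an
// f64 takes the next two available registers from r0-r3, with the second
// word spilled (or the whole value placed) on the stack when registers run
// out; under AAPCS it must occupy an aligned pair (r0+r1 or r2+r3) or an
// 8-byte aligned stack slot. A v2f64 is handled as two consecutive f64
// halves by calling the helper twice.
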
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/false, NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack.
867SDValue 868ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 869 SDValue StackPtr, SDValue Arg, 870 DebugLoc dl, SelectionDAG &DAG, 871 const CCValAssign &VA, 872 ISD::ArgFlagsTy Flags) { 873 unsigned LocMemOffset = VA.getLocMemOffset(); 874 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 875 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 876 if (Flags.isByVal()) { 877 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); 878 } 879 return DAG.getStore(Chain, dl, Arg, PtrOff, 880 PseudoSourceValue::getStack(), LocMemOffset, 881 false, false, 0); 882} 883 884void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, 885 SDValue Chain, SDValue &Arg, 886 RegsToPassVector &RegsToPass, 887 CCValAssign &VA, CCValAssign &NextVA, 888 SDValue &StackPtr, 889 SmallVector<SDValue, 8> &MemOpChains, 890 ISD::ArgFlagsTy Flags) { 891 892 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 893 DAG.getVTList(MVT::i32, MVT::i32), Arg); 894 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 895 896 if (NextVA.isRegLoc()) 897 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 898 else { 899 assert(NextVA.isMemLoc()); 900 if (StackPtr.getNode() == 0) 901 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 902 903 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 904 dl, DAG, NextVA, 905 Flags)); 906 } 907} 908 909/// LowerCall - Lowering a call into a callseq_start <- 910/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 911/// nodes. 912SDValue 913ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, 914 CallingConv::ID CallConv, bool isVarArg, 915 bool &isTailCall, 916 const SmallVectorImpl<ISD::OutputArg> &Outs, 917 const SmallVectorImpl<ISD::InputArg> &Ins, 918 DebugLoc dl, SelectionDAG &DAG, 919 SmallVectorImpl<SDValue> &InVals) { 920 // ARM target does not yet support tail call optimization. 921 isTailCall = false; 922 923 // Analyze operands of the call, assigning locations to each operand. 924 SmallVector<CCValAssign, 16> ArgLocs; 925 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 926 *DAG.getContext()); 927 CCInfo.AnalyzeCallOperands(Outs, 928 CCAssignFnForNode(CallConv, /* Return*/ false, 929 isVarArg)); 930 931 // Get a count of how many bytes are to be pushed on the stack. 932 unsigned NumBytes = CCInfo.getNextStackOffset(); 933 934 // Adjust the stack pointer for the new arguments... 935 // These operations are automatically eliminated by the prolog/epilog pass 936 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 937 938 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 939 940 RegsToPassVector RegsToPass; 941 SmallVector<SDValue, 8> MemOpChains; 942 943 // Walk the register/memloc assignments, inserting copies/loads. In the case 944 // of tail call optimization, arguments are handled later. 945 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 946 i != e; 947 ++i, ++realArgIdx) { 948 CCValAssign &VA = ArgLocs[i]; 949 SDValue Arg = Outs[realArgIdx].Val; 950 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 951 952 // Promote the value if needed. 
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && !isExt;
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }
  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = Outs[realRVLocIdx].Val;

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
                                                         ARMCP::CPBlockAddress,
                                                         PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               PseudoSourceValue::getConstantPool(), 0,
                               false, false, 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ?
    4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                             ARMCP::CPValue, PCAdj, "tlsgd", true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) {
  GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                               ARMCP::CPValue, PCAdj, "gottpoff", true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  } else {
    // local exec model
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}

SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec" TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}

SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                 CPAddr,
                                 PseudoSourceValue::getConstantPool(), 0,
                                 false, false, 0);
    SDValue Chain = Result.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
    if (!UseGOTOFF)
      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                           PseudoSourceValue::getGOT(), 0,
                           false, false, 0);
    return Result;
  } else {
    // If we have T2 ops, we can materialize the address directly via movt/movw
    // pair. This is always cheaper.
    if (Subtarget->useMovt()) {
      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                         DAG.getTargetGlobalAddress(GV, PtrVT));
    } else {
      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
    }
  }
}

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static)
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  else {
    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    unsigned PCAdj = (RelocM != Reloc::PIC_) ?
0 : (Subtarget->isThumb()?4:8); 1419 ARMConstantPoolValue *CPV = 1420 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 1421 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1422 } 1423 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1424 1425 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1426 PseudoSourceValue::getConstantPool(), 0, 1427 false, false, 0); 1428 SDValue Chain = Result.getValue(1); 1429 1430 if (RelocM == Reloc::PIC_) { 1431 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1432 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1433 } 1434 1435 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 1436 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1437 PseudoSourceValue::getGOT(), 0, 1438 false, false, 0); 1439 1440 return Result; 1441} 1442 1443SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 1444 SelectionDAG &DAG){ 1445 assert(Subtarget->isTargetELF() && 1446 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 1447 MachineFunction &MF = DAG.getMachineFunction(); 1448 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1449 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1450 EVT PtrVT = getPointerTy(); 1451 DebugLoc dl = Op.getDebugLoc(); 1452 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 1453 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1454 "_GLOBAL_OFFSET_TABLE_", 1455 ARMPCLabelIndex, PCAdj); 1456 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1457 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1458 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1459 PseudoSourceValue::getConstantPool(), 0, 1460 false, false, 0); 1461 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1462 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1463} 1464 1465SDValue 1466ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 1467 const ARMSubtarget *Subtarget) { 1468 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1469 DebugLoc dl = Op.getDebugLoc(); 1470 switch (IntNo) { 1471 default: return SDValue(); // Don't custom lower most intrinsics. 1472 case Intrinsic::arm_thread_pointer: { 1473 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1474 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1475 } 1476 case Intrinsic::eh_sjlj_lsda: { 1477 MachineFunction &MF = DAG.getMachineFunction(); 1478 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1479 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1480 EVT PtrVT = getPointerTy(); 1481 DebugLoc dl = Op.getDebugLoc(); 1482 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1483 SDValue CPAddr; 1484 unsigned PCAdj = (RelocM != Reloc::PIC_) 1485 ? 0 : (Subtarget->isThumb() ? 
4 : 8); 1486 ARMConstantPoolValue *CPV = 1487 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 1488 ARMCP::CPLSDA, PCAdj); 1489 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1490 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1491 SDValue Result = 1492 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1493 PseudoSourceValue::getConstantPool(), 0, 1494 false, false, 0); 1495 SDValue Chain = Result.getValue(1); 1496 1497 if (RelocM == Reloc::PIC_) { 1498 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1499 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1500 } 1501 return Result; 1502 } 1503 case Intrinsic::eh_sjlj_setjmp: 1504 SDValue Val = Subtarget->isThumb() ? 1505 DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : 1506 DAG.getConstant(0, MVT::i32); 1507 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1), 1508 Val); 1509 } 1510} 1511 1512static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 1513 const ARMSubtarget *Subtarget) { 1514 DebugLoc dl = Op.getDebugLoc(); 1515 SDValue Op5 = Op.getOperand(5); 1516 SDValue Res; 1517 unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); 1518 if (isDeviceBarrier) { 1519 if (Subtarget->hasV7Ops()) 1520 Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0)); 1521 else 1522 Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0), 1523 DAG.getConstant(0, MVT::i32)); 1524 } else { 1525 if (Subtarget->hasV7Ops()) 1526 Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 1527 else 1528 Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 1529 DAG.getConstant(0, MVT::i32)); 1530 } 1531 return Res; 1532} 1533 1534static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, 1535 unsigned VarArgsFrameIndex) { 1536 // vastart just stores the address of the VarArgsFrameIndex slot into the 1537 // memory location argument. 1538 DebugLoc dl = Op.getDebugLoc(); 1539 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1540 SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1541 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1542 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, 1543 false, false, 0); 1544} 1545 1546SDValue 1547ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { 1548 SDNode *Node = Op.getNode(); 1549 DebugLoc dl = Node->getDebugLoc(); 1550 EVT VT = Node->getValueType(0); 1551 SDValue Chain = Op.getOperand(0); 1552 SDValue Size = Op.getOperand(1); 1553 SDValue Align = Op.getOperand(2); 1554 1555 // Chain the dynamic stack allocation so that it doesn't modify the stack 1556 // pointer when other instructions are using the stack. 1557 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); 1558 1559 unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue(); 1560 unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); 1561 if (AlignVal > StackAlign) 1562 // Do this now since selection pass cannot introduce new target 1563 // independent node. 1564 Align = DAG.getConstant(-(uint64_t)AlignVal, VT); 1565 1566 // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up 1567 // using a "add r, sp, r" instead. Negate the size now so we don't have to 1568 // do even more horrible hack later. 
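  // (Exception: small word-aligned constant sizes, up to 508 bytes, still fit
  // the Thumb1 SP-adjusting immediate form, so those are left positive below.)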
1569 MachineFunction &MF = DAG.getMachineFunction(); 1570 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1571 if (AFI->isThumb1OnlyFunction()) { 1572 bool Negate = true; 1573 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size); 1574 if (C) { 1575 uint32_t Val = C->getZExtValue(); 1576 if (Val <= 508 && ((Val & 3) == 0)) 1577 Negate = false; 1578 } 1579 if (Negate) 1580 Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); 1581 } 1582 1583 SDVTList VTList = DAG.getVTList(VT, MVT::Other); 1584 SDValue Ops1[] = { Chain, Size, Align }; 1585 SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); 1586 Chain = Res.getValue(1); 1587 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), 1588 DAG.getIntPtrConstant(0, true), SDValue()); 1589 SDValue Ops2[] = { Res, Chain }; 1590 return DAG.getMergeValues(Ops2, 2, dl); 1591} 1592 1593SDValue 1594ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 1595 SDValue &Root, SelectionDAG &DAG, 1596 DebugLoc dl) { 1597 MachineFunction &MF = DAG.getMachineFunction(); 1598 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1599 1600 TargetRegisterClass *RC; 1601 if (AFI->isThumb1OnlyFunction()) 1602 RC = ARM::tGPRRegisterClass; 1603 else 1604 RC = ARM::GPRRegisterClass; 1605 1606 // Transform the arguments stored in physical registers into virtual ones. 1607 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1608 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 1609 1610 SDValue ArgValue2; 1611 if (NextVA.isMemLoc()) { 1612 unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; 1613 MachineFrameInfo *MFI = MF.getFrameInfo(); 1614 int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), 1615 true, false); 1616 1617 // Create load node to retrieve arguments from the stack. 1618 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1619 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 1620 PseudoSourceValue::getFixedStack(FI), 0, 1621 false, false, 0); 1622 } else { 1623 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 1624 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 1625 } 1626 1627 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 1628} 1629 1630SDValue 1631ARMTargetLowering::LowerFormalArguments(SDValue Chain, 1632 CallingConv::ID CallConv, bool isVarArg, 1633 const SmallVectorImpl<ISD::InputArg> 1634 &Ins, 1635 DebugLoc dl, SelectionDAG &DAG, 1636 SmallVectorImpl<SDValue> &InVals) { 1637 1638 MachineFunction &MF = DAG.getMachineFunction(); 1639 MachineFrameInfo *MFI = MF.getFrameInfo(); 1640 1641 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1642 1643 // Assign locations to all of the incoming arguments. 1644 SmallVector<CCValAssign, 16> ArgLocs; 1645 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 1646 *DAG.getContext()); 1647 CCInfo.AnalyzeFormalArguments(Ins, 1648 CCAssignFnForNode(CallConv, /* Return*/ false, 1649 isVarArg)); 1650 1651 SmallVector<SDValue, 16> ArgValues; 1652 1653 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1654 CCValAssign &VA = ArgLocs[i]; 1655 1656 // Arguments stored in registers. 1657 if (VA.isRegLoc()) { 1658 EVT RegVT = VA.getLocVT(); 1659 1660 SDValue ArgValue; 1661 if (VA.needsCustom()) { 1662 // f64 and vector types are split up into multiple registers or 1663 // combinations of registers and stack slots. 
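        // A v2f64 argument is handled as two f64 halves; each half may itself
        // be split across a GPR pair or a GPR and a stack slot (see
        // GetF64FormalArgument above).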
1664 RegVT = MVT::i32; 1665 1666 if (VA.getLocVT() == MVT::v2f64) { 1667 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 1668 Chain, DAG, dl); 1669 VA = ArgLocs[++i]; // skip ahead to next loc 1670 SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 1671 Chain, DAG, dl); 1672 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1673 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 1674 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 1675 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 1676 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 1677 } else 1678 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 1679 1680 } else { 1681 TargetRegisterClass *RC; 1682 1683 if (RegVT == MVT::f32) 1684 RC = ARM::SPRRegisterClass; 1685 else if (RegVT == MVT::f64) 1686 RC = ARM::DPRRegisterClass; 1687 else if (RegVT == MVT::v2f64) 1688 RC = ARM::QPRRegisterClass; 1689 else if (RegVT == MVT::i32) 1690 RC = (AFI->isThumb1OnlyFunction() ? 1691 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 1692 else 1693 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 1694 1695 // Transform the arguments in physical registers into virtual ones. 1696 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1697 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 1698 } 1699 1700 // If this is an 8 or 16-bit value, it is really passed promoted 1701 // to 32 bits. Insert an assert[sz]ext to capture this, then 1702 // truncate to the right size. 1703 switch (VA.getLocInfo()) { 1704 default: llvm_unreachable("Unknown loc info!"); 1705 case CCValAssign::Full: break; 1706 case CCValAssign::BCvt: 1707 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 1708 break; 1709 case CCValAssign::SExt: 1710 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 1711 DAG.getValueType(VA.getValVT())); 1712 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1713 break; 1714 case CCValAssign::ZExt: 1715 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 1716 DAG.getValueType(VA.getValVT())); 1717 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1718 break; 1719 } 1720 1721 InVals.push_back(ArgValue); 1722 1723 } else { // VA.isRegLoc() 1724 1725 // sanity check 1726 assert(VA.isMemLoc()); 1727 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 1728 1729 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; 1730 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 1731 true, false); 1732 1733 // Create load nodes to retrieve arguments from the stack. 1734 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1735 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 1736 PseudoSourceValue::getFixedStack(FI), 0, 1737 false, false, 0)); 1738 } 1739 } 1740 1741 // varargs 1742 if (isVarArg) { 1743 static const unsigned GPRArgRegs[] = { 1744 ARM::R0, ARM::R1, ARM::R2, ARM::R3 1745 }; 1746 1747 unsigned NumGPRs = CCInfo.getFirstUnallocated 1748 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 1749 1750 unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); 1751 unsigned VARegSize = (4 - NumGPRs) * 4; 1752 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 1753 unsigned ArgOffset = CCInfo.getNextStackOffset(); 1754 if (VARegSaveSize) { 1755 // If this function is vararg, store any remaining integer argument regs 1756 // to their spots on the stack so that they may be loaded by deferencing 1757 // the result of va_next. 
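      // VARegSaveSize is VARegSize rounded up to the stack alignment; the
      // padding (VARegSaveSize - VARegSize) is folded into the fixed object's
      // offset below.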
1758 AFI->setVarArgsRegSaveSize(VARegSaveSize); 1759 VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + 1760 VARegSaveSize - VARegSize, 1761 true, false); 1762 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 1763 1764 SmallVector<SDValue, 4> MemOps; 1765 for (; NumGPRs < 4; ++NumGPRs) { 1766 TargetRegisterClass *RC; 1767 if (AFI->isThumb1OnlyFunction()) 1768 RC = ARM::tGPRRegisterClass; 1769 else 1770 RC = ARM::GPRRegisterClass; 1771 1772 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); 1773 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 1774 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 1775 PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, 1776 false, false, 0); 1777 MemOps.push_back(Store); 1778 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 1779 DAG.getConstant(4, getPointerTy())); 1780 } 1781 if (!MemOps.empty()) 1782 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1783 &MemOps[0], MemOps.size()); 1784 } else 1785 // This will point to the next argument passed via stack. 1786 VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); 1787 } 1788 1789 return Chain; 1790} 1791 1792/// isFloatingPointZero - Return true if this is +0.0. 1793static bool isFloatingPointZero(SDValue Op) { 1794 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1795 return CFP->getValueAPF().isPosZero(); 1796 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1797 // Maybe this has already been legalized into the constant pool? 1798 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 1799 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 1800 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 1801 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1802 return CFP->getValueAPF().isPosZero(); 1803 } 1804 } 1805 return false; 1806} 1807 1808/// Returns appropriate ARM CMP (cmp) and corresponding condition code for 1809/// the given operands. 1810SDValue 1811ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 1812 SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { 1813 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 1814 unsigned C = RHSC->getZExtValue(); 1815 if (!isLegalICmpImmediate(C)) { 1816 // Constant does not fit, try adjusting it by one? 1817 switch (CC) { 1818 default: break; 1819 case ISD::SETLT: 1820 case ISD::SETGE: 1821 if (isLegalICmpImmediate(C-1)) { 1822 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 1823 RHS = DAG.getConstant(C-1, MVT::i32); 1824 } 1825 break; 1826 case ISD::SETULT: 1827 case ISD::SETUGE: 1828 if (C > 0 && isLegalICmpImmediate(C-1)) { 1829 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 1830 RHS = DAG.getConstant(C-1, MVT::i32); 1831 } 1832 break; 1833 case ISD::SETLE: 1834 case ISD::SETGT: 1835 if (isLegalICmpImmediate(C+1)) { 1836 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 1837 RHS = DAG.getConstant(C+1, MVT::i32); 1838 } 1839 break; 1840 case ISD::SETULE: 1841 case ISD::SETUGT: 1842 if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { 1843 CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; 1844 RHS = DAG.getConstant(C+1, MVT::i32); 1845 } 1846 break; 1847 } 1848 } 1849 } 1850 1851 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 1852 ARMISD::NodeType CompareType; 1853 switch (CondCode) { 1854 default: 1855 CompareType = ARMISD::CMP; 1856 break; 1857 case ARMCC::EQ: 1858 case ARMCC::NE: 1859 // Uses only Z Flag 1860 CompareType = ARMISD::CMPZ; 1861 break; 1862 } 1863 ARMCC = DAG.getConstant(CondCode, MVT::i32); 1864 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); 1865} 1866 1867/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 1868static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 1869 DebugLoc dl) { 1870 SDValue Cmp; 1871 if (!isFloatingPointZero(RHS)) 1872 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); 1873 else 1874 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); 1875 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); 1876} 1877 1878SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { 1879 EVT VT = Op.getValueType(); 1880 SDValue LHS = Op.getOperand(0); 1881 SDValue RHS = Op.getOperand(1); 1882 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 1883 SDValue TrueVal = Op.getOperand(2); 1884 SDValue FalseVal = Op.getOperand(3); 1885 DebugLoc dl = Op.getDebugLoc(); 1886 1887 if (LHS.getValueType() == MVT::i32) { 1888 SDValue ARMCC; 1889 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1890 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); 1891 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); 1892 } 1893 1894 ARMCC::CondCodes CondCode, CondCode2; 1895 FPCCToARMCC(CC, CondCode, CondCode2); 1896 1897 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); 1898 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1899 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 1900 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 1901 ARMCC, CCR, Cmp); 1902 if (CondCode2 != ARMCC::AL) { 1903 SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); 1904 // FIXME: Needs another CMP because flag can have but one use. 
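    // Some FP conditions (e.g. SETONE, SETUEQ) map to a pair of ARM condition
    // codes; the second CMOV selects TrueVal when the second condition holds,
    // so the result is TrueVal if either condition is satisfied.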
1905 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 1906 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 1907 Result, TrueVal, ARMCC2, CCR, Cmp2); 1908 } 1909 return Result; 1910} 1911 1912SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { 1913 SDValue Chain = Op.getOperand(0); 1914 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 1915 SDValue LHS = Op.getOperand(2); 1916 SDValue RHS = Op.getOperand(3); 1917 SDValue Dest = Op.getOperand(4); 1918 DebugLoc dl = Op.getDebugLoc(); 1919 1920 if (LHS.getValueType() == MVT::i32) { 1921 SDValue ARMCC; 1922 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1923 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); 1924 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 1925 Chain, Dest, ARMCC, CCR,Cmp); 1926 } 1927 1928 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 1929 ARMCC::CondCodes CondCode, CondCode2; 1930 FPCCToARMCC(CC, CondCode, CondCode2); 1931 1932 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 1933 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); 1934 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1935 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 1936 SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; 1937 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 1938 if (CondCode2 != ARMCC::AL) { 1939 ARMCC = DAG.getConstant(CondCode2, MVT::i32); 1940 SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; 1941 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 1942 } 1943 return Res; 1944} 1945 1946SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { 1947 SDValue Chain = Op.getOperand(0); 1948 SDValue Table = Op.getOperand(1); 1949 SDValue Index = Op.getOperand(2); 1950 DebugLoc dl = Op.getDebugLoc(); 1951 1952 EVT PTy = getPointerTy(); 1953 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 1954 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 1955 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 1956 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 1957 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 1958 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 1959 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 1960 if (Subtarget->isThumb2()) { 1961 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 1962 // which does another jump to the destination. This also makes it easier 1963 // to translate it to TBB / TBH later. 1964 // FIXME: This might not work if the function is extremely large. 
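    // (TBB/TBH encode the jump table as byte/halfword offsets from the table
    // base, so targets in a very large function could end up out of range.)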
1965 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 1966 Addr, Op.getOperand(2), JTI, UId); 1967 } 1968 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 1969 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 1970 PseudoSourceValue::getJumpTable(), 0, 1971 false, false, 0); 1972 Chain = Addr.getValue(1); 1973 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 1974 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 1975 } else { 1976 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 1977 PseudoSourceValue::getJumpTable(), 0, false, false, 0); 1978 Chain = Addr.getValue(1); 1979 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 1980 } 1981} 1982 1983static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 1984 DebugLoc dl = Op.getDebugLoc(); 1985 unsigned Opc; 1986 1987 switch (Op.getOpcode()) { 1988 default: 1989 assert(0 && "Invalid opcode!"); 1990 case ISD::FP32_TO_FP16: 1991 Opc = ARMISD::F32_TO_F16; 1992 break; 1993 case ISD::FP_TO_SINT: 1994 Opc = ARMISD::FTOSI; 1995 break; 1996 case ISD::FP_TO_UINT: 1997 Opc = ARMISD::FTOUI; 1998 break; 1999 } 2000 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2001 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); 2002} 2003 2004static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2005 EVT VT = Op.getValueType(); 2006 DebugLoc dl = Op.getDebugLoc(); 2007 unsigned Opc; 2008 2009 switch (Op.getOpcode()) { 2010 default: 2011 assert(0 && "Invalid opcode!"); 2012 case ISD::FP16_TO_FP32: 2013 Opc = ARMISD::F16_TO_F32; 2014 break; 2015 case ISD::SINT_TO_FP: 2016 Opc = ARMISD::SITOF; 2017 break; 2018 case ISD::UINT_TO_FP: 2019 Opc = ARMISD::UITOF; 2020 break; 2021 } 2022 2023 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); 2024 return DAG.getNode(Opc, dl, VT, Op); 2025} 2026 2027static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { 2028 // Implement fcopysign with a fabs and a conditional fneg. 2029 SDValue Tmp0 = Op.getOperand(0); 2030 SDValue Tmp1 = Op.getOperand(1); 2031 DebugLoc dl = Op.getDebugLoc(); 2032 EVT VT = Op.getValueType(); 2033 EVT SrcVT = Tmp1.getValueType(); 2034 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); 2035 SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); 2036 SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); 2037 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2038 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); 2039} 2040 2041SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 2042 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 2043 MFI->setFrameAddressIsTaken(true); 2044 EVT VT = Op.getValueType(); 2045 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 2046 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2047 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 2048 ? 
ARM::R7 : ARM::R11; 2049 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 2050 while (Depth--) 2051 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, 2052 false, false, 0); 2053 return FrameAddr; 2054} 2055 2056SDValue 2057ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, 2058 SDValue Chain, 2059 SDValue Dst, SDValue Src, 2060 SDValue Size, unsigned Align, 2061 bool AlwaysInline, 2062 const Value *DstSV, uint64_t DstSVOff, 2063 const Value *SrcSV, uint64_t SrcSVOff){ 2064 // Do repeated 4-byte loads and stores. To be improved. 2065 // This requires 4-byte alignment. 2066 if ((Align & 3) != 0) 2067 return SDValue(); 2068 // This requires the copy size to be a constant, preferrably 2069 // within a subtarget-specific limit. 2070 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 2071 if (!ConstantSize) 2072 return SDValue(); 2073 uint64_t SizeVal = ConstantSize->getZExtValue(); 2074 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) 2075 return SDValue(); 2076 2077 unsigned BytesLeft = SizeVal & 3; 2078 unsigned NumMemOps = SizeVal >> 2; 2079 unsigned EmittedNumMemOps = 0; 2080 EVT VT = MVT::i32; 2081 unsigned VTSize = 4; 2082 unsigned i = 0; 2083 const unsigned MAX_LOADS_IN_LDM = 6; 2084 SDValue TFOps[MAX_LOADS_IN_LDM]; 2085 SDValue Loads[MAX_LOADS_IN_LDM]; 2086 uint64_t SrcOff = 0, DstOff = 0; 2087 2088 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the 2089 // same number of stores. The loads and stores will get combined into 2090 // ldm/stm later on. 2091 while (EmittedNumMemOps < NumMemOps) { 2092 for (i = 0; 2093 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 2094 Loads[i] = DAG.getLoad(VT, dl, Chain, 2095 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 2096 DAG.getConstant(SrcOff, MVT::i32)), 2097 SrcSV, SrcSVOff + SrcOff, false, false, 0); 2098 TFOps[i] = Loads[i].getValue(1); 2099 SrcOff += VTSize; 2100 } 2101 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2102 2103 for (i = 0; 2104 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 2105 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 2106 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 2107 DAG.getConstant(DstOff, MVT::i32)), 2108 DstSV, DstSVOff + DstOff, false, false, 0); 2109 DstOff += VTSize; 2110 } 2111 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2112 2113 EmittedNumMemOps += i; 2114 } 2115 2116 if (BytesLeft == 0) 2117 return Chain; 2118 2119 // Issue loads / stores for the trailing (1 - 3) bytes. 
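  // Same structure as the main loop: all loads first, then a TokenFactor,
  // then the stores. A 3-byte tail, for example, becomes an i16 copy followed
  // by an i8 copy.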
2120 unsigned BytesLeftSave = BytesLeft; 2121 i = 0; 2122 while (BytesLeft) { 2123 if (BytesLeft >= 2) { 2124 VT = MVT::i16; 2125 VTSize = 2; 2126 } else { 2127 VT = MVT::i8; 2128 VTSize = 1; 2129 } 2130 2131 Loads[i] = DAG.getLoad(VT, dl, Chain, 2132 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 2133 DAG.getConstant(SrcOff, MVT::i32)), 2134 SrcSV, SrcSVOff + SrcOff, false, false, 0); 2135 TFOps[i] = Loads[i].getValue(1); 2136 ++i; 2137 SrcOff += VTSize; 2138 BytesLeft -= VTSize; 2139 } 2140 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2141 2142 i = 0; 2143 BytesLeft = BytesLeftSave; 2144 while (BytesLeft) { 2145 if (BytesLeft >= 2) { 2146 VT = MVT::i16; 2147 VTSize = 2; 2148 } else { 2149 VT = MVT::i8; 2150 VTSize = 1; 2151 } 2152 2153 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 2154 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 2155 DAG.getConstant(DstOff, MVT::i32)), 2156 DstSV, DstSVOff + DstOff, false, false, 0); 2157 ++i; 2158 DstOff += VTSize; 2159 BytesLeft -= VTSize; 2160 } 2161 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2162} 2163 2164static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { 2165 SDValue Op = N->getOperand(0); 2166 DebugLoc dl = N->getDebugLoc(); 2167 if (N->getValueType(0) == MVT::f64) { 2168 // Turn i64->f64 into VMOVDRR. 2169 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2170 DAG.getConstant(0, MVT::i32)); 2171 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2172 DAG.getConstant(1, MVT::i32)); 2173 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 2174 } 2175 2176 // Turn f64->i64 into VMOVRRD. 2177 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 2178 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 2179 2180 // Merge the pieces into a single i64 value. 2181 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 2182} 2183 2184/// getZeroVector - Returns a vector of specified type with all zero elements. 2185/// 2186static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2187 assert(VT.isVector() && "Expected a vector type"); 2188 2189 // Zero vectors are used to represent vector negation and in those cases 2190 // will be implemented with the NEON VNEG instruction. However, VNEG does 2191 // not support i64 elements, so sometimes the zero vectors will need to be 2192 // explicitly constructed. For those cases, and potentially other uses in 2193 // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted 2194 // to their dest type. This ensures they get CSE'd. 2195 SDValue Vec; 2196 SDValue Cst = DAG.getTargetConstant(0, MVT::i8); 2197 SmallVector<SDValue, 8> Ops; 2198 MVT TVT; 2199 2200 if (VT.getSizeInBits() == 64) { 2201 Ops.assign(8, Cst); TVT = MVT::v8i8; 2202 } else { 2203 Ops.assign(16, Cst); TVT = MVT::v16i8; 2204 } 2205 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); 2206 2207 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); 2208} 2209 2210/// getOnesVector - Returns a vector of specified type with all bits set. 2211/// 2212static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2213 assert(VT.isVector() && "Expected a vector type"); 2214 2215 // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their 2216 // dest type. This ensures they get CSE'd. 
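  // This mirrors getZeroVector above, but with every byte set to 0xFF.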
2217 SDValue Vec; 2218 SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); 2219 SmallVector<SDValue, 8> Ops; 2220 MVT TVT; 2221 2222 if (VT.getSizeInBits() == 64) { 2223 Ops.assign(8, Cst); TVT = MVT::v8i8; 2224 } else { 2225 Ops.assign(16, Cst); TVT = MVT::v16i8; 2226 } 2227 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); 2228 2229 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); 2230} 2231 2232/// LowerShiftRightParts - Lower SRA_PARTS, which returns two 2233/// i32 values and take a 2 x i32 value to shift plus a shift amount. 2234SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { 2235 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2236 EVT VT = Op.getValueType(); 2237 unsigned VTBits = VT.getSizeInBits(); 2238 DebugLoc dl = Op.getDebugLoc(); 2239 SDValue ShOpLo = Op.getOperand(0); 2240 SDValue ShOpHi = Op.getOperand(1); 2241 SDValue ShAmt = Op.getOperand(2); 2242 SDValue ARMCC; 2243 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 2244 2245 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 2246 2247 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2248 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2249 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 2250 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2251 DAG.getConstant(VTBits, MVT::i32)); 2252 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 2253 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2254 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 2255 2256 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2257 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2258 ARMCC, DAG, dl); 2259 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 2260 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, 2261 CCR, Cmp); 2262 2263 SDValue Ops[2] = { Lo, Hi }; 2264 return DAG.getMergeValues(Ops, 2, dl); 2265} 2266 2267/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 2268/// i32 values and take a 2 x i32 value to shift plus a shift amount. 
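/// The low half is a plain SHL; the high half is a CMOV selecting either
/// (Lo >> (32-Amt)) | (Hi << Amt), or Lo << (Amt-32) when the shift amount is
/// 32 or more.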
2269SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { 2270 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2271 EVT VT = Op.getValueType(); 2272 unsigned VTBits = VT.getSizeInBits(); 2273 DebugLoc dl = Op.getDebugLoc(); 2274 SDValue ShOpLo = Op.getOperand(0); 2275 SDValue ShOpHi = Op.getOperand(1); 2276 SDValue ShAmt = Op.getOperand(2); 2277 SDValue ARMCC; 2278 2279 assert(Op.getOpcode() == ISD::SHL_PARTS); 2280 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2281 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2282 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 2283 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2284 DAG.getConstant(VTBits, MVT::i32)); 2285 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 2286 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 2287 2288 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2289 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2290 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2291 ARMCC, DAG, dl); 2292 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 2293 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, 2294 CCR, Cmp); 2295 2296 SDValue Ops[2] = { Lo, Hi }; 2297 return DAG.getMergeValues(Ops, 2, dl); 2298} 2299 2300static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 2301 const ARMSubtarget *ST) { 2302 EVT VT = N->getValueType(0); 2303 DebugLoc dl = N->getDebugLoc(); 2304 2305 if (!ST->hasV6T2Ops()) 2306 return SDValue(); 2307 2308 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 2309 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 2310} 2311 2312static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 2313 const ARMSubtarget *ST) { 2314 EVT VT = N->getValueType(0); 2315 DebugLoc dl = N->getDebugLoc(); 2316 2317 // Lower vector shifts on NEON to use VSHL. 2318 if (VT.isVector()) { 2319 assert(ST->hasNEON() && "unexpected vector shift"); 2320 2321 // Left shifts translate directly to the vshiftu intrinsic. 2322 if (N->getOpcode() == ISD::SHL) 2323 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2324 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 2325 N->getOperand(0), N->getOperand(1)); 2326 2327 assert((N->getOpcode() == ISD::SRA || 2328 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 2329 2330 // NEON uses the same intrinsics for both left and right shifts. For 2331 // right shifts, the shift amounts are negative, so negate the vector of 2332 // shift amounts. 2333 EVT ShiftVT = N->getOperand(1).getValueType(); 2334 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 2335 getZeroVector(ShiftVT, DAG, dl), 2336 N->getOperand(1)); 2337 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 2338 Intrinsic::arm_neon_vshifts : 2339 Intrinsic::arm_neon_vshiftu); 2340 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2341 DAG.getConstant(vshiftInt, MVT::i32), 2342 N->getOperand(0), NegatedCount); 2343 } 2344 2345 // We can get here for a node like i32 = ISD::SHL i32, i64 2346 if (VT != MVT::i64) 2347 return SDValue(); 2348 2349 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 2350 "Unknown shift to lower!"); 2351 2352 // We only lower SRA, SRL of 1 here, all others use generic lowering. 2353 if (!isa<ConstantSDNode>(N->getOperand(1)) || 2354 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 2355 return SDValue(); 2356 2357 // If we are in thumb mode, we don't have RRX. 
2358 if (ST->isThumb1Only()) return SDValue(); 2359 2360 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 2361 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2362 DAG.getConstant(0, MVT::i32)); 2363 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2364 DAG.getConstant(1, MVT::i32)); 2365 2366 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 2367 // captures the result into a carry flag. 2368 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 2369 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); 2370 2371 // The low part is an ARMISD::RRX operand, which shifts the carry in. 2372 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 2373 2374 // Merge the pieces into a single i64 value. 2375 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 2376} 2377 2378static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 2379 SDValue TmpOp0, TmpOp1; 2380 bool Invert = false; 2381 bool Swap = false; 2382 unsigned Opc = 0; 2383 2384 SDValue Op0 = Op.getOperand(0); 2385 SDValue Op1 = Op.getOperand(1); 2386 SDValue CC = Op.getOperand(2); 2387 EVT VT = Op.getValueType(); 2388 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2389 DebugLoc dl = Op.getDebugLoc(); 2390 2391 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 2392 switch (SetCCOpcode) { 2393 default: llvm_unreachable("Illegal FP comparison"); break; 2394 case ISD::SETUNE: 2395 case ISD::SETNE: Invert = true; // Fallthrough 2396 case ISD::SETOEQ: 2397 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2398 case ISD::SETOLT: 2399 case ISD::SETLT: Swap = true; // Fallthrough 2400 case ISD::SETOGT: 2401 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2402 case ISD::SETOLE: 2403 case ISD::SETLE: Swap = true; // Fallthrough 2404 case ISD::SETOGE: 2405 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2406 case ISD::SETUGE: Swap = true; // Fallthrough 2407 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 2408 case ISD::SETUGT: Swap = true; // Fallthrough 2409 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 2410 case ISD::SETUEQ: Invert = true; // Fallthrough 2411 case ISD::SETONE: 2412 // Expand this to (OLT | OGT). 2413 TmpOp0 = Op0; 2414 TmpOp1 = Op1; 2415 Opc = ISD::OR; 2416 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2417 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 2418 break; 2419 case ISD::SETUO: Invert = true; // Fallthrough 2420 case ISD::SETO: 2421 // Expand this to (OLT | OGE). 2422 TmpOp0 = Op0; 2423 TmpOp1 = Op1; 2424 Opc = ISD::OR; 2425 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2426 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 2427 break; 2428 } 2429 } else { 2430 // Integer comparisons. 2431 switch (SetCCOpcode) { 2432 default: llvm_unreachable("Illegal integer comparison"); break; 2433 case ISD::SETNE: Invert = true; 2434 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2435 case ISD::SETLT: Swap = true; 2436 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2437 case ISD::SETLE: Swap = true; 2438 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2439 case ISD::SETULT: Swap = true; 2440 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 2441 case ISD::SETULE: Swap = true; 2442 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 2443 } 2444 2445 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 
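    // VTST sets a lane where (op0 & op1) is non-zero, which is exactly the
    // SETNE result; the Invert flag is flipped below so the SETEQ form also
    // comes out right.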
2446 if (Opc == ARMISD::VCEQ) { 2447 2448 SDValue AndOp; 2449 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 2450 AndOp = Op0; 2451 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 2452 AndOp = Op1; 2453 2454 // Ignore bitconvert. 2455 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) 2456 AndOp = AndOp.getOperand(0); 2457 2458 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 2459 Opc = ARMISD::VTST; 2460 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); 2461 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); 2462 Invert = !Invert; 2463 } 2464 } 2465 } 2466 2467 if (Swap) 2468 std::swap(Op0, Op1); 2469 2470 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 2471 2472 if (Invert) 2473 Result = DAG.getNOT(dl, Result, VT); 2474 2475 return Result; 2476} 2477 2478/// isVMOVSplat - Check if the specified splat value corresponds to an immediate 2479/// VMOV instruction, and if so, return the constant being splatted. 2480static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, 2481 unsigned SplatBitSize, SelectionDAG &DAG) { 2482 switch (SplatBitSize) { 2483 case 8: 2484 // Any 1-byte value is OK. 2485 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 2486 return DAG.getTargetConstant(SplatBits, MVT::i8); 2487 2488 case 16: 2489 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 2490 if ((SplatBits & ~0xff) == 0 || 2491 (SplatBits & ~0xff00) == 0) 2492 return DAG.getTargetConstant(SplatBits, MVT::i16); 2493 break; 2494 2495 case 32: 2496 // NEON's 32-bit VMOV supports splat values where: 2497 // * only one byte is nonzero, or 2498 // * the least significant byte is 0xff and the second byte is nonzero, or 2499 // * the least significant 2 bytes are 0xff and the third is nonzero. 2500 if ((SplatBits & ~0xff) == 0 || 2501 (SplatBits & ~0xff00) == 0 || 2502 (SplatBits & ~0xff0000) == 0 || 2503 (SplatBits & ~0xff000000) == 0) 2504 return DAG.getTargetConstant(SplatBits, MVT::i32); 2505 2506 if ((SplatBits & ~0xffff) == 0 && 2507 ((SplatBits | SplatUndef) & 0xff) == 0xff) 2508 return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); 2509 2510 if ((SplatBits & ~0xffffff) == 0 && 2511 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) 2512 return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); 2513 2514 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 2515 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 2516 // VMOV.I32. A (very) minor optimization would be to replicate the value 2517 // and fall through here to test for a valid 64-bit splat. But, then the 2518 // caller would also need to check and handle the change in size. 2519 break; 2520 2521 case 64: { 2522 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 2523 uint64_t BitMask = 0xff; 2524 uint64_t Val = 0; 2525 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 2526 if (((SplatBits | SplatUndef) & BitMask) == BitMask) 2527 Val |= BitMask; 2528 else if ((SplatBits & BitMask) != 0) 2529 return SDValue(); 2530 BitMask <<= 8; 2531 } 2532 return DAG.getTargetConstant(Val, MVT::i64); 2533 } 2534 2535 default: 2536 llvm_unreachable("unexpected size for isVMOVSplat"); 2537 break; 2538 } 2539 2540 return SDValue(); 2541} 2542 2543/// getVMOVImm - If this is a build_vector of constants which can be 2544/// formed by using a VMOV instruction of the specified element size, 2545/// return the constant being splatted. 
The ByteSize field indicates the 2546/// number of bytes of each element [1248]. 2547SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 2548 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 2549 APInt SplatBits, SplatUndef; 2550 unsigned SplatBitSize; 2551 bool HasAnyUndefs; 2552 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 2553 HasAnyUndefs, ByteSize * 8)) 2554 return SDValue(); 2555 2556 if (SplatBitSize > ByteSize * 8) 2557 return SDValue(); 2558 2559 return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), 2560 SplatBitSize, DAG); 2561} 2562 2563static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 2564 bool &ReverseVEXT, unsigned &Imm) { 2565 unsigned NumElts = VT.getVectorNumElements(); 2566 ReverseVEXT = false; 2567 Imm = M[0]; 2568 2569 // If this is a VEXT shuffle, the immediate value is the index of the first 2570 // element. The other shuffle indices must be the successive elements after 2571 // the first one. 2572 unsigned ExpectedElt = Imm; 2573 for (unsigned i = 1; i < NumElts; ++i) { 2574 // Increment the expected index. If it wraps around, it may still be 2575 // a VEXT but the source vectors must be swapped. 2576 ExpectedElt += 1; 2577 if (ExpectedElt == NumElts * 2) { 2578 ExpectedElt = 0; 2579 ReverseVEXT = true; 2580 } 2581 2582 if (ExpectedElt != static_cast<unsigned>(M[i])) 2583 return false; 2584 } 2585 2586 // Adjust the index value if the source operands will be swapped. 2587 if (ReverseVEXT) 2588 Imm -= NumElts; 2589 2590 return true; 2591} 2592 2593/// isVREVMask - Check if a vector shuffle corresponds to a VREV 2594/// instruction with the specified blocksize. (The order of the elements 2595/// within each block of the vector is reversed.) 2596static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 2597 unsigned BlockSize) { 2598 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 2599 "Only possible block sizes for VREV are: 16, 32, 64"); 2600 2601 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2602 if (EltSz == 64) 2603 return false; 2604 2605 unsigned NumElts = VT.getVectorNumElements(); 2606 unsigned BlockElts = M[0] + 1; 2607 2608 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 2609 return false; 2610 2611 for (unsigned i = 0; i < NumElts; ++i) { 2612 if ((unsigned) M[i] != 2613 (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 2614 return false; 2615 } 2616 2617 return true; 2618} 2619 2620static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 2621 unsigned &WhichResult) { 2622 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2623 if (EltSz == 64) 2624 return false; 2625 2626 unsigned NumElts = VT.getVectorNumElements(); 2627 WhichResult = (M[0] == 0 ? 0 : 1); 2628 for (unsigned i = 0; i < NumElts; i += 2) { 2629 if ((unsigned) M[i] != i + WhichResult || 2630 (unsigned) M[i+1] != i + NumElts + WhichResult) 2631 return false; 2632 } 2633 return true; 2634} 2635 2636/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 2637/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2638/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 2639static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2640 unsigned &WhichResult) { 2641 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2642 if (EltSz == 64) 2643 return false; 2644 2645 unsigned NumElts = VT.getVectorNumElements(); 2646 WhichResult = (M[0] == 0 ? 
0 : 1); 2647 for (unsigned i = 0; i < NumElts; i += 2) { 2648 if ((unsigned) M[i] != i + WhichResult || 2649 (unsigned) M[i+1] != i + WhichResult) 2650 return false; 2651 } 2652 return true; 2653} 2654 2655static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 2656 unsigned &WhichResult) { 2657 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2658 if (EltSz == 64) 2659 return false; 2660 2661 unsigned NumElts = VT.getVectorNumElements(); 2662 WhichResult = (M[0] == 0 ? 0 : 1); 2663 for (unsigned i = 0; i != NumElts; ++i) { 2664 if ((unsigned) M[i] != 2 * i + WhichResult) 2665 return false; 2666 } 2667 2668 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2669 if (VT.is64BitVector() && EltSz == 32) 2670 return false; 2671 2672 return true; 2673} 2674 2675/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 2676/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2677/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 2678static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2679 unsigned &WhichResult) { 2680 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2681 if (EltSz == 64) 2682 return false; 2683 2684 unsigned Half = VT.getVectorNumElements() / 2; 2685 WhichResult = (M[0] == 0 ? 0 : 1); 2686 for (unsigned j = 0; j != 2; ++j) { 2687 unsigned Idx = WhichResult; 2688 for (unsigned i = 0; i != Half; ++i) { 2689 if ((unsigned) M[i + j * Half] != Idx) 2690 return false; 2691 Idx += 2; 2692 } 2693 } 2694 2695 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2696 if (VT.is64BitVector() && EltSz == 32) 2697 return false; 2698 2699 return true; 2700} 2701 2702static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 2703 unsigned &WhichResult) { 2704 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2705 if (EltSz == 64) 2706 return false; 2707 2708 unsigned NumElts = VT.getVectorNumElements(); 2709 WhichResult = (M[0] == 0 ? 0 : 1); 2710 unsigned Idx = WhichResult * NumElts / 2; 2711 for (unsigned i = 0; i != NumElts; i += 2) { 2712 if ((unsigned) M[i] != Idx || 2713 (unsigned) M[i+1] != Idx + NumElts) 2714 return false; 2715 Idx += 1; 2716 } 2717 2718 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2719 if (VT.is64BitVector() && EltSz == 32) 2720 return false; 2721 2722 return true; 2723} 2724 2725/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 2726/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2727/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 2728static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2729 unsigned &WhichResult) { 2730 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2731 if (EltSz == 64) 2732 return false; 2733 2734 unsigned NumElts = VT.getVectorNumElements(); 2735 WhichResult = (M[0] == 0 ? 0 : 1); 2736 unsigned Idx = WhichResult * NumElts / 2; 2737 for (unsigned i = 0; i != NumElts; i += 2) { 2738 if ((unsigned) M[i] != Idx || 2739 (unsigned) M[i+1] != Idx) 2740 return false; 2741 Idx += 1; 2742 } 2743 2744 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2745 if (VT.is64BitVector() && EltSz == 32) 2746 return false; 2747 2748 return true; 2749} 2750 2751 2752static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2753 // Canonicalize all-zeros and all-ones vectors. 
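  // These go through getZeroVector/getOnesVector so the canonical
  // <8 x i8>/<16 x i8> forms are CSE'd with zero/ones vectors built elsewhere.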
2754 ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode()); 2755 if (ConstVal->isNullValue()) 2756 return getZeroVector(VT, DAG, dl); 2757 if (ConstVal->isAllOnesValue()) 2758 return getOnesVector(VT, DAG, dl); 2759 2760 EVT CanonicalVT; 2761 if (VT.is64BitVector()) { 2762 switch (Val.getValueType().getSizeInBits()) { 2763 case 8: CanonicalVT = MVT::v8i8; break; 2764 case 16: CanonicalVT = MVT::v4i16; break; 2765 case 32: CanonicalVT = MVT::v2i32; break; 2766 case 64: CanonicalVT = MVT::v1i64; break; 2767 default: llvm_unreachable("unexpected splat element type"); break; 2768 } 2769 } else { 2770 assert(VT.is128BitVector() && "unknown splat vector size"); 2771 switch (Val.getValueType().getSizeInBits()) { 2772 case 8: CanonicalVT = MVT::v16i8; break; 2773 case 16: CanonicalVT = MVT::v8i16; break; 2774 case 32: CanonicalVT = MVT::v4i32; break; 2775 case 64: CanonicalVT = MVT::v2i64; break; 2776 default: llvm_unreachable("unexpected splat element type"); break; 2777 } 2778 } 2779 2780 // Build a canonical splat for this value. 2781 SmallVector<SDValue, 8> Ops; 2782 Ops.assign(CanonicalVT.getVectorNumElements(), Val); 2783 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], 2784 Ops.size()); 2785 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res); 2786} 2787 2788// If this is a case we can't handle, return null and let the default 2789// expansion code take care of it. 2790static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 2791 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 2792 DebugLoc dl = Op.getDebugLoc(); 2793 EVT VT = Op.getValueType(); 2794 2795 APInt SplatBits, SplatUndef; 2796 unsigned SplatBitSize; 2797 bool HasAnyUndefs; 2798 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 2799 if (SplatBitSize <= 64) { 2800 SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), 2801 SplatUndef.getZExtValue(), SplatBitSize, DAG); 2802 if (Val.getNode()) 2803 return BuildSplat(Val, VT, DAG, dl); 2804 } 2805 } 2806 2807 // If there are only 2 elements in a 128-bit vector, insert them into an 2808 // undef vector. This handles the common case for 128-bit vector argument 2809 // passing, where the insertions should be translated to subreg accesses 2810 // with no real instructions. 2811 if (VT.is128BitVector() && Op.getNumOperands() == 2) { 2812 SDValue Val = DAG.getUNDEF(VT); 2813 SDValue Op0 = Op.getOperand(0); 2814 SDValue Op1 = Op.getOperand(1); 2815 if (Op0.getOpcode() != ISD::UNDEF) 2816 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0, 2817 DAG.getIntPtrConstant(0)); 2818 if (Op1.getOpcode() != ISD::UNDEF) 2819 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1, 2820 DAG.getIntPtrConstant(1)); 2821 return Val; 2822 } 2823 2824 return SDValue(); 2825} 2826 2827/// isShuffleMaskLegal - Targets can use this to indicate that they only 2828/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 2829/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 2830/// are assumed to be legal. 2831bool 2832ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 2833 EVT VT) const { 2834 if (VT.getVectorNumElements() == 4 && 2835 (VT.is128BitVector() || VT.is64BitVector())) { 2836 unsigned PFIndexes[4]; 2837 for (unsigned i = 0; i != 4; ++i) { 2838 if (M[i] < 0) 2839 PFIndexes[i] = 8; 2840 else 2841 PFIndexes[i] = M[i]; 2842 } 2843 2844 // Compute the index in the perfect shuffle table. 
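    // Each of the four mask entries is a base-9 digit (0-7 for a real element,
    // 8 for undef), so the index is PF[0]*729 + PF[1]*81 + PF[2]*9 + PF[3].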
2845 unsigned PFTableIndex = 2846 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 2847 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 2848 unsigned Cost = (PFEntry >> 30); 2849 2850 if (Cost <= 4) 2851 return true; 2852 } 2853 2854 bool ReverseVEXT; 2855 unsigned Imm, WhichResult; 2856 2857 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 2858 isVREVMask(M, VT, 64) || 2859 isVREVMask(M, VT, 32) || 2860 isVREVMask(M, VT, 16) || 2861 isVEXTMask(M, VT, ReverseVEXT, Imm) || 2862 isVTRNMask(M, VT, WhichResult) || 2863 isVUZPMask(M, VT, WhichResult) || 2864 isVZIPMask(M, VT, WhichResult) || 2865 isVTRN_v_undef_Mask(M, VT, WhichResult) || 2866 isVUZP_v_undef_Mask(M, VT, WhichResult) || 2867 isVZIP_v_undef_Mask(M, VT, WhichResult)); 2868} 2869 2870/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 2871/// the specified operations to build the shuffle. 2872static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 2873 SDValue RHS, SelectionDAG &DAG, 2874 DebugLoc dl) { 2875 unsigned OpNum = (PFEntry >> 26) & 0x0F; 2876 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 2877 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 2878 2879 enum { 2880 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 2881 OP_VREV, 2882 OP_VDUP0, 2883 OP_VDUP1, 2884 OP_VDUP2, 2885 OP_VDUP3, 2886 OP_VEXT1, 2887 OP_VEXT2, 2888 OP_VEXT3, 2889 OP_VUZPL, // VUZP, left result 2890 OP_VUZPR, // VUZP, right result 2891 OP_VZIPL, // VZIP, left result 2892 OP_VZIPR, // VZIP, right result 2893 OP_VTRNL, // VTRN, left result 2894 OP_VTRNR // VTRN, right result 2895 }; 2896 2897 if (OpNum == OP_COPY) { 2898 if (LHSID == (1*9+2)*9+3) return LHS; 2899 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 2900 return RHS; 2901 } 2902 2903 SDValue OpLHS, OpRHS; 2904 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 2905 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 2906 EVT VT = OpLHS.getValueType(); 2907 2908 switch (OpNum) { 2909 default: llvm_unreachable("Unknown shuffle opcode!"); 2910 case OP_VREV: 2911 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 2912 case OP_VDUP0: 2913 case OP_VDUP1: 2914 case OP_VDUP2: 2915 case OP_VDUP3: 2916 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 2917 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 2918 case OP_VEXT1: 2919 case OP_VEXT2: 2920 case OP_VEXT3: 2921 return DAG.getNode(ARMISD::VEXT, dl, VT, 2922 OpLHS, OpRHS, 2923 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 2924 case OP_VUZPL: 2925 case OP_VUZPR: 2926 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 2927 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 2928 case OP_VZIPL: 2929 case OP_VZIPR: 2930 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 2931 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 2932 case OP_VTRNL: 2933 case OP_VTRNR: 2934 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 2935 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 2936 } 2937} 2938 2939static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 2940 SDValue V1 = Op.getOperand(0); 2941 SDValue V2 = Op.getOperand(1); 2942 DebugLoc dl = Op.getDebugLoc(); 2943 EVT VT = Op.getValueType(); 2944 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 2945 SmallVector<int, 8> ShuffleMask; 2946 2947 // Convert shuffles that are directly supported on NEON to target-specific 2948 // DAG nodes, instead of keeping them as shuffles and matching them again 2949 // during code selection. 
This is more efficient and avoids the possibility 2950 // of inconsistencies between legalization and selection. 2951 // FIXME: floating-point vectors should be canonicalized to integer vectors 2952 // of the same time so that they get CSEd properly. 2953 SVN->getMask(ShuffleMask); 2954 2955 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 2956 int Lane = SVN->getSplatIndex(); 2957 // If this is undef splat, generate it via "just" vdup, if possible. 2958 if (Lane == -1) Lane = 0; 2959 2960 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 2961 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 2962 } 2963 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 2964 DAG.getConstant(Lane, MVT::i32)); 2965 } 2966 2967 bool ReverseVEXT; 2968 unsigned Imm; 2969 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 2970 if (ReverseVEXT) 2971 std::swap(V1, V2); 2972 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 2973 DAG.getConstant(Imm, MVT::i32)); 2974 } 2975 2976 if (isVREVMask(ShuffleMask, VT, 64)) 2977 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 2978 if (isVREVMask(ShuffleMask, VT, 32)) 2979 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 2980 if (isVREVMask(ShuffleMask, VT, 16)) 2981 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 2982 2983 // Check for Neon shuffles that modify both input vectors in place. 2984 // If both results are used, i.e., if there are two shuffles with the same 2985 // source operands and with masks corresponding to both results of one of 2986 // these operations, DAG memoization will ensure that a single node is 2987 // used for both shuffles. 2988 unsigned WhichResult; 2989 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 2990 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 2991 V1, V2).getValue(WhichResult); 2992 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 2993 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 2994 V1, V2).getValue(WhichResult); 2995 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 2996 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 2997 V1, V2).getValue(WhichResult); 2998 2999 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3000 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3001 V1, V1).getValue(WhichResult); 3002 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3003 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3004 V1, V1).getValue(WhichResult); 3005 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3006 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3007 V1, V1).getValue(WhichResult); 3008 3009 // If the shuffle is not directly supported and it has 4 elements, use 3010 // the PerfectShuffle-generated table to synthesize it from other shuffles. 3011 if (VT.getVectorNumElements() == 4 && 3012 (VT.is128BitVector() || VT.is64BitVector())) { 3013 unsigned PFIndexes[4]; 3014 for (unsigned i = 0; i != 4; ++i) { 3015 if (ShuffleMask[i] < 0) 3016 PFIndexes[i] = 8; 3017 else 3018 PFIndexes[i] = ShuffleMask[i]; 3019 } 3020 3021 // Compute the index in the perfect shuffle table. 
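    // (Same base-9 index encoding as in isShuffleMaskLegal above; entries
    // whose cost field is greater than 4 fall back to the default expansion.)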
3022 unsigned PFTableIndex = 3023 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3024 3025 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3026 unsigned Cost = (PFEntry >> 30); 3027 3028 if (Cost <= 4) 3029 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 3030 } 3031 3032 return SDValue(); 3033} 3034 3035static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 3036 EVT VT = Op.getValueType(); 3037 DebugLoc dl = Op.getDebugLoc(); 3038 SDValue Vec = Op.getOperand(0); 3039 SDValue Lane = Op.getOperand(1); 3040 assert(VT == MVT::i32 && 3041 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && 3042 "unexpected type for custom-lowering vector extract"); 3043 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 3044} 3045 3046static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 3047 // The only time a CONCAT_VECTORS operation can have legal types is when 3048 // two 64-bit vectors are concatenated to a 128-bit vector. 3049 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 3050 "unexpected CONCAT_VECTORS"); 3051 DebugLoc dl = Op.getDebugLoc(); 3052 SDValue Val = DAG.getUNDEF(MVT::v2f64); 3053 SDValue Op0 = Op.getOperand(0); 3054 SDValue Op1 = Op.getOperand(1); 3055 if (Op0.getOpcode() != ISD::UNDEF) 3056 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3057 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), 3058 DAG.getIntPtrConstant(0)); 3059 if (Op1.getOpcode() != ISD::UNDEF) 3060 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3061 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), 3062 DAG.getIntPtrConstant(1)); 3063 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); 3064} 3065 3066SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { 3067 switch (Op.getOpcode()) { 3068 default: llvm_unreachable("Don't know how to custom lower this!"); 3069 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3070 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 3071 case ISD::GlobalAddress: 3072 return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : 3073 LowerGlobalAddressELF(Op, DAG); 3074 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 3075 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 3076 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 3077 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 3078 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 3079 case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); 3080 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 3081 case ISD::FP16_TO_FP32: 3082 case ISD::SINT_TO_FP: 3083 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 3084 case ISD::FP32_TO_FP16: 3085 case ISD::FP_TO_SINT: 3086 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 3087 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 3088 case ISD::RETURNADDR: break; 3089 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 3090 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 3091 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 3092 Subtarget); 3093 case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); 3094 case ISD::SHL: 3095 case ISD::SRL: 3096 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 3097 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 3098 case ISD::SRL_PARTS: 3099 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 3100 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 3101 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 3102 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 3103 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3104 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 3105 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 3106 } 3107 return SDValue(); 3108} 3109 3110/// ReplaceNodeResults - Replace the results of node with an illegal result 3111/// type with new values built out of custom code. 3112void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 3113 SmallVectorImpl<SDValue>&Results, 3114 SelectionDAG &DAG) { 3115 switch (N->getOpcode()) { 3116 default: 3117 llvm_unreachable("Don't know how to custom expand this!"); 3118 return; 3119 case ISD::BIT_CONVERT: 3120 Results.push_back(ExpandBIT_CONVERT(N, DAG)); 3121 return; 3122 case ISD::SRL: 3123 case ISD::SRA: { 3124 SDValue Res = LowerShift(N, DAG, Subtarget); 3125 if (Res.getNode()) 3126 Results.push_back(Res); 3127 return; 3128 } 3129 } 3130} 3131 3132//===----------------------------------------------------------------------===// 3133// ARM Scheduler Hooks 3134//===----------------------------------------------------------------------===// 3135 3136MachineBasicBlock * 3137ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 3138 MachineBasicBlock *BB, 3139 unsigned Size) const { 3140 unsigned dest = MI->getOperand(0).getReg(); 3141 unsigned ptr = MI->getOperand(1).getReg(); 3142 unsigned oldval = MI->getOperand(2).getReg(); 3143 unsigned newval = MI->getOperand(3).getReg(); 3144 unsigned scratch = BB->getParent()->getRegInfo() 3145 .createVirtualRegister(ARM::GPRRegisterClass); 3146 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3147 DebugLoc dl = MI->getDebugLoc(); 3148 bool isThumb2 = Subtarget->isThumb2(); 3149 3150 unsigned ldrOpc, strOpc; 3151 switch (Size) { 3152 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 3153 case 1: 3154 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3155 strOpc = isThumb2 ? 
ARM::t2STREXB : ARM::STREXB; 3156 break; 3157 case 2: 3158 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3159 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3160 break; 3161 case 4: 3162 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3163 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 3164 break; 3165 } 3166 3167 MachineFunction *MF = BB->getParent(); 3168 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3169 MachineFunction::iterator It = BB; 3170 ++It; // insert the new blocks after the current block 3171 3172 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3173 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3174 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3175 MF->insert(It, loop1MBB); 3176 MF->insert(It, loop2MBB); 3177 MF->insert(It, exitMBB); 3178 exitMBB->transferSuccessors(BB); 3179 3180 // thisMBB: 3181 // ... 3182 // fallthrough --> loop1MBB 3183 BB->addSuccessor(loop1MBB); 3184 3185 // loop1MBB: 3186 // ldrex dest, [ptr] 3187 // cmp dest, oldval 3188 // bne exitMBB 3189 BB = loop1MBB; 3190 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3191 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 3192 .addReg(dest).addReg(oldval)); 3193 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3194 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3195 BB->addSuccessor(loop2MBB); 3196 BB->addSuccessor(exitMBB); 3197 3198 // loop2MBB: 3199 // strex scratch, newval, [ptr] 3200 // cmp scratch, #0 3201 // bne loop1MBB 3202 BB = loop2MBB; 3203 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) 3204 .addReg(ptr)); 3205 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3206 .addReg(scratch).addImm(0)); 3207 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3208 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3209 BB->addSuccessor(loop1MBB); 3210 BB->addSuccessor(exitMBB); 3211 3212 // exitMBB: 3213 // ... 3214 BB = exitMBB; 3215 3216 MF->DeleteMachineInstr(MI); // The instruction is gone now. 3217 3218 return BB; 3219} 3220 3221MachineBasicBlock * 3222ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 3223 unsigned Size, unsigned BinOpcode) const { 3224 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 3225 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3226 3227 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3228 MachineFunction *MF = BB->getParent(); 3229 MachineFunction::iterator It = BB; 3230 ++It; 3231 3232 unsigned dest = MI->getOperand(0).getReg(); 3233 unsigned ptr = MI->getOperand(1).getReg(); 3234 unsigned incr = MI->getOperand(2).getReg(); 3235 DebugLoc dl = MI->getDebugLoc(); 3236 3237 bool isThumb2 = Subtarget->isThumb2(); 3238 unsigned ldrOpc, strOpc; 3239 switch (Size) { 3240 default: llvm_unreachable("unsupported size for AtomicBinary!"); 3241 case 1: 3242 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3243 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3244 break; 3245 case 2: 3246 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3247 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3248 break; 3249 case 4: 3250 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3251 strOpc = isThumb2 ?
ARM::t2STREX : ARM::STREX; 3252 break; 3253 } 3254 3255 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3256 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3257 MF->insert(It, loopMBB); 3258 MF->insert(It, exitMBB); 3259 exitMBB->transferSuccessors(BB); 3260 3261 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3262 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3263 unsigned scratch2 = (!BinOpcode) ? incr : 3264 RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3265 3266 // thisMBB: 3267 // ... 3268 // fallthrough --> loopMBB 3269 BB->addSuccessor(loopMBB); 3270 3271 // loopMBB: 3272 // ldrex dest, ptr 3273 // <binop> scratch2, dest, incr 3274 // strex scratch, scratch2, ptr 3275 // cmp scratch, #0 3276 // bne- loopMBB 3277 // fallthrough --> exitMBB 3278 BB = loopMBB; 3279 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3280 if (BinOpcode) { 3281 // operand order needs to go the other way for NAND 3282 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 3283 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3284 addReg(incr).addReg(dest)).addReg(0); 3285 else 3286 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3287 addReg(dest).addReg(incr)).addReg(0); 3288 } 3289 3290 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 3291 .addReg(ptr)); 3292 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3293 .addReg(scratch).addImm(0)); 3294 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3295 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3296 3297 BB->addSuccessor(loopMBB); 3298 BB->addSuccessor(exitMBB); 3299 3300 // exitMBB: 3301 // ... 3302 BB = exitMBB; 3303 3304 MF->DeleteMachineInstr(MI); // The instruction is gone now. 3305 3306 return BB; 3307} 3308 3309MachineBasicBlock * 3310ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 3311 MachineBasicBlock *BB, 3312 DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { 3313 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3314 DebugLoc dl = MI->getDebugLoc(); 3315 bool isThumb2 = Subtarget->isThumb2(); 3316 switch (MI->getOpcode()) { 3317 default: 3318 MI->dump(); 3319 llvm_unreachable("Unexpected instr type to insert"); 3320 3321 case ARM::ATOMIC_LOAD_ADD_I8: 3322 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3323 case ARM::ATOMIC_LOAD_ADD_I16: 3324 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3325 case ARM::ATOMIC_LOAD_ADD_I32: 3326 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3327 3328 case ARM::ATOMIC_LOAD_AND_I8: 3329 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3330 case ARM::ATOMIC_LOAD_AND_I16: 3331 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3332 case ARM::ATOMIC_LOAD_AND_I32: 3333 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3334 3335 case ARM::ATOMIC_LOAD_OR_I8: 3336 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3337 case ARM::ATOMIC_LOAD_OR_I16: 3338 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3339 case ARM::ATOMIC_LOAD_OR_I32: 3340 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3341 3342 case ARM::ATOMIC_LOAD_XOR_I8: 3343 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? 
ARM::t2EORrr : ARM::EORrr); 3344 case ARM::ATOMIC_LOAD_XOR_I16: 3345 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3346 case ARM::ATOMIC_LOAD_XOR_I32: 3347 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3348 3349 case ARM::ATOMIC_LOAD_NAND_I8: 3350 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3351 case ARM::ATOMIC_LOAD_NAND_I16: 3352 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3353 case ARM::ATOMIC_LOAD_NAND_I32: 3354 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3355 3356 case ARM::ATOMIC_LOAD_SUB_I8: 3357 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3358 case ARM::ATOMIC_LOAD_SUB_I16: 3359 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3360 case ARM::ATOMIC_LOAD_SUB_I32: 3361 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3362 3363 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 3364 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 3365 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 3366 3367 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 3368 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 3369 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 3370 3371 case ARM::tMOVCCr_pseudo: { 3372 // To "insert" a SELECT_CC instruction, we actually have to insert the 3373 // diamond control-flow pattern. The incoming instruction knows the 3374 // destination vreg to set, the condition code register to branch on, the 3375 // true/false values to select between, and a branch opcode to use. 3376 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3377 MachineFunction::iterator It = BB; 3378 ++It; 3379 3380 // thisMBB: 3381 // ... 3382 // TrueVal = ... 3383 // cmpTY ccX, r1, r2 3384 // bCC copy1MBB 3385 // fallthrough --> copy0MBB 3386 MachineBasicBlock *thisMBB = BB; 3387 MachineFunction *F = BB->getParent(); 3388 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 3389 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 3390 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 3391 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 3392 F->insert(It, copy0MBB); 3393 F->insert(It, sinkMBB); 3394 // Update machine-CFG edges by first adding all successors of the current 3395 // block to the new block which will contain the Phi node for the select. 3396 // Also inform sdisel of the edge changes. 3397 for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), 3398 E = BB->succ_end(); I != E; ++I) { 3399 EM->insert(std::make_pair(*I, sinkMBB)); 3400 sinkMBB->addSuccessor(*I); 3401 } 3402 // Next, remove all successors of the current block, and add the true 3403 // and fallthrough blocks as its successors. 3404 while (!BB->succ_empty()) 3405 BB->removeSuccessor(BB->succ_begin()); 3406 BB->addSuccessor(copy0MBB); 3407 BB->addSuccessor(sinkMBB); 3408 3409 // copy0MBB: 3410 // %FalseValue = ... 3411 // # fallthrough to sinkMBB 3412 BB = copy0MBB; 3413 3414 // Update machine-CFG edges 3415 BB->addSuccessor(sinkMBB); 3416 3417 // sinkMBB: 3418 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 3419 // ... 
3420 BB = sinkMBB; 3421 BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) 3422 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 3423 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 3424 3425 F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 3426 return BB; 3427 } 3428 3429 case ARM::tANDsp: 3430 case ARM::tADDspr_: 3431 case ARM::tSUBspi_: 3432 case ARM::t2SUBrSPi_: 3433 case ARM::t2SUBrSPi12_: 3434 case ARM::t2SUBrSPs_: { 3435 MachineFunction *MF = BB->getParent(); 3436 unsigned DstReg = MI->getOperand(0).getReg(); 3437 unsigned SrcReg = MI->getOperand(1).getReg(); 3438 bool DstIsDead = MI->getOperand(0).isDead(); 3439 bool SrcIsKill = MI->getOperand(1).isKill(); 3440 3441 if (SrcReg != ARM::SP) { 3442 // Copy the source to SP from virtual register. 3443 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); 3444 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 3445 ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; 3446 BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) 3447 .addReg(SrcReg, getKillRegState(SrcIsKill)); 3448 } 3449 3450 unsigned OpOpc = 0; 3451 bool NeedPred = false, NeedCC = false, NeedOp3 = false; 3452 switch (MI->getOpcode()) { 3453 default: 3454 llvm_unreachable("Unexpected pseudo instruction!"); 3455 case ARM::tANDsp: 3456 OpOpc = ARM::tAND; 3457 NeedPred = true; 3458 break; 3459 case ARM::tADDspr_: 3460 OpOpc = ARM::tADDspr; 3461 break; 3462 case ARM::tSUBspi_: 3463 OpOpc = ARM::tSUBspi; 3464 break; 3465 case ARM::t2SUBrSPi_: 3466 OpOpc = ARM::t2SUBrSPi; 3467 NeedPred = true; NeedCC = true; 3468 break; 3469 case ARM::t2SUBrSPi12_: 3470 OpOpc = ARM::t2SUBrSPi12; 3471 NeedPred = true; 3472 break; 3473 case ARM::t2SUBrSPs_: 3474 OpOpc = ARM::t2SUBrSPs; 3475 NeedPred = true; NeedCC = true; NeedOp3 = true; 3476 break; 3477 } 3478 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); 3479 if (OpOpc == ARM::tAND) 3480 AddDefaultT1CC(MIB); 3481 MIB.addReg(ARM::SP); 3482 MIB.addOperand(MI->getOperand(2)); 3483 if (NeedOp3) 3484 MIB.addOperand(MI->getOperand(3)); 3485 if (NeedPred) 3486 AddDefaultPred(MIB); 3487 if (NeedCC) 3488 AddDefaultCC(MIB); 3489 3490 // Copy the result from SP to virtual register. 3491 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); 3492 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 3493 ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; 3494 BuildMI(BB, dl, TII->get(CopyOpc)) 3495 .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) 3496 .addReg(ARM::SP); 3497 MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 3498 return BB; 3499 } 3500 } 3501} 3502 3503//===----------------------------------------------------------------------===// 3504// ARM Optimization Hooks 3505//===----------------------------------------------------------------------===// 3506 3507static 3508SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 3509 TargetLowering::DAGCombinerInfo &DCI) { 3510 SelectionDAG &DAG = DCI.DAG; 3511 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3512 EVT VT = N->getValueType(0); 3513 unsigned Opc = N->getOpcode(); 3514 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 3515 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 3516 SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); 3517 ISD::CondCode CC = ISD::SETCC_INVALID; 3518 3519 if (isSlctCC) { 3520 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 3521 } else { 3522 SDValue CCOp = Slct.getOperand(0); 3523 if (CCOp.getOpcode() == ISD::SETCC) 3524 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 3525 } 3526 3527 bool DoXform = false; 3528 bool InvCC = false; 3529 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 3530 "Bad input!"); 3531 3532 if (LHS.getOpcode() == ISD::Constant && 3533 cast<ConstantSDNode>(LHS)->isNullValue()) { 3534 DoXform = true; 3535 } else if (CC != ISD::SETCC_INVALID && 3536 RHS.getOpcode() == ISD::Constant && 3537 cast<ConstantSDNode>(RHS)->isNullValue()) { 3538 std::swap(LHS, RHS); 3539 SDValue Op0 = Slct.getOperand(0); 3540 EVT OpVT = isSlctCC ? Op0.getValueType() : 3541 Op0.getOperand(0).getValueType(); 3542 bool isInt = OpVT.isInteger(); 3543 CC = ISD::getSetCCInverse(CC, isInt); 3544 3545 if (!TLI.isCondCodeLegal(CC, OpVT)) 3546 return SDValue(); // Inverse operator isn't legal. 3547 3548 DoXform = true; 3549 InvCC = true; 3550 } 3551 3552 if (DoXform) { 3553 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 3554 if (isSlctCC) 3555 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 3556 Slct.getOperand(0), Slct.getOperand(1), CC); 3557 SDValue CCOp = Slct.getOperand(0); 3558 if (InvCC) 3559 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 3560 CCOp.getOperand(0), CCOp.getOperand(1), CC); 3561 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 3562 CCOp, OtherOp, Result); 3563 } 3564 return SDValue(); 3565} 3566 3567/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 3568static SDValue PerformADDCombine(SDNode *N, 3569 TargetLowering::DAGCombinerInfo &DCI) { 3570 // added by evan in r37685 with no testcase. 3571 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3572 3573 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 3574 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { 3575 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 3576 if (Result.getNode()) return Result; 3577 } 3578 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 3579 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 3580 if (Result.getNode()) return Result; 3581 } 3582 3583 return SDValue(); 3584} 3585 3586/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 3587static SDValue PerformSUBCombine(SDNode *N, 3588 TargetLowering::DAGCombinerInfo &DCI) { 3589 // added by evan in r37685 with no testcase. 3590 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3591 3592 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 3593 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 3594 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 3595 if (Result.getNode()) return Result; 3596 } 3597 3598 return SDValue(); 3599} 3600 3601/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 3602/// ARMISD::VMOVRRD. 
3603static SDValue PerformVMOVRRDCombine(SDNode *N, 3604 TargetLowering::DAGCombinerInfo &DCI) { 3605 // fmrrd(fmdrr x, y) -> x,y 3606 SDValue InDouble = N->getOperand(0); 3607 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 3608 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 3609 return SDValue(); 3610} 3611 3612/// getVShiftImm - Check if this is a valid build_vector for the immediate 3613/// operand of a vector shift operation, where all the elements of the 3614/// build_vector must have the same constant integer value. 3615static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 3616 // Ignore bit_converts. 3617 while (Op.getOpcode() == ISD::BIT_CONVERT) 3618 Op = Op.getOperand(0); 3619 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 3620 APInt SplatBits, SplatUndef; 3621 unsigned SplatBitSize; 3622 bool HasAnyUndefs; 3623 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 3624 HasAnyUndefs, ElementBits) || 3625 SplatBitSize > ElementBits) 3626 return false; 3627 Cnt = SplatBits.getSExtValue(); 3628 return true; 3629} 3630 3631/// isVShiftLImm - Check if this is a valid build_vector for the immediate 3632/// operand of a vector shift left operation. That value must be in the range: 3633/// 0 <= Value < ElementBits for a left shift; or 3634/// 0 <= Value <= ElementBits for a long left shift. 3635static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 3636 assert(VT.isVector() && "vector shift count is not a vector type"); 3637 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 3638 if (! getVShiftImm(Op, ElementBits, Cnt)) 3639 return false; 3640 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 3641} 3642 3643/// isVShiftRImm - Check if this is a valid build_vector for the immediate 3644/// operand of a vector shift right operation. For a shift opcode, the value 3645/// is positive, but for an intrinsic the value count must be negative. The 3646/// absolute value must be in the range: 3647/// 1 <= |Value| <= ElementBits for a right shift; or 3648/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 3649static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 3650 int64_t &Cnt) { 3651 assert(VT.isVector() && "vector shift count is not a vector type"); 3652 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 3653 if (! getVShiftImm(Op, ElementBits, Cnt)) 3654 return false; 3655 if (isIntrinsic) 3656 Cnt = -Cnt; 3657 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 3658} 3659 3660/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 3661static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 3662 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3663 switch (IntNo) { 3664 default: 3665 // Don't do anything for most intrinsics. 3666 break; 3667 3668 // Vector shifts: check for immediate versions and lower them. 3669 // Note: This is done during DAG combining instead of DAG legalizing because 3670 // the build_vectors for 64-bit vector element shift counts are generally 3671 // not legal, and it is hard to see their values after they get legalized to 3672 // loads from a constant pool. 
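  // For example, a vshifts/vshiftu whose shift-amount operand is a
  // build_vector splat of a suitable negative constant is recognized by
  // isVShiftRImm below and lowered to an ARMISD::VSHRs/VSHRu node that
  // carries the (positive) element shift count as a plain i32 immediate.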
3673 case Intrinsic::arm_neon_vshifts: 3674 case Intrinsic::arm_neon_vshiftu: 3675 case Intrinsic::arm_neon_vshiftls: 3676 case Intrinsic::arm_neon_vshiftlu: 3677 case Intrinsic::arm_neon_vshiftn: 3678 case Intrinsic::arm_neon_vrshifts: 3679 case Intrinsic::arm_neon_vrshiftu: 3680 case Intrinsic::arm_neon_vrshiftn: 3681 case Intrinsic::arm_neon_vqshifts: 3682 case Intrinsic::arm_neon_vqshiftu: 3683 case Intrinsic::arm_neon_vqshiftsu: 3684 case Intrinsic::arm_neon_vqshiftns: 3685 case Intrinsic::arm_neon_vqshiftnu: 3686 case Intrinsic::arm_neon_vqshiftnsu: 3687 case Intrinsic::arm_neon_vqrshiftns: 3688 case Intrinsic::arm_neon_vqrshiftnu: 3689 case Intrinsic::arm_neon_vqrshiftnsu: { 3690 EVT VT = N->getOperand(1).getValueType(); 3691 int64_t Cnt; 3692 unsigned VShiftOpc = 0; 3693 3694 switch (IntNo) { 3695 case Intrinsic::arm_neon_vshifts: 3696 case Intrinsic::arm_neon_vshiftu: 3697 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 3698 VShiftOpc = ARMISD::VSHL; 3699 break; 3700 } 3701 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 3702 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 3703 ARMISD::VSHRs : ARMISD::VSHRu); 3704 break; 3705 } 3706 return SDValue(); 3707 3708 case Intrinsic::arm_neon_vshiftls: 3709 case Intrinsic::arm_neon_vshiftlu: 3710 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 3711 break; 3712 llvm_unreachable("invalid shift count for vshll intrinsic"); 3713 3714 case Intrinsic::arm_neon_vrshifts: 3715 case Intrinsic::arm_neon_vrshiftu: 3716 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 3717 break; 3718 return SDValue(); 3719 3720 case Intrinsic::arm_neon_vqshifts: 3721 case Intrinsic::arm_neon_vqshiftu: 3722 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 3723 break; 3724 return SDValue(); 3725 3726 case Intrinsic::arm_neon_vqshiftsu: 3727 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 3728 break; 3729 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 3730 3731 case Intrinsic::arm_neon_vshiftn: 3732 case Intrinsic::arm_neon_vrshiftn: 3733 case Intrinsic::arm_neon_vqshiftns: 3734 case Intrinsic::arm_neon_vqshiftnu: 3735 case Intrinsic::arm_neon_vqshiftnsu: 3736 case Intrinsic::arm_neon_vqrshiftns: 3737 case Intrinsic::arm_neon_vqrshiftnu: 3738 case Intrinsic::arm_neon_vqrshiftnsu: 3739 // Narrowing shifts require an immediate right shift. 3740 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 3741 break; 3742 llvm_unreachable("invalid shift count for narrowing vector shift intrinsic"); 3743 3744 default: 3745 llvm_unreachable("unhandled vector shift"); 3746 } 3747 3748 switch (IntNo) { 3749 case Intrinsic::arm_neon_vshifts: 3750 case Intrinsic::arm_neon_vshiftu: 3751 // Opcode already set above. 3752 break; 3753 case Intrinsic::arm_neon_vshiftls: 3754 case Intrinsic::arm_neon_vshiftlu: 3755 if (Cnt == VT.getVectorElementType().getSizeInBits()) 3756 VShiftOpc = ARMISD::VSHLLi; 3757 else 3758 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
3759 ARMISD::VSHLLs : ARMISD::VSHLLu); 3760 break; 3761 case Intrinsic::arm_neon_vshiftn: 3762 VShiftOpc = ARMISD::VSHRN; break; 3763 case Intrinsic::arm_neon_vrshifts: 3764 VShiftOpc = ARMISD::VRSHRs; break; 3765 case Intrinsic::arm_neon_vrshiftu: 3766 VShiftOpc = ARMISD::VRSHRu; break; 3767 case Intrinsic::arm_neon_vrshiftn: 3768 VShiftOpc = ARMISD::VRSHRN; break; 3769 case Intrinsic::arm_neon_vqshifts: 3770 VShiftOpc = ARMISD::VQSHLs; break; 3771 case Intrinsic::arm_neon_vqshiftu: 3772 VShiftOpc = ARMISD::VQSHLu; break; 3773 case Intrinsic::arm_neon_vqshiftsu: 3774 VShiftOpc = ARMISD::VQSHLsu; break; 3775 case Intrinsic::arm_neon_vqshiftns: 3776 VShiftOpc = ARMISD::VQSHRNs; break; 3777 case Intrinsic::arm_neon_vqshiftnu: 3778 VShiftOpc = ARMISD::VQSHRNu; break; 3779 case Intrinsic::arm_neon_vqshiftnsu: 3780 VShiftOpc = ARMISD::VQSHRNsu; break; 3781 case Intrinsic::arm_neon_vqrshiftns: 3782 VShiftOpc = ARMISD::VQRSHRNs; break; 3783 case Intrinsic::arm_neon_vqrshiftnu: 3784 VShiftOpc = ARMISD::VQRSHRNu; break; 3785 case Intrinsic::arm_neon_vqrshiftnsu: 3786 VShiftOpc = ARMISD::VQRSHRNsu; break; 3787 } 3788 3789 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 3790 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 3791 } 3792 3793 case Intrinsic::arm_neon_vshiftins: { 3794 EVT VT = N->getOperand(1).getValueType(); 3795 int64_t Cnt; 3796 unsigned VShiftOpc = 0; 3797 3798 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 3799 VShiftOpc = ARMISD::VSLI; 3800 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 3801 VShiftOpc = ARMISD::VSRI; 3802 else { 3803 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 3804 } 3805 3806 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 3807 N->getOperand(1), N->getOperand(2), 3808 DAG.getConstant(Cnt, MVT::i32)); 3809 } 3810 3811 case Intrinsic::arm_neon_vqrshifts: 3812 case Intrinsic::arm_neon_vqrshiftu: 3813 // No immediate versions of these to check for. 3814 break; 3815 } 3816 3817 return SDValue(); 3818} 3819 3820/// PerformShiftCombine - Checks for immediate versions of vector shifts and 3821/// lowers them. As with the vector shift intrinsics, this is done during DAG 3822/// combining instead of DAG legalizing because the build_vectors for 64-bit 3823/// vector element shift counts are generally not legal, and it is hard to see 3824/// their values after they get legalized to loads from a constant pool. 3825static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 3826 const ARMSubtarget *ST) { 3827 EVT VT = N->getValueType(0); 3828 3829 // Nothing to be done for scalar shifts. 3830 if (! VT.isVector()) 3831 return SDValue(); 3832 3833 assert(ST->hasNEON() && "unexpected vector shift"); 3834 int64_t Cnt; 3835 3836 switch (N->getOpcode()) { 3837 default: llvm_unreachable("unexpected shift opcode"); 3838 3839 case ISD::SHL: 3840 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 3841 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 3842 DAG.getConstant(Cnt, MVT::i32)); 3843 break; 3844 3845 case ISD::SRA: 3846 case ISD::SRL: 3847 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 3848 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
3849 ARMISD::VSHRs : ARMISD::VSHRu); 3850 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 3851 DAG.getConstant(Cnt, MVT::i32)); 3852 } 3853 } 3854 return SDValue(); 3855} 3856 3857/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 3858/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 3859static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 3860 const ARMSubtarget *ST) { 3861 SDValue N0 = N->getOperand(0); 3862 3863 // Check for sign- and zero-extensions of vector extract operations of 8- 3864 // and 16-bit vector elements. NEON supports these directly. They are 3865 // handled during DAG combining because type legalization will promote them 3866 // to 32-bit types and it is messy to recognize the operations after that. 3867 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 3868 SDValue Vec = N0.getOperand(0); 3869 SDValue Lane = N0.getOperand(1); 3870 EVT VT = N->getValueType(0); 3871 EVT EltVT = N0.getValueType(); 3872 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3873 3874 if (VT == MVT::i32 && 3875 (EltVT == MVT::i8 || EltVT == MVT::i16) && 3876 TLI.isTypeLegal(Vec.getValueType())) { 3877 3878 unsigned Opc = 0; 3879 switch (N->getOpcode()) { 3880 default: llvm_unreachable("unexpected opcode"); 3881 case ISD::SIGN_EXTEND: 3882 Opc = ARMISD::VGETLANEs; 3883 break; 3884 case ISD::ZERO_EXTEND: 3885 case ISD::ANY_EXTEND: 3886 Opc = ARMISD::VGETLANEu; 3887 break; 3888 } 3889 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 3890 } 3891 } 3892 3893 return SDValue(); 3894} 3895 3896/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 3897/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 3898static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 3899 const ARMSubtarget *ST) { 3900 // If the target supports NEON, try to use vmax/vmin instructions for f32 3901 // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, 3902 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 3903 // a NaN; only do the transformation when it matches that behavior. 3904 3905 // For now only do this when using NEON for FP operations; if using VFP, it 3906 // is not obvious that the benefit outweighs the cost of switching to the 3907 // NEON pipeline. 3908 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 3909 N->getValueType(0) != MVT::f32) 3910 return SDValue(); 3911 3912 SDValue CondLHS = N->getOperand(0); 3913 SDValue CondRHS = N->getOperand(1); 3914 SDValue LHS = N->getOperand(2); 3915 SDValue RHS = N->getOperand(3); 3916 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 3917 3918 unsigned Opcode = 0; 3919 bool IsReversed; 3920 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 3921 IsReversed = false; // x CC y ? x : y 3922 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 3923 IsReversed = true ; // x CC y ? y : x 3924 } else { 3925 return SDValue(); 3926 } 3927 3928 bool IsUnordered; 3929 switch (CC) { 3930 default: break; 3931 case ISD::SETOLT: 3932 case ISD::SETOLE: 3933 case ISD::SETLT: 3934 case ISD::SETLE: 3935 case ISD::SETULT: 3936 case ISD::SETULE: 3937 // If LHS is NaN, an ordered comparison will be false and the result will 3938 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 3939 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
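      // For example, for "x < y ? x : y" (SETOLT) with x = NaN, the ordered
      // compare is false and the select produces y, whereas vmin(NaN, y)
      // produces NaN, so the transformation is only done when x is known to
      // be non-NaN.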
3940 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 3941 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 3942 break; 3943 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 3944 // will return -0, so vmin can only be used for unsafe math or if one of 3945 // the operands is known to be nonzero. 3946 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 3947 !UnsafeFPMath && 3948 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 3949 break; 3950 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 3951 break; 3952 3953 case ISD::SETOGT: 3954 case ISD::SETOGE: 3955 case ISD::SETGT: 3956 case ISD::SETGE: 3957 case ISD::SETUGT: 3958 case ISD::SETUGE: 3959 // If LHS is NaN, an ordered comparison will be false and the result will 3960 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 3961 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 3962 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 3963 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 3964 break; 3965 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 3966 // will return +0, so vmax can only be used for unsafe math or if one of 3967 // the operands is known to be nonzero. 3968 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 3969 !UnsafeFPMath && 3970 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 3971 break; 3972 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 3973 break; 3974 } 3975 3976 if (!Opcode) 3977 return SDValue(); 3978 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 3979} 3980 3981SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 3982 DAGCombinerInfo &DCI) const { 3983 switch (N->getOpcode()) { 3984 default: break; 3985 case ISD::ADD: return PerformADDCombine(N, DCI); 3986 case ISD::SUB: return PerformSUBCombine(N, DCI); 3987 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 3988 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 3989 case ISD::SHL: 3990 case ISD::SRA: 3991 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 3992 case ISD::SIGN_EXTEND: 3993 case ISD::ZERO_EXTEND: 3994 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 3995 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 3996 } 3997 return SDValue(); 3998} 3999 4000bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 4001 if (!Subtarget->hasV6Ops()) 4002 // Pre-v6 does not support unaligned mem access. 4003 return false; 4004 else { 4005 // v6+ may or may not support unaligned mem access depending on the system 4006 // configuration. 4007 // FIXME: This is pretty conservative. Should we provide cmdline option to 4008 // control the behaviour? 4009 if (!Subtarget->isTargetDarwin()) 4010 return false; 4011 } 4012 4013 switch (VT.getSimpleVT().SimpleTy) { 4014 default: 4015 return false; 4016 case MVT::i8: 4017 case MVT::i16: 4018 case MVT::i32: 4019 return true; 4020 // FIXME: VLD1 etc with standard alignment is legal. 
4021 } 4022} 4023 4024static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 4025 if (V < 0) 4026 return false; 4027 4028 unsigned Scale = 1; 4029 switch (VT.getSimpleVT().SimpleTy) { 4030 default: return false; 4031 case MVT::i1: 4032 case MVT::i8: 4033 // Scale == 1; 4034 break; 4035 case MVT::i16: 4036 // Scale == 2; 4037 Scale = 2; 4038 break; 4039 case MVT::i32: 4040 // Scale == 4; 4041 Scale = 4; 4042 break; 4043 } 4044 4045 if ((V & (Scale - 1)) != 0) 4046 return false; 4047 V /= Scale; 4048 return V == (V & ((1LL << 5) - 1)); 4049} 4050 4051static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 4052 const ARMSubtarget *Subtarget) { 4053 bool isNeg = false; 4054 if (V < 0) { 4055 isNeg = true; 4056 V = - V; 4057 } 4058 4059 switch (VT.getSimpleVT().SimpleTy) { 4060 default: return false; 4061 case MVT::i1: 4062 case MVT::i8: 4063 case MVT::i16: 4064 case MVT::i32: 4065 // + imm12 or - imm8 4066 if (isNeg) 4067 return V == (V & ((1LL << 8) - 1)); 4068 return V == (V & ((1LL << 12) - 1)); 4069 case MVT::f32: 4070 case MVT::f64: 4071 // Same as ARM mode. FIXME: NEON? 4072 if (!Subtarget->hasVFP2()) 4073 return false; 4074 if ((V & 3) != 0) 4075 return false; 4076 V >>= 2; 4077 return V == (V & ((1LL << 8) - 1)); 4078 } 4079} 4080 4081/// isLegalAddressImmediate - Return true if the integer value can be used 4082/// as the offset of the target addressing mode for load / store of the 4083/// given type. 4084static bool isLegalAddressImmediate(int64_t V, EVT VT, 4085 const ARMSubtarget *Subtarget) { 4086 if (V == 0) 4087 return true; 4088 4089 if (!VT.isSimple()) 4090 return false; 4091 4092 if (Subtarget->isThumb1Only()) 4093 return isLegalT1AddressImmediate(V, VT); 4094 else if (Subtarget->isThumb2()) 4095 return isLegalT2AddressImmediate(V, VT, Subtarget); 4096 4097 // ARM mode. 4098 if (V < 0) 4099 V = - V; 4100 switch (VT.getSimpleVT().SimpleTy) { 4101 default: return false; 4102 case MVT::i1: 4103 case MVT::i8: 4104 case MVT::i32: 4105 // +- imm12 4106 return V == (V & ((1LL << 12) - 1)); 4107 case MVT::i16: 4108 // +- imm8 4109 return V == (V & ((1LL << 8) - 1)); 4110 case MVT::f32: 4111 case MVT::f64: 4112 if (!Subtarget->hasVFP2()) // FIXME: NEON? 4113 return false; 4114 if ((V & 3) != 0) 4115 return false; 4116 V >>= 2; 4117 return V == (V & ((1LL << 8) - 1)); 4118 } 4119} 4120 4121bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 4122 EVT VT) const { 4123 int Scale = AM.Scale; 4124 if (Scale < 0) 4125 return false; 4126 4127 switch (VT.getSimpleVT().SimpleTy) { 4128 default: return false; 4129 case MVT::i1: 4130 case MVT::i8: 4131 case MVT::i16: 4132 case MVT::i32: 4133 if (Scale == 1) 4134 return true; 4135 // r + r << imm 4136 Scale = Scale & ~1; 4137 return Scale == 2 || Scale == 4 || Scale == 8; 4138 case MVT::i64: 4139 // r + r 4140 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 4141 return true; 4142 return false; 4143 case MVT::isVoid: 4144 // Note, we allow "void" uses (basically, uses that aren't loads or 4145 // stores), because arm allows folding a scale into many arithmetic 4146 // operations. This should be made more precise and revisited later. 4147 4148 // Allow r << imm, but the imm has to be a multiple of two. 4149 if (Scale & 1) return false; 4150 return isPowerOf2_32(Scale); 4151 } 4152} 4153 4154/// isLegalAddressingMode - Return true if the addressing mode represented 4155/// by AM is legal for this target, for a load/store of the specified type. 
4156bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 4157 const Type *Ty) const { 4158 EVT VT = getValueType(Ty, true); 4159 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 4160 return false; 4161 4162 // Can never fold addr of global into load/store. 4163 if (AM.BaseGV) 4164 return false; 4165 4166 switch (AM.Scale) { 4167 case 0: // no scale reg, must be "r+i" or "r", or "i". 4168 break; 4169 case 1: 4170 if (Subtarget->isThumb1Only()) 4171 return false; 4172 // FALL THROUGH. 4173 default: 4174 // ARM doesn't support any R+R*scale+imm addr modes. 4175 if (AM.BaseOffs) 4176 return false; 4177 4178 if (!VT.isSimple()) 4179 return false; 4180 4181 if (Subtarget->isThumb2()) 4182 return isLegalT2ScaledAddressingMode(AM, VT); 4183 4184 int Scale = AM.Scale; 4185 switch (VT.getSimpleVT().SimpleTy) { 4186 default: return false; 4187 case MVT::i1: 4188 case MVT::i8: 4189 case MVT::i32: 4190 if (Scale < 0) Scale = -Scale; 4191 if (Scale == 1) 4192 return true; 4193 // r + r << imm 4194 return isPowerOf2_32(Scale & ~1); 4195 case MVT::i16: 4196 case MVT::i64: 4197 // r + r 4198 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 4199 return true; 4200 return false; 4201 4202 case MVT::isVoid: 4203 // Note, we allow "void" uses (basically, uses that aren't loads or 4204 // stores), because arm allows folding a scale into many arithmetic 4205 // operations. This should be made more precise and revisited later. 4206 4207 // Allow r << imm, but the imm has to be a multiple of two. 4208 if (Scale & 1) return false; 4209 return isPowerOf2_32(Scale); 4210 } 4211 break; 4212 } 4213 return true; 4214} 4215 4216/// isLegalICmpImmediate - Return true if the specified immediate is legal 4217/// icmp immediate, that is the target has icmp instructions which can compare 4218/// a register against the immediate without having to materialize the 4219/// immediate into a register. 
4220bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 4221 if (!Subtarget->isThumb()) 4222 return ARM_AM::getSOImmVal(Imm) != -1; 4223 if (Subtarget->isThumb2()) 4224 return ARM_AM::getT2SOImmVal(Imm) != -1; 4225 return Imm >= 0 && Imm <= 255; 4226} 4227 4228static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 4229 bool isSEXTLoad, SDValue &Base, 4230 SDValue &Offset, bool &isInc, 4231 SelectionDAG &DAG) { 4232 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 4233 return false; 4234 4235 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 4236 // AddressingMode 3 4237 Base = Ptr->getOperand(0); 4238 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 4239 int RHSC = (int)RHS->getZExtValue(); 4240 if (RHSC < 0 && RHSC > -256) { 4241 assert(Ptr->getOpcode() == ISD::ADD); 4242 isInc = false; 4243 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 4244 return true; 4245 } 4246 } 4247 isInc = (Ptr->getOpcode() == ISD::ADD); 4248 Offset = Ptr->getOperand(1); 4249 return true; 4250 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 4251 // AddressingMode 2 4252 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 4253 int RHSC = (int)RHS->getZExtValue(); 4254 if (RHSC < 0 && RHSC > -0x1000) { 4255 assert(Ptr->getOpcode() == ISD::ADD); 4256 isInc = false; 4257 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 4258 Base = Ptr->getOperand(0); 4259 return true; 4260 } 4261 } 4262 4263 if (Ptr->getOpcode() == ISD::ADD) { 4264 isInc = true; 4265 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); 4266 if (ShOpcVal != ARM_AM::no_shift) { 4267 Base = Ptr->getOperand(1); 4268 Offset = Ptr->getOperand(0); 4269 } else { 4270 Base = Ptr->getOperand(0); 4271 Offset = Ptr->getOperand(1); 4272 } 4273 return true; 4274 } 4275 4276 isInc = (Ptr->getOpcode() == ISD::ADD); 4277 Base = Ptr->getOperand(0); 4278 Offset = Ptr->getOperand(1); 4279 return true; 4280 } 4281 4282 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 4283 return false; 4284} 4285 4286static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 4287 bool isSEXTLoad, SDValue &Base, 4288 SDValue &Offset, bool &isInc, 4289 SelectionDAG &DAG) { 4290 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 4291 return false; 4292 4293 Base = Ptr->getOperand(0); 4294 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 4295 int RHSC = (int)RHS->getZExtValue(); 4296 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 4297 assert(Ptr->getOpcode() == ISD::ADD); 4298 isInc = false; 4299 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 4300 return true; 4301 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 4302 isInc = Ptr->getOpcode() == ISD::ADD; 4303 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 4304 return true; 4305 } 4306 } 4307 4308 return false; 4309} 4310 4311/// getPreIndexedAddressParts - returns true by value, base pointer and 4312/// offset pointer and addressing mode by reference if the node's address 4313/// can be legally represented as pre-indexed load / store address. 
4314bool 4315ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 4316 SDValue &Offset, 4317 ISD::MemIndexedMode &AM, 4318 SelectionDAG &DAG) const { 4319 if (Subtarget->isThumb1Only()) 4320 return false; 4321 4322 EVT VT; 4323 SDValue Ptr; 4324 bool isSEXTLoad = false; 4325 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 4326 Ptr = LD->getBasePtr(); 4327 VT = LD->getMemoryVT(); 4328 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 4329 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 4330 Ptr = ST->getBasePtr(); 4331 VT = ST->getMemoryVT(); 4332 } else 4333 return false; 4334 4335 bool isInc; 4336 bool isLegal = false; 4337 if (Subtarget->isThumb2()) 4338 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 4339 Offset, isInc, DAG); 4340 else 4341 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 4342 Offset, isInc, DAG); 4343 if (!isLegal) 4344 return false; 4345 4346 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 4347 return true; 4348} 4349 4350/// getPostIndexedAddressParts - returns true by value, base pointer and 4351/// offset pointer and addressing mode by reference if this node can be 4352/// combined with a load / store to form a post-indexed load / store. 4353bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 4354 SDValue &Base, 4355 SDValue &Offset, 4356 ISD::MemIndexedMode &AM, 4357 SelectionDAG &DAG) const { 4358 if (Subtarget->isThumb1Only()) 4359 return false; 4360 4361 EVT VT; 4362 SDValue Ptr; 4363 bool isSEXTLoad = false; 4364 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 4365 VT = LD->getMemoryVT(); 4366 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 4367 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 4368 VT = ST->getMemoryVT(); 4369 } else 4370 return false; 4371 4372 bool isInc; 4373 bool isLegal = false; 4374 if (Subtarget->isThumb2()) 4375 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 4376 isInc, DAG); 4377 else 4378 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 4379 isInc, DAG); 4380 if (!isLegal) 4381 return false; 4382 4383 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 4384 return true; 4385} 4386 4387void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 4388 const APInt &Mask, 4389 APInt &KnownZero, 4390 APInt &KnownOne, 4391 const SelectionDAG &DAG, 4392 unsigned Depth) const { 4393 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 4394 switch (Op.getOpcode()) { 4395 default: break; 4396 case ARMISD::CMOV: { 4397 // Bits are known zero/one if known on the LHS and RHS. 4398 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 4399 if (KnownZero == 0 && KnownOne == 0) return; 4400 4401 APInt KnownZeroRHS, KnownOneRHS; 4402 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 4403 KnownZeroRHS, KnownOneRHS, Depth+1); 4404 KnownZero &= KnownZeroRHS; 4405 KnownOne &= KnownOneRHS; 4406 return; 4407 } 4408 } 4409} 4410 4411//===----------------------------------------------------------------------===// 4412// ARM Inline Assembly Support 4413//===----------------------------------------------------------------------===// 4414 4415/// getConstraintType - Given a constraint letter, return the type of 4416/// constraint it is for this target. 
4417ARMTargetLowering::ConstraintType 4418ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 4419 if (Constraint.size() == 1) { 4420 switch (Constraint[0]) { 4421 default: break; 4422 case 'l': return C_RegisterClass; 4423 case 'w': return C_RegisterClass; 4424 } 4425 } 4426 return TargetLowering::getConstraintType(Constraint); 4427} 4428 4429std::pair<unsigned, const TargetRegisterClass*> 4430ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 4431 EVT VT) const { 4432 if (Constraint.size() == 1) { 4433 // GCC ARM Constraint Letters 4434 switch (Constraint[0]) { 4435 case 'l': 4436 if (Subtarget->isThumb()) 4437 return std::make_pair(0U, ARM::tGPRRegisterClass); 4438 else 4439 return std::make_pair(0U, ARM::GPRRegisterClass); 4440 case 'r': 4441 return std::make_pair(0U, ARM::GPRRegisterClass); 4442 case 'w': 4443 if (VT == MVT::f32) 4444 return std::make_pair(0U, ARM::SPRRegisterClass); 4445 if (VT.getSizeInBits() == 64) 4446 return std::make_pair(0U, ARM::DPRRegisterClass); 4447 if (VT.getSizeInBits() == 128) 4448 return std::make_pair(0U, ARM::QPRRegisterClass); 4449 break; 4450 } 4451 } 4452 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 4453} 4454 4455std::vector<unsigned> ARMTargetLowering:: 4456getRegClassForInlineAsmConstraint(const std::string &Constraint, 4457 EVT VT) const { 4458 if (Constraint.size() != 1) 4459 return std::vector<unsigned>(); 4460 4461 switch (Constraint[0]) { // GCC ARM Constraint Letters 4462 default: break; 4463 case 'l': 4464 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 4465 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 4466 0); 4467 case 'r': 4468 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 4469 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 4470 ARM::R8, ARM::R9, ARM::R10, ARM::R11, 4471 ARM::R12, ARM::LR, 0); 4472 case 'w': 4473 if (VT == MVT::f32) 4474 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, 4475 ARM::S4, ARM::S5, ARM::S6, ARM::S7, 4476 ARM::S8, ARM::S9, ARM::S10, ARM::S11, 4477 ARM::S12,ARM::S13,ARM::S14,ARM::S15, 4478 ARM::S16,ARM::S17,ARM::S18,ARM::S19, 4479 ARM::S20,ARM::S21,ARM::S22,ARM::S23, 4480 ARM::S24,ARM::S25,ARM::S26,ARM::S27, 4481 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); 4482 if (VT.getSizeInBits() == 64) 4483 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, 4484 ARM::D4, ARM::D5, ARM::D6, ARM::D7, 4485 ARM::D8, ARM::D9, ARM::D10,ARM::D11, 4486 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); 4487 if (VT.getSizeInBits() == 128) 4488 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, 4489 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); 4490 break; 4491 } 4492 4493 return std::vector<unsigned>(); 4494} 4495 4496/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 4497/// vector. If it is invalid, don't add anything to Ops. 4498void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 4499 char Constraint, 4500 bool hasMemory, 4501 std::vector<SDValue>&Ops, 4502 SelectionDAG &DAG) const { 4503 SDValue Result(0, 0); 4504 4505 switch (Constraint) { 4506 default: break; 4507 case 'I': case 'J': case 'K': case 'L': 4508 case 'M': case 'N': case 'O': 4509 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 4510 if (!C) 4511 return; 4512 4513 int64_t CVal64 = C->getSExtValue(); 4514 int CVal = (int) CVal64; 4515 // None of these constraints allow values larger than 32 bits. Check 4516 // that the value fits in an int. 
4517 if (CVal != CVal64) 4518 return; 4519 4520 switch (Constraint) { 4521 case 'I': 4522 if (Subtarget->isThumb1Only()) { 4523 // This must be a constant between 0 and 255, for ADD 4524 // immediates. 4525 if (CVal >= 0 && CVal <= 255) 4526 break; 4527 } else if (Subtarget->isThumb2()) { 4528 // A constant that can be used as an immediate value in a 4529 // data-processing instruction. 4530 if (ARM_AM::getT2SOImmVal(CVal) != -1) 4531 break; 4532 } else { 4533 // A constant that can be used as an immediate value in a 4534 // data-processing instruction. 4535 if (ARM_AM::getSOImmVal(CVal) != -1) 4536 break; 4537 } 4538 return; 4539 4540 case 'J': 4541 if (Subtarget->isThumb()) { // FIXME thumb2 4542 // This must be a constant between -255 and -1, for negated ADD 4543 // immediates. This can be used in GCC with an "n" modifier that 4544 // prints the negated value, for use with SUB instructions. It is 4545 // not useful otherwise but is implemented for compatibility. 4546 if (CVal >= -255 && CVal <= -1) 4547 break; 4548 } else { 4549 // This must be a constant between -4095 and 4095. It is not clear 4550 // what this constraint is intended for. Implemented for 4551 // compatibility with GCC. 4552 if (CVal >= -4095 && CVal <= 4095) 4553 break; 4554 } 4555 return; 4556 4557 case 'K': 4558 if (Subtarget->isThumb1Only()) { 4559 // A 32-bit value where only one byte has a nonzero value. Exclude 4560 // zero to match GCC. This constraint is used by GCC internally for 4561 // constants that can be loaded with a move/shift combination. 4562 // It is not useful otherwise but is implemented for compatibility. 4563 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 4564 break; 4565 } else if (Subtarget->isThumb2()) { 4566 // A constant whose bitwise inverse can be used as an immediate 4567 // value in a data-processing instruction. This can be used in GCC 4568 // with a "B" modifier that prints the inverted value, for use with 4569 // BIC and MVN instructions. It is not useful otherwise but is 4570 // implemented for compatibility. 4571 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 4572 break; 4573 } else { 4574 // A constant whose bitwise inverse can be used as an immediate 4575 // value in a data-processing instruction. This can be used in GCC 4576 // with a "B" modifier that prints the inverted value, for use with 4577 // BIC and MVN instructions. It is not useful otherwise but is 4578 // implemented for compatibility. 4579 if (ARM_AM::getSOImmVal(~CVal) != -1) 4580 break; 4581 } 4582 return; 4583 4584 case 'L': 4585 if (Subtarget->isThumb1Only()) { 4586 // This must be a constant between -7 and 7, 4587 // for 3-operand ADD/SUB immediate instructions. 4588 if (CVal >= -7 && CVal < 7) 4589 break; 4590 } else if (Subtarget->isThumb2()) { 4591 // A constant whose negation can be used as an immediate value in a 4592 // data-processing instruction. This can be used in GCC with an "n" 4593 // modifier that prints the negated value, for use with SUB 4594 // instructions. It is not useful otherwise but is implemented for 4595 // compatibility. 4596 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 4597 break; 4598 } else { 4599 // A constant whose negation can be used as an immediate value in a 4600 // data-processing instruction. This can be used in GCC with an "n" 4601 // modifier that prints the negated value, for use with SUB 4602 // instructions. It is not useful otherwise but is implemented for 4603 // compatibility. 
4604 if (ARM_AM::getSOImmVal(-CVal) != -1) 4605 break; 4606 } 4607 return; 4608 4609 case 'M': 4610 if (Subtarget->isThumb()) { // FIXME thumb2 4611 // This must be a multiple of 4 between 0 and 1020, for 4612 // ADD sp + immediate. 4613 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 4614 break; 4615 } else { 4616 // A power of two or a constant between 0 and 32. This is used in 4617 // GCC for the shift amount on shifted register operands, but it is 4618 // useful in general for any shift amounts. 4619 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 4620 break; 4621 } 4622 return; 4623 4624 case 'N': 4625 if (Subtarget->isThumb()) { // FIXME thumb2 4626 // This must be a constant between 0 and 31, for shift amounts. 4627 if (CVal >= 0 && CVal <= 31) 4628 break; 4629 } 4630 return; 4631 4632 case 'O': 4633 if (Subtarget->isThumb()) { // FIXME thumb2 4634 // This must be a multiple of 4 between -508 and 508, for 4635 // ADD/SUB sp = sp + immediate. 4636 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 4637 break; 4638 } 4639 return; 4640 } 4641 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 4642 break; 4643 } 4644 4645 if (Result.getNode()) { 4646 Ops.push_back(Result); 4647 return; 4648 } 4649 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, 4650 Ops, DAG); 4651} 4652 4653bool 4654ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 4655 // The ARM target isn't yet aware of offsets. 4656 return false; 4657} 4658 4659int ARM::getVFPf32Imm(const APFloat &FPImm) { 4660 APInt Imm = FPImm.bitcastToAPInt(); 4661 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; 4662 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 4663 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits 4664 4665 // We can handle 4 bits of mantissa. 4666 // mantissa = (16+UInt(e:f:g:h))/16. 4667 if (Mantissa & 0x7ffff) 4668 return -1; 4669 Mantissa >>= 19; 4670 if ((Mantissa & 0xf) != Mantissa) 4671 return -1; 4672 4673 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 4674 if (Exp < -3 || Exp > 4) 4675 return -1; 4676 Exp = ((Exp+3) & 0x7) ^ 4; 4677 4678 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 4679} 4680 4681int ARM::getVFPf64Imm(const APFloat &FPImm) { 4682 APInt Imm = FPImm.bitcastToAPInt(); 4683 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; 4684 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 4685 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; 4686 4687 // We can handle 4 bits of mantissa. 4688 // mantissa = (16+UInt(e:f:g:h))/16. 4689 if (Mantissa & 0xffffffffffffLL) 4690 return -1; 4691 Mantissa >>= 48; 4692 if ((Mantissa & 0xf) != Mantissa) 4693 return -1; 4694 4695 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 4696 if (Exp < -3 || Exp > 4) 4697 return -1; 4698 Exp = ((Exp+3) & 0x7) ^ 4; 4699 4700 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 4701} 4702 4703/// isFPImmLegal - Returns true if the target can instruction select the 4704/// specified FP immediate natively. If false, the legalizer will 4705/// materialize the FP immediate as a load from a constant pool. 4706bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 4707 if (!Subtarget->hasVFP3()) 4708 return false; 4709 if (VT == MVT::f32) 4710 return ARM::getVFPf32Imm(Imm) != -1; 4711 if (VT == MVT::f64) 4712 return ARM::getVFPf64Imm(Imm) != -1; 4713 return false; 4714} 4715