ARMISelLowering.cpp revision 171422980d99d7689b2cac01504b981e87b61905

//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <sstream>
using namespace llvm;

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
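
// For example (illustrative): addDRTypeForNEON(MVT::v8i8) registers v8i8 in
// the DPR class, promotes its loads/stores to f64, promotes its AND/OR/XOR
// to v2i32, and expands the divide/remainder operations NEON lacks.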

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();
  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  if (Subtarget->isThumb())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
    // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR iff the target
    // supports VFP2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // int <-> fp are custom expanded into bit_convert + ARMISD ops.
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);

  setStackPointerRegisterToSaveRestore(ARM::SP);
  setSchedulingPreference(SchedulingForRegPressure);

  // FIXME: If-converter should use instruction latency to determine
  // profitability rather than relying on fixed limits.
  if (Subtarget->getCPUString() == "generic") {
    // Generic (and overly aggressive) if-conversion limits.
    setIfCvtBlockSizeLimit(10);
    setIfCvtDupBlockSizeLimit(2);
  } else if (Subtarget->hasV6Ops()) {
    setIfCvtBlockSizeLimit(2);
    setIfCvtDupBlockSizeLimit(1);
  } else {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(2);
  }

  maxStoresPerMemcpy = 1; // temporary - rewrite interface to use type
  // Do not enable CodePlacementOpt for now: it currently runs after the
  // ARMConstantIslandPass and messes up branch relaxation and placement
  // of constant islands.
  // benefitFromCodePlacementOpt = true;
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CNEG:          return "ARMISD::CNEG";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP:  return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
"ARMISD::VUZP"; 539 case ARMISD::VTRN: return "ARMISD::VTRN"; 540 } 541} 542 543/// getFunctionAlignment - Return the Log2 alignment of this function. 544unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { 545 return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1; 546} 547 548//===----------------------------------------------------------------------===// 549// Lowering Code 550//===----------------------------------------------------------------------===// 551 552/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 553static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 554 switch (CC) { 555 default: llvm_unreachable("Unknown condition code!"); 556 case ISD::SETNE: return ARMCC::NE; 557 case ISD::SETEQ: return ARMCC::EQ; 558 case ISD::SETGT: return ARMCC::GT; 559 case ISD::SETGE: return ARMCC::GE; 560 case ISD::SETLT: return ARMCC::LT; 561 case ISD::SETLE: return ARMCC::LE; 562 case ISD::SETUGT: return ARMCC::HI; 563 case ISD::SETUGE: return ARMCC::HS; 564 case ISD::SETULT: return ARMCC::LO; 565 case ISD::SETULE: return ARMCC::LS; 566 } 567} 568 569/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 570static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 571 ARMCC::CondCodes &CondCode2) { 572 CondCode2 = ARMCC::AL; 573 switch (CC) { 574 default: llvm_unreachable("Unknown FP condition!"); 575 case ISD::SETEQ: 576 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 577 case ISD::SETGT: 578 case ISD::SETOGT: CondCode = ARMCC::GT; break; 579 case ISD::SETGE: 580 case ISD::SETOGE: CondCode = ARMCC::GE; break; 581 case ISD::SETOLT: CondCode = ARMCC::MI; break; 582 case ISD::SETOLE: CondCode = ARMCC::LS; break; 583 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 584 case ISD::SETO: CondCode = ARMCC::VC; break; 585 case ISD::SETUO: CondCode = ARMCC::VS; break; 586 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 587 case ISD::SETUGT: CondCode = ARMCC::HI; break; 588 case ISD::SETUGE: CondCode = ARMCC::PL; break; 589 case ISD::SETLT: 590 case ISD::SETULT: CondCode = ARMCC::LT; break; 591 case ISD::SETLE: 592 case ISD::SETULE: CondCode = ARMCC::LE; break; 593 case ISD::SETNE: 594 case ISD::SETUNE: CondCode = ARMCC::NE; break; 595 } 596} 597 598//===----------------------------------------------------------------------===// 599// Calling Convention Implementation 600//===----------------------------------------------------------------------===// 601 602#include "ARMGenCallingConv.inc" 603 604// APCS f64 is in register pairs, possibly split to stack 605static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, 606 CCValAssign::LocInfo &LocInfo, 607 CCState &State, bool CanFail) { 608 static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; 609 610 // Try to get the first register. 611 if (unsigned Reg = State.AllocateReg(RegList, 4)) 612 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 613 else { 614 // For the 2nd half of a v2f64, do not fail. 615 if (CanFail) 616 return false; 617 618 // Put the whole thing on the stack. 619 State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, 620 State.AllocateStack(8, 4), 621 LocVT, LocInfo)); 622 return true; 623 } 624 625 // Try to get the second register. 

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

// AAPCS f64 is in aligned register pairs.
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false;  // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}
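
// For example: under AAPCS an f64 occupies an aligned (even/odd) core
// register pair, so for double g(int a, double d), a lands in R0 and d in
// {R2, R3} - hence the paired HiRegList/LoRegList allocation above instead
// of the flat R0-R3 list used for APCS.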

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/false, NULL, 0, NULL, 0);
}
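
// For example: a struct passed byval whose bytes do not all fit in R0-R3
// reaches this point, and Flags.getByValSize() bytes starting at Src are
// copied into the outgoing argument area with an ordinary memcpy node.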

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = Outs[realArgIdx].Val;
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces.
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());
          if (StackPtr.getNode() == 0)
            StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.getNode() == 0)
        StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && !isExt;
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex,
                                                           4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }
  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32), InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}
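
// The net effect is a glued sequence of the form (illustrative):
//   callseq_start -> CopyToReg {R0-R3} / argument stores -> ARMISD::CALL ->
//   callseq_end, after which LowerCallResult copies the results back out of
//   the physical return registers.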

SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = Outs[realRVLocIdx].Val;

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}
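
// Illustrative result: "ret double" becomes an ARMISD::VMOVRRD that splits
// the f64 into two i32 values, two glued CopyToReg nodes targeting the
// return register pair assigned above, and a final ARMISD::RET_FLAG.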

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME: there is no actual debug info here.
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
                                                         ARMCP::CPBlockAddress,
                                                         PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               PseudoSourceValue::getConstantPool(), 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model.
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                             ARMCP::CPValue, PCAdj, "tlsgd", true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         PseudoSourceValue::getConstantPool(), 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // Call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}
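
// Illustrative "general dynamic" sequence: the pc-relative "tlsgd"
// constant-pool entry is loaded and PIC-adjusted, then passed in R0 to
// __tls_get_addr, whose i32 return value is the variable's address.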

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) {
  GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer.
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                               ARMCP::CPValue, PCAdj, "gottpoff", true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0);
  } else {
    // Local exec model.
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
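
// Illustrative "initial exec" computation, matching the code above:
//   ldr r0, [pc, #cp]    @ gottpoff(var) from the constant pool
//   add r0, pc           @ ARMISD::PIC_ADD (PCAdj is 4 in Thumb, 8 in ARM)
//   ldr r0, [r0]         @ indirect through the GOT entry
//   add r0, r0, tp       @ tp = ARMISD::THREAD_POINTER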

SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS model;
  // otherwise use the "Local Exec" TLS model.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}

SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                 CPAddr,
                                 PseudoSourceValue::getConstantPool(), 0);
    SDValue Chain = Result.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
    if (!UseGOTOFF)
      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                           PseudoSourceValue::getGOT(), 0);
    return Result;
  } else {
    // If we have T2 ops, we can materialize the address directly via a
    // movt/movw pair. This is always cheaper.
    if (Subtarget->useMovt()) {
      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                         DAG.getTargetGlobalAddress(GV, PtrVT));
    } else {
      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                         PseudoSourceValue::getConstantPool(), 0);
    }
  }
}

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static)
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  else {
    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0
                                             : (Subtarget->isThumb() ? 4 : 8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               PseudoSourceValue::getConstantPool(), 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
    Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                         PseudoSourceValue::getGOT(), 0);

  return Result;
}
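
// Illustrative Darwin PIC sequence for an imported global, as built above:
//   ldr r0, [pc, #cp]    @ constant-pool entry for GV (PCAdj applied)
//   add r0, pc           @ ARMISD::PIC_ADD
//   ldr r0, [r0]         @ extra load when GVIsIndirectSymbol (non-lazy ptr)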

SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       "_GLOBAL_OFFSET_TABLE_",
                                                       ARMPCLabelIndex, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               PseudoSourceValue::getConstantPool(), 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}

SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::arm_thread_pointer: {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
    SDValue CPAddr;
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
4 : 8); 1452 ARMConstantPoolValue *CPV = 1453 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 1454 ARMCP::CPLSDA, PCAdj); 1455 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1456 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1457 SDValue Result = 1458 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1459 PseudoSourceValue::getConstantPool(), 0); 1460 SDValue Chain = Result.getValue(1); 1461 1462 if (RelocM == Reloc::PIC_) { 1463 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1464 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1465 } 1466 return Result; 1467 } 1468 case Intrinsic::eh_sjlj_setjmp: 1469 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1)); 1470 } 1471} 1472 1473static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, 1474 unsigned VarArgsFrameIndex) { 1475 // vastart just stores the address of the VarArgsFrameIndex slot into the 1476 // memory location argument. 1477 DebugLoc dl = Op.getDebugLoc(); 1478 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1479 SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1480 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1481 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); 1482} 1483 1484SDValue 1485ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { 1486 SDNode *Node = Op.getNode(); 1487 DebugLoc dl = Node->getDebugLoc(); 1488 EVT VT = Node->getValueType(0); 1489 SDValue Chain = Op.getOperand(0); 1490 SDValue Size = Op.getOperand(1); 1491 SDValue Align = Op.getOperand(2); 1492 1493 // Chain the dynamic stack allocation so that it doesn't modify the stack 1494 // pointer when other instructions are using the stack. 1495 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); 1496 1497 unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue(); 1498 unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); 1499 if (AlignVal > StackAlign) 1500 // Do this now since the selection pass cannot introduce new 1501 // target-independent nodes. 1502 Align = DAG.getConstant(-(uint64_t)AlignVal, VT); 1503 1504 // In Thumb1 mode, there is no "sub r, sp, r" instruction, so we end up 1505 // using an "add r, sp, r" instead. Negate the size now so we don't need 1506 // an even more horrible hack later.
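  // An illustrative sketch only (register choices are up to the allocator):
  // a variable-sized alloca in Thumb1 ends up as roughly
  //   rsbs r0, r0, #0      @ the negation inserted below
  //   add  r0, sp, r0      @ sp + (-Size), since "sub rd, sp, rm" is absent
  //   mov  sp, r0
  // while a constant size that is a multiple of 4 and at most 508 can be
  // subtracted from sp directly and is left un-negated.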
1507 MachineFunction &MF = DAG.getMachineFunction(); 1508 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1509 if (AFI->isThumb1OnlyFunction()) { 1510 bool Negate = true; 1511 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size); 1512 if (C) { 1513 uint32_t Val = C->getZExtValue(); 1514 if (Val <= 508 && ((Val & 3) == 0)) 1515 Negate = false; 1516 } 1517 if (Negate) 1518 Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); 1519 } 1520 1521 SDVTList VTList = DAG.getVTList(VT, MVT::Other); 1522 SDValue Ops1[] = { Chain, Size, Align }; 1523 SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); 1524 Chain = Res.getValue(1); 1525 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), 1526 DAG.getIntPtrConstant(0, true), SDValue()); 1527 SDValue Ops2[] = { Res, Chain }; 1528 return DAG.getMergeValues(Ops2, 2, dl); 1529} 1530 1531SDValue 1532ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 1533 SDValue &Root, SelectionDAG &DAG, 1534 DebugLoc dl) { 1535 MachineFunction &MF = DAG.getMachineFunction(); 1536 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1537 1538 TargetRegisterClass *RC; 1539 if (AFI->isThumb1OnlyFunction()) 1540 RC = ARM::tGPRRegisterClass; 1541 else 1542 RC = ARM::GPRRegisterClass; 1543 1544 // Transform the arguments stored in physical registers into virtual ones. 1545 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1546 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 1547 1548 SDValue ArgValue2; 1549 if (NextVA.isMemLoc()) { 1550 unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; 1551 MachineFrameInfo *MFI = MF.getFrameInfo(); 1552 int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), 1553 true, false); 1554 1555 // Create load node to retrieve arguments from the stack. 1556 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1557 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 1558 PseudoSourceValue::getFixedStack(FI), 0); 1559 } else { 1560 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 1561 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 1562 } 1563 1564 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 1565} 1566 1567SDValue 1568ARMTargetLowering::LowerFormalArguments(SDValue Chain, 1569 CallingConv::ID CallConv, bool isVarArg, 1570 const SmallVectorImpl<ISD::InputArg> 1571 &Ins, 1572 DebugLoc dl, SelectionDAG &DAG, 1573 SmallVectorImpl<SDValue> &InVals) { 1574 1575 MachineFunction &MF = DAG.getMachineFunction(); 1576 MachineFrameInfo *MFI = MF.getFrameInfo(); 1577 1578 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1579 1580 // Assign locations to all of the incoming arguments. 1581 SmallVector<CCValAssign, 16> ArgLocs; 1582 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 1583 *DAG.getContext()); 1584 CCInfo.AnalyzeFormalArguments(Ins, 1585 CCAssignFnForNode(CallConv, /* Return*/ false, 1586 isVarArg)); 1587 1588 SmallVector<SDValue, 16> ArgValues; 1589 1590 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1591 CCValAssign &VA = ArgLocs[i]; 1592 1593 // Arguments stored in registers. 1594 if (VA.isRegLoc()) { 1595 EVT RegVT = VA.getLocVT(); 1596 1597 SDValue ArgValue; 1598 if (VA.needsCustom()) { 1599 // f64 and vector types are split up into multiple registers or 1600 // combinations of registers and stack slots. 
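        // For example (illustrative; the actual assignments come from
        // CCInfo): an AAPCS f64 passed in r0/r1 is rebuilt below as
        // (f64 (VMOVDRR r0, r1)), and a v2f64 consumes two such GPR pairs
        // (or their stack slots), inserted into an undef v2f64 one f64
        // lane at a time.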
1601 RegVT = MVT::i32; 1602 1603 if (VA.getLocVT() == MVT::v2f64) { 1604 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 1605 Chain, DAG, dl); 1606 VA = ArgLocs[++i]; // skip ahead to next loc 1607 SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 1608 Chain, DAG, dl); 1609 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1610 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 1611 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 1612 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 1613 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 1614 } else 1615 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 1616 1617 } else { 1618 TargetRegisterClass *RC; 1619 1620 if (RegVT == MVT::f32) 1621 RC = ARM::SPRRegisterClass; 1622 else if (RegVT == MVT::f64) 1623 RC = ARM::DPRRegisterClass; 1624 else if (RegVT == MVT::v2f64) 1625 RC = ARM::QPRRegisterClass; 1626 else if (RegVT == MVT::i32) 1627 RC = (AFI->isThumb1OnlyFunction() ? 1628 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 1629 else 1630 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 1631 1632 // Transform the arguments in physical registers into virtual ones. 1633 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1634 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 1635 } 1636 1637 // If this is an 8 or 16-bit value, it is really passed promoted 1638 // to 32 bits. Insert an assert[sz]ext to capture this, then 1639 // truncate to the right size. 1640 switch (VA.getLocInfo()) { 1641 default: llvm_unreachable("Unknown loc info!"); 1642 case CCValAssign::Full: break; 1643 case CCValAssign::BCvt: 1644 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 1645 break; 1646 case CCValAssign::SExt: 1647 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 1648 DAG.getValueType(VA.getValVT())); 1649 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1650 break; 1651 case CCValAssign::ZExt: 1652 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 1653 DAG.getValueType(VA.getValVT())); 1654 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1655 break; 1656 } 1657 1658 InVals.push_back(ArgValue); 1659 1660 } else { // !VA.isRegLoc() 1661 1662 // sanity check 1663 assert(VA.isMemLoc()); 1664 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 1665 1666 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; 1667 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 1668 true, false); 1669 1670 // Create load nodes to retrieve arguments from the stack. 1671 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1672 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 1673 PseudoSourceValue::getFixedStack(FI), 0)); 1674 } 1675 } 1676 1677 // varargs 1678 if (isVarArg) { 1679 static const unsigned GPRArgRegs[] = { 1680 ARM::R0, ARM::R1, ARM::R2, ARM::R3 1681 }; 1682 1683 unsigned NumGPRs = CCInfo.getFirstUnallocated 1684 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 1685 1686 unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); 1687 unsigned VARegSize = (4 - NumGPRs) * 4; 1688 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 1689 unsigned ArgOffset = CCInfo.getNextStackOffset(); 1690 if (VARegSaveSize) { 1691 // If this function is vararg, store any remaining integer argument regs 1692 // to their spots on the stack so that they may be loaded by dereferencing 1693 // the result of va_next.
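    // An illustrative sketch (assuming 4-byte stack alignment): for
    // "int f(int a, ...)", r0 carries 'a', so NumGPRs == 1, VARegSize == 12,
    // and r1-r3 are stored to a 12-byte reg-save area that ends where the
    // first stack-passed vararg would begin; va_start points into that
    // area, so va_arg walks the saved registers and then the caller's stack.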
1694 AFI->setVarArgsRegSaveSize(VARegSaveSize); 1695 VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + 1696 VARegSaveSize - VARegSize, 1697 true, false); 1698 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); 1699 1700 SmallVector<SDValue, 4> MemOps; 1701 for (; NumGPRs < 4; ++NumGPRs) { 1702 TargetRegisterClass *RC; 1703 if (AFI->isThumb1OnlyFunction()) 1704 RC = ARM::tGPRRegisterClass; 1705 else 1706 RC = ARM::GPRRegisterClass; 1707 1708 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); 1709 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 1710 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 1711 PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0); 1712 MemOps.push_back(Store); 1713 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 1714 DAG.getConstant(4, getPointerTy())); 1715 } 1716 if (!MemOps.empty()) 1717 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1718 &MemOps[0], MemOps.size()); 1719 } else 1720 // This will point to the next argument passed via stack. 1721 VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); 1722 } 1723 1724 return Chain; 1725} 1726 1727/// isFloatingPointZero - Return true if this is +0.0. 1728static bool isFloatingPointZero(SDValue Op) { 1729 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 1730 return CFP->getValueAPF().isPosZero(); 1731 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 1732 // Maybe this has already been legalized into the constant pool? 1733 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 1734 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 1735 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 1736 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 1737 return CFP->getValueAPF().isPosZero(); 1738 } 1739 } 1740 return false; 1741} 1742 1743/// Returns appropriate ARM CMP (cmp) and corresponding condition code for 1744/// the given operands. 1745SDValue 1746ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 1747 SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { 1748 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 1749 unsigned C = RHSC->getZExtValue(); 1750 if (!isLegalICmpImmediate(C)) { 1751 // Constant does not fit, try adjusting it by one? 1752 switch (CC) { 1753 default: break; 1754 case ISD::SETLT: 1755 case ISD::SETGE: 1756 if (isLegalICmpImmediate(C-1)) { 1757 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 1758 RHS = DAG.getConstant(C-1, MVT::i32); 1759 } 1760 break; 1761 case ISD::SETULT: 1762 case ISD::SETUGE: 1763 if (C > 0 && isLegalICmpImmediate(C-1)) { 1764 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 1765 RHS = DAG.getConstant(C-1, MVT::i32); 1766 } 1767 break; 1768 case ISD::SETLE: 1769 case ISD::SETGT: 1770 if (isLegalICmpImmediate(C+1)) { 1771 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 1772 RHS = DAG.getConstant(C+1, MVT::i32); 1773 } 1774 break; 1775 case ISD::SETULE: 1776 case ISD::SETUGT: 1777 if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { 1778 CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; 1779 RHS = DAG.getConstant(C+1, MVT::i32); 1780 } 1781 break; 1782 } 1783 } 1784 } 1785 1786 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 1787 ARMISD::NodeType CompareType; 1788 switch (CondCode) { 1789 default: 1790 CompareType = ARMISD::CMP; 1791 break; 1792 case ARMCC::EQ: 1793 case ARMCC::NE: 1794 // Uses only Z Flag 1795 CompareType = ARMISD::CMPZ; 1796 break; 1797 } 1798 ARMCC = DAG.getConstant(CondCode, MVT::i32); 1799 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); 1800} 1801 1802/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 1803static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 1804 DebugLoc dl) { 1805 SDValue Cmp; 1806 if (!isFloatingPointZero(RHS)) 1807 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); 1808 else 1809 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); 1810 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); 1811} 1812 1813SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { 1814 EVT VT = Op.getValueType(); 1815 SDValue LHS = Op.getOperand(0); 1816 SDValue RHS = Op.getOperand(1); 1817 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 1818 SDValue TrueVal = Op.getOperand(2); 1819 SDValue FalseVal = Op.getOperand(3); 1820 DebugLoc dl = Op.getDebugLoc(); 1821 1822 if (LHS.getValueType() == MVT::i32) { 1823 SDValue ARMCC; 1824 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1825 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); 1826 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR, Cmp); 1827 } 1828 1829 ARMCC::CondCodes CondCode, CondCode2; 1830 FPCCToARMCC(CC, CondCode, CondCode2); 1831 1832 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); 1833 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1834 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 1835 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 1836 ARMCC, CCR, Cmp); 1837 if (CondCode2 != ARMCC::AL) { 1838 SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); 1839 // FIXME: Needs another CMP because flag can have but one use.
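    // E.g. (an illustrative sketch) an f32 SETONE select needs two
    // condition checks, so the compare is repeated for each predicated
    // move, roughly:
    //   fcmps s0, s1 ; fmstat ; movmi ...   @ first condition (CondCode)
    //   fcmps s0, s1 ; fmstat ; movgt ...   @ second condition (CondCode2)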
1840 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 1841 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 1842 Result, TrueVal, ARMCC2, CCR, Cmp2); 1843 } 1844 return Result; 1845} 1846 1847SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { 1848 SDValue Chain = Op.getOperand(0); 1849 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 1850 SDValue LHS = Op.getOperand(2); 1851 SDValue RHS = Op.getOperand(3); 1852 SDValue Dest = Op.getOperand(4); 1853 DebugLoc dl = Op.getDebugLoc(); 1854 1855 if (LHS.getValueType() == MVT::i32) { 1856 SDValue ARMCC; 1857 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1858 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); 1859 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 1860 Chain, Dest, ARMCC, CCR,Cmp); 1861 } 1862 1863 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 1864 ARMCC::CondCodes CondCode, CondCode2; 1865 FPCCToARMCC(CC, CondCode, CondCode2); 1866 1867 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 1868 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); 1869 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1870 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 1871 SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; 1872 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 1873 if (CondCode2 != ARMCC::AL) { 1874 ARMCC = DAG.getConstant(CondCode2, MVT::i32); 1875 SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; 1876 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 1877 } 1878 return Res; 1879} 1880 1881SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { 1882 SDValue Chain = Op.getOperand(0); 1883 SDValue Table = Op.getOperand(1); 1884 SDValue Index = Op.getOperand(2); 1885 DebugLoc dl = Op.getDebugLoc(); 1886 1887 EVT PTy = getPointerTy(); 1888 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 1889 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 1890 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 1891 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 1892 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 1893 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 1894 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 1895 if (Subtarget->isThumb2()) { 1896 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 1897 // which does another jump to the destination. This also makes it easier 1898 // to translate it to TBB / TBH later. 1899 // FIXME: This might not work if the function is extremely large. 1900 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 1901 Addr, Op.getOperand(2), JTI, UId); 1902 } 1903 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 1904 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 1905 PseudoSourceValue::getJumpTable(), 0); 1906 Chain = Addr.getValue(1); 1907 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 1908 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 1909 } else { 1910 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 1911 PseudoSourceValue::getJumpTable(), 0); 1912 Chain = Addr.getValue(1); 1913 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 1914 } 1915} 1916 1917static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 1918 DebugLoc dl = Op.getDebugLoc(); 1919 unsigned Opc = 1920 Op.getOpcode() == ISD::FP_TO_SINT ? 
ARMISD::FTOSI : ARMISD::FTOUI; 1921 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 1922 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); 1923} 1924 1925static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 1926 EVT VT = Op.getValueType(); 1927 DebugLoc dl = Op.getDebugLoc(); 1928 unsigned Opc = 1929 Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; 1930 1931 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); 1932 return DAG.getNode(Opc, dl, VT, Op); 1933} 1934 1935static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { 1936 // Implement fcopysign with a fabs and a conditional fneg. 1937 SDValue Tmp0 = Op.getOperand(0); 1938 SDValue Tmp1 = Op.getOperand(1); 1939 DebugLoc dl = Op.getDebugLoc(); 1940 EVT VT = Op.getValueType(); 1941 EVT SrcVT = Tmp1.getValueType(); 1942 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); 1943 SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); 1944 SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); 1945 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 1946 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); 1947} 1948 1949SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 1950 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 1951 MFI->setFrameAddressIsTaken(true); 1952 EVT VT = Op.getValueType(); 1953 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 1954 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1955 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 1956 ? ARM::R7 : ARM::R11; 1957 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 1958 while (Depth--) 1959 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); 1960 return FrameAddr; 1961} 1962 1963SDValue 1964ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, 1965 SDValue Chain, 1966 SDValue Dst, SDValue Src, 1967 SDValue Size, unsigned Align, 1968 bool AlwaysInline, 1969 const Value *DstSV, uint64_t DstSVOff, 1970 const Value *SrcSV, uint64_t SrcSVOff) { 1971 // Do repeated 4-byte loads and stores. To be improved. 1972 // This requires 4-byte alignment. 1973 if ((Align & 3) != 0) 1974 return SDValue(); 1975 // This requires the copy size to be a constant, preferably 1976 // within a subtarget-specific limit. 1977 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 1978 if (!ConstantSize) 1979 return SDValue(); 1980 uint64_t SizeVal = ConstantSize->getZExtValue(); 1981 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) 1982 return SDValue(); 1983 1984 unsigned BytesLeft = SizeVal & 3; 1985 unsigned NumMemOps = SizeVal >> 2; 1986 unsigned EmittedNumMemOps = 0; 1987 EVT VT = MVT::i32; 1988 unsigned VTSize = 4; 1989 unsigned i = 0; 1990 const unsigned MAX_LOADS_IN_LDM = 6; 1991 SDValue TFOps[MAX_LOADS_IN_LDM]; 1992 SDValue Loads[MAX_LOADS_IN_LDM]; 1993 uint64_t SrcOff = 0, DstOff = 0; 1994 1995 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the 1996 // same number of stores. The loads and stores will get combined into 1997 // ldm/stm later on.
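  // E.g. (illustrative; registers are chosen later by the allocator) a
  // 20-byte copy with 4-byte alignment becomes five word loads, a
  // TokenFactor barrier, then five word stores, combining into roughly:
  //   ldm r1!, {r2, r3, r4, r5, r12}
  //   stm r0!, {r2, r3, r4, r5, r12}
  // Any 1-3 trailing bytes are handled with ldrh/ldrb and strh/strb below.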
1998 while (EmittedNumMemOps < NumMemOps) { 1999 for (i = 0; 2000 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 2001 Loads[i] = DAG.getLoad(VT, dl, Chain, 2002 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 2003 DAG.getConstant(SrcOff, MVT::i32)), 2004 SrcSV, SrcSVOff + SrcOff); 2005 TFOps[i] = Loads[i].getValue(1); 2006 SrcOff += VTSize; 2007 } 2008 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2009 2010 for (i = 0; 2011 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 2012 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 2013 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 2014 DAG.getConstant(DstOff, MVT::i32)), 2015 DstSV, DstSVOff + DstOff); 2016 DstOff += VTSize; 2017 } 2018 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2019 2020 EmittedNumMemOps += i; 2021 } 2022 2023 if (BytesLeft == 0) 2024 return Chain; 2025 2026 // Issue loads / stores for the trailing (1 - 3) bytes. 2027 unsigned BytesLeftSave = BytesLeft; 2028 i = 0; 2029 while (BytesLeft) { 2030 if (BytesLeft >= 2) { 2031 VT = MVT::i16; 2032 VTSize = 2; 2033 } else { 2034 VT = MVT::i8; 2035 VTSize = 1; 2036 } 2037 2038 Loads[i] = DAG.getLoad(VT, dl, Chain, 2039 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 2040 DAG.getConstant(SrcOff, MVT::i32)), 2041 SrcSV, SrcSVOff + SrcOff); 2042 TFOps[i] = Loads[i].getValue(1); 2043 ++i; 2044 SrcOff += VTSize; 2045 BytesLeft -= VTSize; 2046 } 2047 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2048 2049 i = 0; 2050 BytesLeft = BytesLeftSave; 2051 while (BytesLeft) { 2052 if (BytesLeft >= 2) { 2053 VT = MVT::i16; 2054 VTSize = 2; 2055 } else { 2056 VT = MVT::i8; 2057 VTSize = 1; 2058 } 2059 2060 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 2061 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 2062 DAG.getConstant(DstOff, MVT::i32)), 2063 DstSV, DstSVOff + DstOff); 2064 ++i; 2065 DstOff += VTSize; 2066 BytesLeft -= VTSize; 2067 } 2068 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 2069} 2070 2071static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { 2072 SDValue Op = N->getOperand(0); 2073 DebugLoc dl = N->getDebugLoc(); 2074 if (N->getValueType(0) == MVT::f64) { 2075 // Turn i64->f64 into VMOVDRR. 2076 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2077 DAG.getConstant(0, MVT::i32)); 2078 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2079 DAG.getConstant(1, MVT::i32)); 2080 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 2081 } 2082 2083 // Turn f64->i64 into VMOVRRD. 2084 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 2085 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 2086 2087 // Merge the pieces into a single i64 value. 2088 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 2089} 2090 2091/// getZeroVector - Returns a vector of specified type with all zero elements. 2092/// 2093static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2094 assert(VT.isVector() && "Expected a vector type"); 2095 2096 // Zero vectors are used to represent vector negation and in those cases 2097 // will be implemented with the NEON VNEG instruction. However, VNEG does 2098 // not support i64 elements, so sometimes the zero vectors will need to be 2099 // explicitly constructed. For those cases, and potentially other uses in 2100 // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted 2101 // to their dest type. This ensures they get CSE'd. 
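  // E.g. a v4i32 zero is emitted as
  //   (v4i32 (bit_convert (v16i8 BUILD_VECTOR of sixteen zero i8s)))
  // so every 128-bit zero vector, whatever its element type, shares one
  // canonical v16i8 node.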
2102 SDValue Vec; 2103 SDValue Cst = DAG.getTargetConstant(0, MVT::i8); 2104 SmallVector<SDValue, 8> Ops; 2105 MVT TVT; 2106 2107 if (VT.getSizeInBits() == 64) { 2108 Ops.assign(8, Cst); TVT = MVT::v8i8; 2109 } else { 2110 Ops.assign(16, Cst); TVT = MVT::v16i8; 2111 } 2112 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); 2113 2114 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); 2115} 2116 2117/// getOnesVector - Returns a vector of specified type with all bits set. 2118/// 2119static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2120 assert(VT.isVector() && "Expected a vector type"); 2121 2122 // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their 2123 // dest type. This ensures they get CSE'd. 2124 SDValue Vec; 2125 SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); 2126 SmallVector<SDValue, 8> Ops; 2127 MVT TVT; 2128 2129 if (VT.getSizeInBits() == 64) { 2130 Ops.assign(8, Cst); TVT = MVT::v8i8; 2131 } else { 2132 Ops.assign(16, Cst); TVT = MVT::v16i8; 2133 } 2134 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); 2135 2136 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); 2137} 2138 2139/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two 2140/// i32 values and take a 2 x i32 value to shift plus a shift amount. 2141SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { 2142 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2143 EVT VT = Op.getValueType(); 2144 unsigned VTBits = VT.getSizeInBits(); 2145 DebugLoc dl = Op.getDebugLoc(); 2146 SDValue ShOpLo = Op.getOperand(0); 2147 SDValue ShOpHi = Op.getOperand(1); 2148 SDValue ShAmt = Op.getOperand(2); 2149 SDValue ARMCC; 2150 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 2151 2152 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 2153 2154 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2155 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2156 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 2157 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2158 DAG.getConstant(VTBits, MVT::i32)); 2159 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 2160 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2161 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 2162 2163 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2164 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2165 ARMCC, DAG, dl); 2166 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 2167 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, 2168 CCR, Cmp); 2169 2170 SDValue Ops[2] = { Lo, Hi }; 2171 return DAG.getMergeValues(Ops, 2, dl); 2172} 2173 2174/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 2175/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
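/// A sketch of the decomposition (for VTBits == 32; the CMOV below selects
/// on ExtraShAmt = ShAmt - 32 being non-negative):
///   Lo = ShOpLo << ShAmt
///   Hi = (ShAmt - 32 >= 0) ? ShOpLo << (ShAmt - 32)
///                          : (ShOpHi << ShAmt) | (ShOpLo >> (32 - ShAmt))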
2176SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { 2177 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2178 EVT VT = Op.getValueType(); 2179 unsigned VTBits = VT.getSizeInBits(); 2180 DebugLoc dl = Op.getDebugLoc(); 2181 SDValue ShOpLo = Op.getOperand(0); 2182 SDValue ShOpHi = Op.getOperand(1); 2183 SDValue ShAmt = Op.getOperand(2); 2184 SDValue ARMCC; 2185 2186 assert(Op.getOpcode() == ISD::SHL_PARTS); 2187 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2188 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2189 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 2190 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2191 DAG.getConstant(VTBits, MVT::i32)); 2192 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 2193 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 2194 2195 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2196 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2197 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2198 ARMCC, DAG, dl); 2199 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 2200 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, 2201 CCR, Cmp); 2202 2203 SDValue Ops[2] = { Lo, Hi }; 2204 return DAG.getMergeValues(Ops, 2, dl); 2205} 2206 2207static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 2208 const ARMSubtarget *ST) { 2209 EVT VT = N->getValueType(0); 2210 DebugLoc dl = N->getDebugLoc(); 2211 2212 // Lower vector shifts on NEON to use VSHL. 2213 if (VT.isVector()) { 2214 assert(ST->hasNEON() && "unexpected vector shift"); 2215 2216 // Left shifts translate directly to the vshiftu intrinsic. 2217 if (N->getOpcode() == ISD::SHL) 2218 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2219 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 2220 N->getOperand(0), N->getOperand(1)); 2221 2222 assert((N->getOpcode() == ISD::SRA || 2223 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 2224 2225 // NEON uses the same intrinsics for both left and right shifts. For 2226 // right shifts, the shift amounts are negative, so negate the vector of 2227 // shift amounts. 2228 EVT ShiftVT = N->getOperand(1).getValueType(); 2229 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 2230 getZeroVector(ShiftVT, DAG, dl), 2231 N->getOperand(1)); 2232 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 2233 Intrinsic::arm_neon_vshifts : 2234 Intrinsic::arm_neon_vshiftu); 2235 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2236 DAG.getConstant(vshiftInt, MVT::i32), 2237 N->getOperand(0), NegatedCount); 2238 } 2239 2240 // We can get here for a node like i32 = ISD::SHL i32, i64 2241 if (VT != MVT::i64) 2242 return SDValue(); 2243 2244 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 2245 "Unknown shift to lower!"); 2246 2247 // We only lower SRA, SRL of 1 here, all others use generic lowering. 2248 if (!isa<ConstantSDNode>(N->getOperand(1)) || 2249 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 2250 return SDValue(); 2251 2252 // If we are in thumb mode, we don't have RRX. 2253 if (ST->isThumb1Only()) return SDValue(); 2254 2255 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 
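  // E.g. (illustrative; exact mnemonics depend on the subtarget) an i64
  // logical shift right by one in {lo, hi} becomes roughly:
  //   lsrs hi, hi, #1   @ SRL_FLAG: bit 0 of hi shifts out into the carry
  //   rrx  lo, lo       @ RRX: the carry shifts into bit 31 of lo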
2256 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2257 DAG.getConstant(0, MVT::i32)); 2258 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2259 DAG.getConstant(1, MVT::i32)); 2260 2261 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 2262 // captures the result into a carry flag. 2263 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 2264 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); 2265 2266 // The low part is an ARMISD::RRX operand, which shifts the carry in. 2267 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 2268 2269 // Merge the pieces into a single i64 value. 2270 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 2271} 2272 2273static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 2274 SDValue TmpOp0, TmpOp1; 2275 bool Invert = false; 2276 bool Swap = false; 2277 unsigned Opc = 0; 2278 2279 SDValue Op0 = Op.getOperand(0); 2280 SDValue Op1 = Op.getOperand(1); 2281 SDValue CC = Op.getOperand(2); 2282 EVT VT = Op.getValueType(); 2283 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2284 DebugLoc dl = Op.getDebugLoc(); 2285 2286 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 2287 switch (SetCCOpcode) { 2288 default: llvm_unreachable("Illegal FP comparison"); break; 2289 case ISD::SETUNE: 2290 case ISD::SETNE: Invert = true; // Fallthrough 2291 case ISD::SETOEQ: 2292 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2293 case ISD::SETOLT: 2294 case ISD::SETLT: Swap = true; // Fallthrough 2295 case ISD::SETOGT: 2296 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2297 case ISD::SETOLE: 2298 case ISD::SETLE: Swap = true; // Fallthrough 2299 case ISD::SETOGE: 2300 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2301 case ISD::SETUGE: Swap = true; // Fallthrough 2302 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 2303 case ISD::SETUGT: Swap = true; // Fallthrough 2304 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 2305 case ISD::SETUEQ: Invert = true; // Fallthrough 2306 case ISD::SETONE: 2307 // Expand this to (OLT | OGT). 2308 TmpOp0 = Op0; 2309 TmpOp1 = Op1; 2310 Opc = ISD::OR; 2311 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2312 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 2313 break; 2314 case ISD::SETUO: Invert = true; // Fallthrough 2315 case ISD::SETO: 2316 // Expand this to (OLT | OGE). 2317 TmpOp0 = Op0; 2318 TmpOp1 = Op1; 2319 Opc = ISD::OR; 2320 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2321 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 2322 break; 2323 } 2324 } else { 2325 // Integer comparisons. 2326 switch (SetCCOpcode) { 2327 default: llvm_unreachable("Illegal integer comparison"); break; 2328 case ISD::SETNE: Invert = true; 2329 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2330 case ISD::SETLT: Swap = true; 2331 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2332 case ISD::SETLE: Swap = true; 2333 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2334 case ISD::SETULT: Swap = true; 2335 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 2336 case ISD::SETULE: Swap = true; 2337 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 2338 } 2339 2340 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 
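    // E.g. "icmp ne (and %a, %b), zeroinitializer" is selected as a single
    // "vtst %a, %b" here rather than an AND, a compare with zero, and a
    // negation; for the SETEQ form the VTST result is inverted instead.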
2341 if (Opc == ARMISD::VCEQ) { 2342 2343 SDValue AndOp; 2344 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 2345 AndOp = Op0; 2346 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 2347 AndOp = Op1; 2348 2349 // Ignore bitconvert. 2350 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) 2351 AndOp = AndOp.getOperand(0); 2352 2353 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 2354 Opc = ARMISD::VTST; 2355 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); 2356 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); 2357 Invert = !Invert; 2358 } 2359 } 2360 } 2361 2362 if (Swap) 2363 std::swap(Op0, Op1); 2364 2365 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 2366 2367 if (Invert) 2368 Result = DAG.getNOT(dl, Result, VT); 2369 2370 return Result; 2371} 2372 2373/// isVMOVSplat - Check if the specified splat value corresponds to an immediate 2374/// VMOV instruction, and if so, return the constant being splatted. 2375static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, 2376 unsigned SplatBitSize, SelectionDAG &DAG) { 2377 switch (SplatBitSize) { 2378 case 8: 2379 // Any 1-byte value is OK. 2380 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 2381 return DAG.getTargetConstant(SplatBits, MVT::i8); 2382 2383 case 16: 2384 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 2385 if ((SplatBits & ~0xff) == 0 || 2386 (SplatBits & ~0xff00) == 0) 2387 return DAG.getTargetConstant(SplatBits, MVT::i16); 2388 break; 2389 2390 case 32: 2391 // NEON's 32-bit VMOV supports splat values where: 2392 // * only one byte is nonzero, or 2393 // * the least significant byte is 0xff and the second byte is nonzero, or 2394 // * the least significant 2 bytes are 0xff and the third is nonzero. 2395 if ((SplatBits & ~0xff) == 0 || 2396 (SplatBits & ~0xff00) == 0 || 2397 (SplatBits & ~0xff0000) == 0 || 2398 (SplatBits & ~0xff000000) == 0) 2399 return DAG.getTargetConstant(SplatBits, MVT::i32); 2400 2401 if ((SplatBits & ~0xffff) == 0 && 2402 ((SplatBits | SplatUndef) & 0xff) == 0xff) 2403 return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); 2404 2405 if ((SplatBits & ~0xffffff) == 0 && 2406 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) 2407 return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); 2408 2409 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 2410 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 2411 // VMOV.I32. A (very) minor optimization would be to replicate the value 2412 // and fall through here to test for a valid 64-bit splat. But, then the 2413 // caller would also need to check and handle the change in size. 2414 break; 2415 2416 case 64: { 2417 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 2418 uint64_t BitMask = 0xff; 2419 uint64_t Val = 0; 2420 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 2421 if (((SplatBits | SplatUndef) & BitMask) == BitMask) 2422 Val |= BitMask; 2423 else if ((SplatBits & BitMask) != 0) 2424 return SDValue(); 2425 BitMask <<= 8; 2426 } 2427 return DAG.getTargetConstant(Val, MVT::i64); 2428 } 2429 2430 default: 2431 llvm_unreachable("unexpected size for isVMOVSplat"); 2432 break; 2433 } 2434 2435 return SDValue(); 2436} 2437 2438/// getVMOVImm - If this is a build_vector of constants which can be 2439/// formed by using a VMOV instruction of the specified element size, 2440/// return the constant being splatted. 
The ByteSize field indicates the 2441/// number of bytes of each element (1, 2, 4, or 8). 2442SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 2443 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 2444 APInt SplatBits, SplatUndef; 2445 unsigned SplatBitSize; 2446 bool HasAnyUndefs; 2447 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 2448 HasAnyUndefs, ByteSize * 8)) 2449 return SDValue(); 2450 2451 if (SplatBitSize > ByteSize * 8) 2452 return SDValue(); 2453 2454 return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), 2455 SplatBitSize, DAG); 2456} 2457 2458static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 2459 bool &ReverseVEXT, unsigned &Imm) { 2460 unsigned NumElts = VT.getVectorNumElements(); 2461 ReverseVEXT = false; 2462 Imm = M[0]; 2463 2464 // If this is a VEXT shuffle, the immediate value is the index of the first 2465 // element. The other shuffle indices must be the successive elements after 2466 // the first one. 2467 unsigned ExpectedElt = Imm; 2468 for (unsigned i = 1; i < NumElts; ++i) { 2469 // Increment the expected index. If it wraps around, it may still be 2470 // a VEXT but the source vectors must be swapped. 2471 ExpectedElt += 1; 2472 if (ExpectedElt == NumElts * 2) { 2473 ExpectedElt = 0; 2474 ReverseVEXT = true; 2475 } 2476 2477 if (ExpectedElt != static_cast<unsigned>(M[i])) 2478 return false; 2479 } 2480 2481 // Adjust the index value if the source operands will be swapped. 2482 if (ReverseVEXT) 2483 Imm -= NumElts; 2484 2485 return true; 2486} 2487 2488/// isVREVMask - Check if a vector shuffle corresponds to a VREV 2489/// instruction with the specified blocksize. (The order of the elements 2490/// within each block of the vector is reversed.) 2491static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 2492 unsigned BlockSize) { 2493 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 2494 "Only possible block sizes for VREV are: 16, 32, 64"); 2495 2496 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2497 if (EltSz == 64) 2498 return false; 2499 2500 unsigned NumElts = VT.getVectorNumElements(); 2501 unsigned BlockElts = M[0] + 1; 2502 2503 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 2504 return false; 2505 2506 for (unsigned i = 0; i < NumElts; ++i) { 2507 if ((unsigned) M[i] != 2508 (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 2509 return false; 2510 } 2511 2512 return true; 2513} 2514 2515static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 2516 unsigned &WhichResult) { 2517 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2518 if (EltSz == 64) 2519 return false; 2520 2521 unsigned NumElts = VT.getVectorNumElements(); 2522 WhichResult = (M[0] == 0 ? 0 : 1); 2523 for (unsigned i = 0; i < NumElts; i += 2) { 2524 if ((unsigned) M[i] != i + WhichResult || 2525 (unsigned) M[i+1] != i + NumElts + WhichResult) 2526 return false; 2527 } 2528 return true; 2529} 2530 2531/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 2532/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2533/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 2534static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2535 unsigned &WhichResult) { 2536 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2537 if (EltSz == 64) 2538 return false; 2539 2540 unsigned NumElts = VT.getVectorNumElements(); 2541 WhichResult = (M[0] == 0 ?
0 : 1); 2542 for (unsigned i = 0; i < NumElts; i += 2) { 2543 if ((unsigned) M[i] != i + WhichResult || 2544 (unsigned) M[i+1] != i + WhichResult) 2545 return false; 2546 } 2547 return true; 2548} 2549 2550static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 2551 unsigned &WhichResult) { 2552 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2553 if (EltSz == 64) 2554 return false; 2555 2556 unsigned NumElts = VT.getVectorNumElements(); 2557 WhichResult = (M[0] == 0 ? 0 : 1); 2558 for (unsigned i = 0; i != NumElts; ++i) { 2559 if ((unsigned) M[i] != 2 * i + WhichResult) 2560 return false; 2561 } 2562 2563 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2564 if (VT.is64BitVector() && EltSz == 32) 2565 return false; 2566 2567 return true; 2568} 2569 2570/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 2571/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2572/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 2573static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2574 unsigned &WhichResult) { 2575 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2576 if (EltSz == 64) 2577 return false; 2578 2579 unsigned Half = VT.getVectorNumElements() / 2; 2580 WhichResult = (M[0] == 0 ? 0 : 1); 2581 for (unsigned j = 0; j != 2; ++j) { 2582 unsigned Idx = WhichResult; 2583 for (unsigned i = 0; i != Half; ++i) { 2584 if ((unsigned) M[i + j * Half] != Idx) 2585 return false; 2586 Idx += 2; 2587 } 2588 } 2589 2590 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2591 if (VT.is64BitVector() && EltSz == 32) 2592 return false; 2593 2594 return true; 2595} 2596 2597static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 2598 unsigned &WhichResult) { 2599 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2600 if (EltSz == 64) 2601 return false; 2602 2603 unsigned NumElts = VT.getVectorNumElements(); 2604 WhichResult = (M[0] == 0 ? 0 : 1); 2605 unsigned Idx = WhichResult * NumElts / 2; 2606 for (unsigned i = 0; i != NumElts; i += 2) { 2607 if ((unsigned) M[i] != Idx || 2608 (unsigned) M[i+1] != Idx + NumElts) 2609 return false; 2610 Idx += 1; 2611 } 2612 2613 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2614 if (VT.is64BitVector() && EltSz == 32) 2615 return false; 2616 2617 return true; 2618} 2619 2620/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 2621/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 2622/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 2623static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 2624 unsigned &WhichResult) { 2625 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 2626 if (EltSz == 64) 2627 return false; 2628 2629 unsigned NumElts = VT.getVectorNumElements(); 2630 WhichResult = (M[0] == 0 ? 0 : 1); 2631 unsigned Idx = WhichResult * NumElts / 2; 2632 for (unsigned i = 0; i != NumElts; i += 2) { 2633 if ((unsigned) M[i] != Idx || 2634 (unsigned) M[i+1] != Idx) 2635 return false; 2636 Idx += 1; 2637 } 2638 2639 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 2640 if (VT.is64BitVector() && EltSz == 32) 2641 return false; 2642 2643 return true; 2644} 2645 2646 2647static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2648 // Canonicalize all-zeros and all-ones vectors. 
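  // E.g. (an illustrative sketch) splatting an i16 constant into a 64-bit
  // vector builds a v4i16 BUILD_VECTOR of four copies and bit_converts it
  // to the requested VT, while all-zeros and all-ones splats are funneled
  // through the shared canonical vectors via getZeroVector/getOnesVector
  // below.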
2649 ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode()); 2650 if (ConstVal->isNullValue()) 2651 return getZeroVector(VT, DAG, dl); 2652 if (ConstVal->isAllOnesValue()) 2653 return getOnesVector(VT, DAG, dl); 2654 2655 EVT CanonicalVT; 2656 if (VT.is64BitVector()) { 2657 switch (Val.getValueType().getSizeInBits()) { 2658 case 8: CanonicalVT = MVT::v8i8; break; 2659 case 16: CanonicalVT = MVT::v4i16; break; 2660 case 32: CanonicalVT = MVT::v2i32; break; 2661 case 64: CanonicalVT = MVT::v1i64; break; 2662 default: llvm_unreachable("unexpected splat element type"); break; 2663 } 2664 } else { 2665 assert(VT.is128BitVector() && "unknown splat vector size"); 2666 switch (Val.getValueType().getSizeInBits()) { 2667 case 8: CanonicalVT = MVT::v16i8; break; 2668 case 16: CanonicalVT = MVT::v8i16; break; 2669 case 32: CanonicalVT = MVT::v4i32; break; 2670 case 64: CanonicalVT = MVT::v2i64; break; 2671 default: llvm_unreachable("unexpected splat element type"); break; 2672 } 2673 } 2674 2675 // Build a canonical splat for this value. 2676 SmallVector<SDValue, 8> Ops; 2677 Ops.assign(CanonicalVT.getVectorNumElements(), Val); 2678 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], 2679 Ops.size()); 2680 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res); 2681} 2682 2683// If this is a case we can't handle, return null and let the default 2684// expansion code take care of it. 2685static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 2686 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 2687 DebugLoc dl = Op.getDebugLoc(); 2688 EVT VT = Op.getValueType(); 2689 2690 APInt SplatBits, SplatUndef; 2691 unsigned SplatBitSize; 2692 bool HasAnyUndefs; 2693 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 2694 if (SplatBitSize <= 64) { 2695 SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), 2696 SplatUndef.getZExtValue(), SplatBitSize, DAG); 2697 if (Val.getNode()) 2698 return BuildSplat(Val, VT, DAG, dl); 2699 } 2700 } 2701 2702 // If there are only 2 elements in a 128-bit vector, insert them into an 2703 // undef vector. This handles the common case for 128-bit vector argument 2704 // passing, where the insertions should be translated to subreg accesses 2705 // with no real instructions. 2706 if (VT.is128BitVector() && Op.getNumOperands() == 2) { 2707 SDValue Val = DAG.getUNDEF(VT); 2708 SDValue Op0 = Op.getOperand(0); 2709 SDValue Op1 = Op.getOperand(1); 2710 if (Op0.getOpcode() != ISD::UNDEF) 2711 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0, 2712 DAG.getIntPtrConstant(0)); 2713 if (Op1.getOpcode() != ISD::UNDEF) 2714 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1, 2715 DAG.getIntPtrConstant(1)); 2716 return Val; 2717 } 2718 2719 return SDValue(); 2720} 2721 2722/// isShuffleMaskLegal - Targets can use this to indicate that they only 2723/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 2724/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 2725/// are assumed to be legal. 2726bool 2727ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 2728 EVT VT) const { 2729 if (VT.getVectorNumElements() == 4 && 2730 (VT.is128BitVector() || VT.is64BitVector())) { 2731 unsigned PFIndexes[4]; 2732 for (unsigned i = 0; i != 4; ++i) { 2733 if (M[i] < 0) 2734 PFIndexes[i] = 8; 2735 else 2736 PFIndexes[i] = M[i]; 2737 } 2738 2739 // Compute the index in the perfect shuffle table. 
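      // E.g. (an illustrative entry) mask <2, 6, 3, 7> is encoded as the
      // base-9 digits 2,6,3,7 (undef lanes use digit 8), and the top two
      // bits of the table entry extracted below give the cost in
      // instructions.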
2740 unsigned PFTableIndex = 2741 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 2742 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 2743 unsigned Cost = (PFEntry >> 30); 2744 2745 if (Cost <= 4) 2746 return true; 2747 } 2748 2749 bool ReverseVEXT; 2750 unsigned Imm, WhichResult; 2751 2752 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 2753 isVREVMask(M, VT, 64) || 2754 isVREVMask(M, VT, 32) || 2755 isVREVMask(M, VT, 16) || 2756 isVEXTMask(M, VT, ReverseVEXT, Imm) || 2757 isVTRNMask(M, VT, WhichResult) || 2758 isVUZPMask(M, VT, WhichResult) || 2759 isVZIPMask(M, VT, WhichResult) || 2760 isVTRN_v_undef_Mask(M, VT, WhichResult) || 2761 isVUZP_v_undef_Mask(M, VT, WhichResult) || 2762 isVZIP_v_undef_Mask(M, VT, WhichResult)); 2763} 2764 2765/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 2766/// the specified operations to build the shuffle. 2767static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 2768 SDValue RHS, SelectionDAG &DAG, 2769 DebugLoc dl) { 2770 unsigned OpNum = (PFEntry >> 26) & 0x0F; 2771 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 2772 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 2773 2774 enum { 2775 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 2776 OP_VREV, 2777 OP_VDUP0, 2778 OP_VDUP1, 2779 OP_VDUP2, 2780 OP_VDUP3, 2781 OP_VEXT1, 2782 OP_VEXT2, 2783 OP_VEXT3, 2784 OP_VUZPL, // VUZP, left result 2785 OP_VUZPR, // VUZP, right result 2786 OP_VZIPL, // VZIP, left result 2787 OP_VZIPR, // VZIP, right result 2788 OP_VTRNL, // VTRN, left result 2789 OP_VTRNR // VTRN, right result 2790 }; 2791 2792 if (OpNum == OP_COPY) { 2793 if (LHSID == (1*9+2)*9+3) return LHS; 2794 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 2795 return RHS; 2796 } 2797 2798 SDValue OpLHS, OpRHS; 2799 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 2800 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 2801 EVT VT = OpLHS.getValueType(); 2802 2803 switch (OpNum) { 2804 default: llvm_unreachable("Unknown shuffle opcode!"); 2805 case OP_VREV: 2806 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 2807 case OP_VDUP0: 2808 case OP_VDUP1: 2809 case OP_VDUP2: 2810 case OP_VDUP3: 2811 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 2812 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 2813 case OP_VEXT1: 2814 case OP_VEXT2: 2815 case OP_VEXT3: 2816 return DAG.getNode(ARMISD::VEXT, dl, VT, 2817 OpLHS, OpRHS, 2818 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 2819 case OP_VUZPL: 2820 case OP_VUZPR: 2821 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 2822 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 2823 case OP_VZIPL: 2824 case OP_VZIPR: 2825 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 2826 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 2827 case OP_VTRNL: 2828 case OP_VTRNR: 2829 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 2830 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 2831 } 2832} 2833 2834static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 2835 SDValue V1 = Op.getOperand(0); 2836 SDValue V2 = Op.getOperand(1); 2837 DebugLoc dl = Op.getDebugLoc(); 2838 EVT VT = Op.getValueType(); 2839 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 2840 SmallVector<int, 8> ShuffleMask; 2841 2842 // Convert shuffles that are directly supported on NEON to target-specific 2843 // DAG nodes, instead of keeping them as shuffles and matching them again 2844 // during code selection. 
This is more efficient and avoids the possibility 2845 // of inconsistencies between legalization and selection. 2846 // FIXME: floating-point vectors should be canonicalized to integer vectors 2847 // of the same size so that they get CSEd properly. 2848 SVN->getMask(ShuffleMask); 2849 2850 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 2851 int Lane = SVN->getSplatIndex(); 2852 // If this is an undef splat, generate it via "just" vdup, if possible. 2853 if (Lane == -1) Lane = 0; 2854 2855 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 2856 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 2857 } 2858 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 2859 DAG.getConstant(Lane, MVT::i32)); 2860 } 2861 2862 bool ReverseVEXT; 2863 unsigned Imm; 2864 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 2865 if (ReverseVEXT) 2866 std::swap(V1, V2); 2867 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 2868 DAG.getConstant(Imm, MVT::i32)); 2869 } 2870 2871 if (isVREVMask(ShuffleMask, VT, 64)) 2872 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 2873 if (isVREVMask(ShuffleMask, VT, 32)) 2874 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 2875 if (isVREVMask(ShuffleMask, VT, 16)) 2876 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 2877 2878 // Check for Neon shuffles that modify both input vectors in place. 2879 // If both results are used, i.e., if there are two shuffles with the same 2880 // source operands and with masks corresponding to both results of one of 2881 // these operations, DAG memoization will ensure that a single node is 2882 // used for both shuffles. 2883 unsigned WhichResult; 2884 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 2885 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 2886 V1, V2).getValue(WhichResult); 2887 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 2888 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 2889 V1, V2).getValue(WhichResult); 2890 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 2891 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 2892 V1, V2).getValue(WhichResult); 2893 2894 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 2895 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 2896 V1, V1).getValue(WhichResult); 2897 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 2898 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 2899 V1, V1).getValue(WhichResult); 2900 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 2901 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 2902 V1, V1).getValue(WhichResult); 2903 2904 // If the shuffle is not directly supported and it has 4 elements, use 2905 // the PerfectShuffle-generated table to synthesize it from other shuffles. 2906 if (VT.getVectorNumElements() == 4 && 2907 (VT.is128BitVector() || VT.is64BitVector())) { 2908 unsigned PFIndexes[4]; 2909 for (unsigned i = 0; i != 4; ++i) { 2910 if (ShuffleMask[i] < 0) 2911 PFIndexes[i] = 8; 2912 else 2913 PFIndexes[i] = ShuffleMask[i]; 2914 } 2915 2916 // Compute the index in the perfect shuffle table.
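      // E.g. (illustrative) a 4-lane mask that survives the checks above
      // maps to a table entry whose operand fields tell
      // GeneratePerfectShuffle which VREV/VDUP/VEXT/VUZP/VZIP/VTRN ops to
      // chain together, as long as the encoded cost is at most 4.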
2917 unsigned PFTableIndex = 2918 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 2919 2920 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 2921 unsigned Cost = (PFEntry >> 30); 2922 2923 if (Cost <= 4) 2924 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 2925 } 2926 2927 return SDValue(); 2928} 2929 2930static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2931 EVT VT = Op.getValueType(); 2932 DebugLoc dl = Op.getDebugLoc(); 2933 SDValue Vec = Op.getOperand(0); 2934 SDValue Lane = Op.getOperand(1); 2935 assert(VT == MVT::i32 && 2936 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && 2937 "unexpected type for custom-lowering vector extract"); 2938 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 2939} 2940 2941static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 2942 // The only time a CONCAT_VECTORS operation can have legal types is when 2943 // two 64-bit vectors are concatenated to a 128-bit vector. 2944 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 2945 "unexpected CONCAT_VECTORS"); 2946 DebugLoc dl = Op.getDebugLoc(); 2947 SDValue Val = DAG.getUNDEF(MVT::v2f64); 2948 SDValue Op0 = Op.getOperand(0); 2949 SDValue Op1 = Op.getOperand(1); 2950 if (Op0.getOpcode() != ISD::UNDEF) 2951 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 2952 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), 2953 DAG.getIntPtrConstant(0)); 2954 if (Op1.getOpcode() != ISD::UNDEF) 2955 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 2956 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), 2957 DAG.getIntPtrConstant(1)); 2958 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); 2959} 2960 2961SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { 2962 switch (Op.getOpcode()) { 2963 default: llvm_unreachable("Don't know how to custom lower this!"); 2964 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 2965 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 2966 case ISD::GlobalAddress: 2967 return Subtarget->isTargetDarwin() ? 
      LowerGlobalAddressDarwin(Op, DAG) :
      LowerGlobalAddressELF(Op, DAG);
  case ISD::GlobalTLSAddress:    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT_CC:           return LowerSELECT_CC(Op, DAG);
  case ISD::BR_CC:               return LowerBR_CC(Op, DAG);
  case ISD::BR_JT:               return LowerBR_JT(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:  return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::VASTART:             return LowerVASTART(Op, DAG, VarArgsFrameIndex);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:          return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:          return LowerFP_TO_INT(Op, DAG);
  case ISD::FCOPYSIGN:           return LowerFCOPYSIGN(Op, DAG);
  case ISD::RETURNADDR:          break;
  case ISD::FRAMEADDR:           return LowerFRAMEADDR(Op, DAG);
  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BIT_CONVERT:         return ExpandBIT_CONVERT(Op.getNode(), DAG);
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:                 return LowerShift(Op.getNode(), DAG, Subtarget);
  case ISD::SHL_PARTS:           return LowerShiftLeftParts(Op, DAG);
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:           return LowerShiftRightParts(Op, DAG);
  case ISD::VSETCC:              return LowerVSETCC(Op, DAG);
  case ISD::BUILD_VECTOR:        return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:      return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS:      return LowerCONCAT_VECTORS(Op, DAG);
  }
  return SDValue();
}

/// ReplaceNodeResults - Replace the results of a node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom expand this!");
    return;
  case ISD::BIT_CONVERT:
    Results.push_back(ExpandBIT_CONVERT(N, DAG));
    return;
  case ISD::SRL:
  case ISD::SRA: {
    SDValue Res = LowerShift(N, DAG, Subtarget);
    if (Res.getNode())
      Results.push_back(Res);
    return;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           ARM Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB,
                   DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc dl = MI->getDebugLoc();
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case ARM::tMOVCCr_pseudo: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    MachineFunction::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB  = BB;
    MachineFunction *F = BB->getParent();
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    // Also inform sdisel of the edge changes.
    for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
           E = BB->succ_end(); I != E; ++I) {
      EM->insert(std::make_pair(*I, sinkMBB));
      sinkMBB->addSuccessor(*I);
    }
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }

  case ARM::tANDsp:
  case ARM::tADDspr_:
  case ARM::tSUBspi_:
  case ARM::t2SUBrSPi_:
  case ARM::t2SUBrSPi12_:
  case ARM::t2SUBrSPs_: {
    MachineFunction *MF = BB->getParent();
    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned SrcReg = MI->getOperand(1).getReg();
    bool DstIsDead = MI->getOperand(0).isDead();
    bool SrcIsKill = MI->getOperand(1).isKill();

    if (SrcReg != ARM::SP) {
      // Copy the source to SP from virtual register.
      const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
      unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ?
        ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
      BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
        .addReg(SrcReg, getKillRegState(SrcIsKill));
    }

    unsigned OpOpc = 0;
    bool NeedPred = false, NeedCC = false, NeedOp3 = false;
    switch (MI->getOpcode()) {
    default:
      llvm_unreachable("Unexpected pseudo instruction!");
    case ARM::tANDsp:
      OpOpc = ARM::tAND;
      NeedPred = true;
      break;
    case ARM::tADDspr_:
      OpOpc = ARM::tADDspr;
      break;
    case ARM::tSUBspi_:
      OpOpc = ARM::tSUBspi;
      break;
    case ARM::t2SUBrSPi_:
      OpOpc = ARM::t2SUBrSPi;
      NeedPred = true; NeedCC = true;
      break;
    case ARM::t2SUBrSPi12_:
      OpOpc = ARM::t2SUBrSPi12;
      NeedPred = true;
      break;
    case ARM::t2SUBrSPs_:
      OpOpc = ARM::t2SUBrSPs;
      NeedPred = true; NeedCC = true; NeedOp3 = true;
      break;
    }
    MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
    if (OpOpc == ARM::tAND)
      AddDefaultT1CC(MIB);
    MIB.addReg(ARM::SP);
    MIB.addOperand(MI->getOperand(2));
    if (NeedOp3)
      MIB.addOperand(MI->getOperand(3));
    if (NeedPred)
      AddDefaultPred(MIB);
    if (NeedCC)
      AddDefaultCC(MIB);

    // Copy the result from SP to virtual register.
    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
    unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ?
      ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
    BuildMI(BB, dl, TII->get(CopyOpc))
      .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
      .addReg(ARM::SP);
    MF->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           ARM Optimization Hooks
//===----------------------------------------------------------------------===//

static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                            TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  unsigned Opc = N->getOpcode();
  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
  ISD::CondCode CC = ISD::SETCC_INVALID;

  if (isSlctCC) {
    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
  } else {
    SDValue CCOp = Slct.getOperand(0);
    if (CCOp.getOpcode() == ISD::SETCC)
      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
  }

  bool DoXform = false;
  bool InvCC = false;
  assert((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
         "Bad input!");

  if (LHS.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(LHS)->isNullValue()) {
    DoXform = true;
  } else if (CC != ISD::SETCC_INVALID &&
             RHS.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHS)->isNullValue()) {
    std::swap(LHS, RHS);
    SDValue Op0 = Slct.getOperand(0);
    EVT OpVT = isSlctCC ? Op0.getValueType() :
                          Op0.getOperand(0).getValueType();
    bool isInt = OpVT.isInteger();
    CC = ISD::getSetCCInverse(CC, isInt);

    if (!TLI.isCondCodeLegal(CC, OpVT))
      return SDValue();         // Inverse operator isn't legal.
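
    // Reaching here means the zero constant was on the RHS: the operands
    // have been swapped and CC inverted, so the setcc is rebuilt with the
    // inverted condition below.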
    DoXform = true;
    InvCC = true;
  }

  if (DoXform) {
    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
    if (isSlctCC)
      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
                             Slct.getOperand(0), Slct.getOperand(1), CC);
    SDValue CCOp = Slct.getOperand(0);
    if (InvCC)
      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
                       CCOp, OtherOp, Result);
  }
  return SDValue();
}

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // added by evan in r37685 with no testcase.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
    if (Result.getNode()) return Result;
  }
  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
    if (Result.getNode()) return Result;
  }

  return SDValue();
}

/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static SDValue PerformSUBCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // added by evan in r37685 with no testcase.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);

  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
    if (Result.getNode()) return Result;
  }

  return SDValue();
}

/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  // fmrrd(fmdrr x, y) -> x,y
  SDValue InDouble = N->getOperand(0);
  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
  return SDValue();
}

/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BIT_CONVERT)
    Op = Op.getOperand(0);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                    HasAnyUndefs, ElementBits) ||
      SplatBitSize > ElementBits)
    return false;
  Cnt = SplatBits.getSExtValue();
  return true;
}

/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
///   0 <= Value < ElementBits for a left shift; or
///   0 <= Value <= ElementBits for a long left shift.
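/// A "long" shift (vshll) widens the elements, so a shift of exactly
/// ElementBits remains representable there.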
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. For a shift opcode, the value
/// is positive, but for an intrinsic the count must be negative. The
/// absolute value must be in the range:
///   1 <= |Value| <= ElementBits for a right shift; or
///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                         int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  if (isIntrinsic)
    Cnt = -Cnt;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
}

/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IntNo) {
  default:
    // Don't do anything for most intrinsics.
    break;

  // Vector shifts: check for immediate versions and lower them.
  // Note: This is done during DAG combining instead of DAG legalizing because
  // the build_vectors for 64-bit vector element shift counts are generally
  // not legal, and it is hard to see their values after they get legalized to
  // loads from a constant pool.
  case Intrinsic::arm_neon_vshifts:
  case Intrinsic::arm_neon_vshiftu:
  case Intrinsic::arm_neon_vshiftls:
  case Intrinsic::arm_neon_vshiftlu:
  case Intrinsic::arm_neon_vshiftn:
  case Intrinsic::arm_neon_vrshifts:
  case Intrinsic::arm_neon_vrshiftu:
  case Intrinsic::arm_neon_vrshiftn:
  case Intrinsic::arm_neon_vqshifts:
  case Intrinsic::arm_neon_vqshiftu:
  case Intrinsic::arm_neon_vqshiftsu:
  case Intrinsic::arm_neon_vqshiftns:
  case Intrinsic::arm_neon_vqshiftnu:
  case Intrinsic::arm_neon_vqshiftnsu:
  case Intrinsic::arm_neon_vqrshiftns:
  case Intrinsic::arm_neon_vqrshiftnu:
  case Intrinsic::arm_neon_vqrshiftnsu: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
        VShiftOpc = ARMISD::VSHL;
        break;
      }
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
                     ARMISD::VSHRs : ARMISD::VSHRu);
        break;
      }
      return SDValue();

    case Intrinsic::arm_neon_vshiftls:
    case Intrinsic::arm_neon_vshiftlu:
      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
        break;
      llvm_unreachable("invalid shift count for vshll intrinsic");

    case Intrinsic::arm_neon_vrshifts:
    case Intrinsic::arm_neon_vrshiftu:
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshifts:
    case Intrinsic::arm_neon_vqshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      return SDValue();

    case Intrinsic::arm_neon_vqshiftsu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
        break;
      llvm_unreachable("invalid shift count for vqshlu intrinsic");

    case Intrinsic::arm_neon_vshiftn:
    case Intrinsic::arm_neon_vrshiftn:
    case Intrinsic::arm_neon_vqshiftns:
    case Intrinsic::arm_neon_vqshiftnu:
    case Intrinsic::arm_neon_vqshiftnsu:
    case Intrinsic::arm_neon_vqrshiftns:
    case Intrinsic::arm_neon_vqrshiftnu:
    case Intrinsic::arm_neon_vqrshiftnsu:
      // Narrowing shifts require an immediate right shift.
      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
        break;
      llvm_unreachable("invalid shift count for narrowing vector shift "
                       "intrinsic");

    default:
      llvm_unreachable("unhandled vector shift");
    }

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      // Opcode already set above.
      break;
    case Intrinsic::arm_neon_vshiftls:
    case Intrinsic::arm_neon_vshiftlu:
      if (Cnt == VT.getVectorElementType().getSizeInBits())
        VShiftOpc = ARMISD::VSHLLi;
      else
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
                     ARMISD::VSHLLs : ARMISD::VSHLLu);
      break;
    case Intrinsic::arm_neon_vshiftn:
      VShiftOpc = ARMISD::VSHRN; break;
    case Intrinsic::arm_neon_vrshifts:
      VShiftOpc = ARMISD::VRSHRs; break;
    case Intrinsic::arm_neon_vrshiftu:
      VShiftOpc = ARMISD::VRSHRu; break;
    case Intrinsic::arm_neon_vrshiftn:
      VShiftOpc = ARMISD::VRSHRN; break;
    case Intrinsic::arm_neon_vqshifts:
      VShiftOpc = ARMISD::VQSHLs; break;
    case Intrinsic::arm_neon_vqshiftu:
      VShiftOpc = ARMISD::VQSHLu; break;
    case Intrinsic::arm_neon_vqshiftsu:
      VShiftOpc = ARMISD::VQSHLsu; break;
    case Intrinsic::arm_neon_vqshiftns:
      VShiftOpc = ARMISD::VQSHRNs; break;
    case Intrinsic::arm_neon_vqshiftnu:
      VShiftOpc = ARMISD::VQSHRNu; break;
    case Intrinsic::arm_neon_vqshiftnsu:
      VShiftOpc = ARMISD::VQSHRNsu; break;
    case Intrinsic::arm_neon_vqrshiftns:
      VShiftOpc = ARMISD::VQRSHRNs; break;
    case Intrinsic::arm_neon_vqrshiftnu:
      VShiftOpc = ARMISD::VQRSHRNu; break;
    case Intrinsic::arm_neon_vqrshiftnsu:
      VShiftOpc = ARMISD::VQRSHRNsu; break;
    }

    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
  }

  case Intrinsic::arm_neon_vshiftins: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
      VShiftOpc = ARMISD::VSLI;
    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
      VShiftOpc = ARMISD::VSRI;
    else {
      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
    }

    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2),
                       DAG.getConstant(Cnt, MVT::i32));
  }

  case Intrinsic::arm_neon_vqrshifts:
  case Intrinsic::arm_neon_vqrshiftu:
    // No immediate versions of these to check for.
    break;
  }

  return SDValue();
}

/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them. As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);

  // Nothing to be done for scalar shifts.
  if (!VT.isVector())
    return SDValue();

  assert(ST->hasNEON() && "unexpected vector shift");
  int64_t Cnt;

  switch (N->getOpcode()) {
  default: llvm_unreachable("unexpected shift opcode");

  case ISD::SHL:
    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    break;

  case ISD::SRA:
  case ISD::SRL:
    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
                            ARMISD::VSHRs : ARMISD::VSHRu);
      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    }
  }
  return SDValue();
}

/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
  SDValue N0 = N->getOperand(0);

  // Check for sign- and zero-extensions of vector extract operations of 8-
  // and 16-bit vector elements. NEON supports these directly. They are
  // handled during DAG combining because type legalization will promote them
  // to 32-bit types and it is messy to recognize the operations after that.
  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue Vec = N0.getOperand(0);
    SDValue Lane = N0.getOperand(1);
    EVT VT = N->getValueType(0);
    EVT EltVT = N0.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();

    if (VT == MVT::i32 &&
        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
        TLI.isTypeLegal(Vec.getValueType())) {

      unsigned Opc = 0;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode");
      case ISD::SIGN_EXTEND:
        Opc = ARMISD::VGETLANEs;
        break;
      case ISD::ZERO_EXTEND:
      case ISD::ANY_EXTEND:
        Opc = ARMISD::VGETLANEu;
        break;
      }
      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
    }
  }

  return SDValue();
}

SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:        return PerformADDCombine(N, DCI);
  case ISD::SUB:        return PerformSUBCombine(N, DCI);
  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN:
    return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return PerformShiftCombine(N, DCI.DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return PerformExtendCombine(N, DCI.DAG, Subtarget);
  }
  return SDValue();
}

bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
  if (!Subtarget->hasV6Ops())
    // Pre-v6 does not support unaligned mem access.
    return false;
  else if (!Subtarget->hasV7Ops()) {
    // v6 may or may not support unaligned mem access.
    if (!Subtarget->isTargetDarwin())
      return false;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  // FIXME: VLD1 etc with standard alignment is legal.
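  // (f64 and the NEON vector types currently hit the 'return false' default
  // above even when a VLD1/VST1 with standard alignment would handle them.)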
  }
}

static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
  if (V < 0)
    return false;

  unsigned Scale = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    // Scale == 1.
    break;
  case MVT::i16:
    // Scale == 2.
    Scale = 2;
    break;
  case MVT::i32:
    // Scale == 4.
    Scale = 4;
    break;
  }

  if ((V & (Scale - 1)) != 0)
    return false;
  V /= Scale;
  return V == (V & ((1LL << 5) - 1));
}

static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
                                      const ARMSubtarget *Subtarget) {
  bool isNeg = false;
  if (V < 0) {
    isNeg = true;
    V = - V;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // + imm12 or - imm8
    if (isNeg)
      return V == (V & ((1LL << 8) - 1));
    return V == (V & ((1LL << 12) - 1));
  case MVT::f32:
  case MVT::f64:
    // Same as ARM mode. FIXME: NEON?
    if (!Subtarget->hasVFP2())
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
static bool isLegalAddressImmediate(int64_t V, EVT VT,
                                    const ARMSubtarget *Subtarget) {
  if (V == 0)
    return true;

  if (!VT.isSimple())
    return false;

  if (Subtarget->isThumb1Only())
    return isLegalT1AddressImmediate(V, VT);
  else if (Subtarget->isThumb2())
    return isLegalT2AddressImmediate(V, VT, Subtarget);

  // ARM mode.
  if (V < 0)
    V = - V;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i32:
    // +- imm12
    return V == (V & ((1LL << 12) - 1));
  case MVT::i16:
    // +- imm8
    return V == (V & ((1LL << 8) - 1));
  case MVT::f32:
  case MVT::f64:
    if (!Subtarget->hasVFP2()) // FIXME: NEON?
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
                                                      EVT VT) const {
  int Scale = AM.Scale;
  if (Scale < 0)
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    if (Scale == 1)
      return true;
    // r + r << imm
    Scale = Scale & ~1;
    return Scale == 2 || Scale == 4 || Scale == 8;
  case MVT::i64:
    // r + r
    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
      return true;
    return false;
  case MVT::isVoid:
    // Note, we allow "void" uses (basically, uses that aren't loads or
    // stores), because ARM allows folding a scale into many arithmetic
    // operations. This should be made more precise and revisited later.

    // Allow r << imm, but the imm has to be a multiple of two.
    if (Scale & 1) return false;
    return isPowerOf2_32(Scale);
  }
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
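/// AM describes the mode BaseGV + BaseOffs + BaseReg + Scale*ScaleReg, as
/// defined by TargetLowering::AddrMode.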
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  EVT VT = getValueType(Ty, true);
  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
    return false;

  // Can never fold addr of global into load/store.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0:  // no scale reg, must be "r+i" or "r", or "i".
    break;
  case 1:
    if (Subtarget->isThumb1Only())
      return false;
    // FALL THROUGH.
  default:
    // ARM doesn't support any R+R*scale+imm addr modes.
    if (AM.BaseOffs)
      return false;

    if (!VT.isSimple())
      return false;

    if (Subtarget->isThumb2())
      return isLegalT2ScaledAddressingMode(AM, VT);

    int Scale = AM.Scale;
    switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i32:
      if (Scale < 0) Scale = -Scale;
      if (Scale == 1)
        return true;
      // r + r << imm
      return isPowerOf2_32(Scale & ~1);
    case MVT::i16:
    case MVT::i64:
      // r + r
      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
        return true;
      return false;

    case MVT::isVoid:
      // Note, we allow "void" uses (basically, uses that aren't loads or
      // stores), because ARM allows folding a scale into many arithmetic
      // operations. This should be made more precise and revisited later.

      // Allow r << imm, but the imm has to be a multiple of two.
      if (Scale & 1) return false;
      return isPowerOf2_32(Scale);
    }
    break;
  }
  return true;
}

/// isLegalICmpImmediate - Return true if the specified immediate is a legal
/// icmp immediate, that is, the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
/// the immediate into a register.
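/// In ARM mode this is a so_imm (an 8-bit value rotated right by an even
/// amount); Thumb2 uses its modified-immediate encoding; Thumb1 CMP only
/// accepts an unsigned 8-bit immediate.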
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  if (!Subtarget->isThumb())
    return ARM_AM::getSOImmVal(Imm) != -1;
  if (Subtarget->isThumb2())
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  return Imm >= 0 && Imm <= 255;
}

static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
    // AddressingMode 3
    Base = Ptr->getOperand(0);
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -256) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        return true;
      }
    }
    isInc = (Ptr->getOpcode() == ISD::ADD);
    Offset = Ptr->getOperand(1);
    return true;
  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
    // AddressingMode 2
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -0x1000) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        Base = Ptr->getOperand(0);
        return true;
      }
    }

    if (Ptr->getOpcode() == ISD::ADD) {
      isInc = true;
      ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
      if (ShOpcVal != ARM_AM::no_shift) {
        Base = Ptr->getOperand(1);
        Offset = Ptr->getOperand(0);
      } else {
        Base = Ptr->getOperand(0);
        Offset = Ptr->getOperand(1);
      }
      return true;
    }

    isInc = (Ptr->getOpcode() == ISD::ADD);
    Base = Ptr->getOperand(0);
    Offset = Ptr->getOperand(1);
    return true;
  }

  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
  return false;
}

static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
                                     bool isSEXTLoad, SDValue &Base,
                                     SDValue &Offset, bool &isInc,
                                     SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  Base = Ptr->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bits, no zero.
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
      return true;
    }
  }

  return false;
}

/// getPreIndexedAddressParts - Returns true if the node's address can be
/// legally represented as a pre-indexed load / store address, returning the
/// base pointer, offset, and addressing mode by reference.
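/// For example, "ldr r0, [r1, #4]!" loads from r1+4 and writes the updated
/// address back into r1, folding a separate address increment into the load.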
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                             SDValue &Offset,
                                             ISD::MemIndexedMode &AM,
                                             SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                       Offset, isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                        Offset, isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}

/// getPostIndexedAddressParts - Returns true if this node's address can be
/// combined with a load / store to form a post-indexed load / store,
/// returning the base pointer, offset, and addressing mode by reference.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                       isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                        isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}

void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case ARMISD::CMOV: {
    // Bits are known zero/one if known on the LHS and RHS.
    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
    if (KnownZero == 0 && KnownOne == 0) return;

    APInt KnownZeroRHS, KnownOneRHS;
    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
                          KnownZeroRHS, KnownOneRHS, Depth+1);
    KnownZero &= KnownZeroRHS;
    KnownOne &= KnownOneRHS;
    return;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           ARM Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
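/// Only 'l' (low registers) and 'w' (VFP registers) are classified here;
/// everything else is delegated to the generic TargetLowering version.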
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:  break;
    case 'l': return C_RegisterClass;
    case 'w': return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass*>
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l':
      if (Subtarget->isThumb1Only())
        return std::make_pair(0U, ARM::tGPRRegisterClass);
      else
        return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'r':
      return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'w':
      if (VT == MVT::f32)
        return std::make_pair(0U, ARM::SPRRegisterClass);
      if (VT == MVT::f64)
        return std::make_pair(0U, ARM::DPRRegisterClass);
      break;
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

std::vector<unsigned> ARMTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  EVT VT) const {
  if (Constraint.size() != 1)
    return std::vector<unsigned>();

  switch (Constraint[0]) {      // GCC ARM Constraint Letters
  default: break;
  case 'l':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 0);
  case 'r':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
                                 ARM::R12, ARM::LR, 0);
  case 'w':
    if (VT == MVT::f32)
      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
                                   ARM::S12, ARM::S13, ARM::S14, ARM::S15,
                                   ARM::S16, ARM::S17, ARM::S18, ARM::S19,
                                   ARM::S20, ARM::S21, ARM::S22, ARM::S23,
                                   ARM::S24, ARM::S25, ARM::S26, ARM::S27,
                                   ARM::S28, ARM::S29, ARM::S30, ARM::S31, 0);
    if (VT == MVT::f64)
      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
                                   ARM::D8, ARM::D9, ARM::D10, ARM::D11,
                                   ARM::D12, ARM::D13, ARM::D14, ARM::D15, 0);
    break;
  }

  return std::vector<unsigned>();
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     char Constraint,
                                                     bool hasMemory,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits.  Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    switch (Constraint) {
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD
          // immediates.
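          // (Thumb1 ADD immediates are encoded as an unsigned 8-bit field,
          // hence the 0-255 range.)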
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getT2SOImmVal(CVal) != -1)
            break;
        } else {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getSOImmVal(CVal) != -1)
            break;
        }
        return;

      case 'J':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between -255 and -1, for negated ADD
          // immediates. This can be used in GCC with an "n" modifier that
          // prints the negated value, for use with SUB instructions. It is
          // not useful otherwise but is implemented for compatibility.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095. It is not clear
          // what this constraint is intended for. Implemented for
          // compatibility with GCC.
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;

      case 'K':
        if (Subtarget->isThumb1Only()) {
          // A 32-bit value where only one byte has a nonzero value. Exclude
          // zero to match GCC. This constraint is used by GCC internally for
          // constants that can be loaded with a move/shift combination.
          // It is not useful otherwise but is implemented for compatibility.
          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
            break;
        } else {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getSOImmVal(~CVal) != -1)
            break;
        }
        return;

      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7,
          // for 3-operand ADD/SUB immediate instructions.
          if (CVal >= -7 && CVal <= 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
            break;
        } else {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getSOImmVal(-CVal) != -1)
            break;
        }
        return;

      case 'M':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
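          // (The encoding scales an unsigned 8-bit immediate by 4, hence
          // the 0-1020 range.)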
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32.  This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }
    Result = DAG.getTargetConstant(CVal, Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
                                                      Ops, DAG);
}

bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The ARM target isn't yet aware of offsets.
  return false;
}

int ARM::getVFPf32Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0x7ffff)
    return -1;
  Mantissa >>= 19;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

int ARM::getVFPf64Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;  // -1022 to 1023
  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0xffffffffffffLL)
    return -1;
  Mantissa >>= 48;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM::getVFPf32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM::getVFPf64Imm(Imm) != -1;
  return false;
}
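
// A worked example for the VFP immediate encoding above (an illustrative
// note, not from the original sources): 1.0f has sign 0, an IEEE exponent
// field of 127 (so Exp == 0), and a zero mantissa.  getVFPf32Imm re-encodes
// the exponent as ((0+3) & 0x7) ^ 4 == 7 and returns
// (0 << 7) | (7 << 4) | 0 == 0x70, i.e. abcdefgh == 01110000 in the ARM
// ARM's (-1)^a * 2^(UInt(NOT(b):c:d)-3) * (16+UInt(efgh))/16 form.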