ARMISelLowering.cpp revision c9aed19747608b7688a64f2f382a008889f8e57d
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

// This option should go away when Machine LICM is smart enough to hoist a
// reg-to-reg VDUP.
static cl::opt<bool>
EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
  cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
  cl::init(false));

static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

static cl::opt<bool>
EnableARMCodePlacement("arm-code-placement", cl::Hidden,
  cl::desc("Enable code placement pass for ARM"),
  cl::init(false));

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
  // use the default expansion.
  bool canHandleAtomics =
    (Subtarget->hasV7Ops() ||
     (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
  if (canHandleAtomics) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }
  // 64-bit versions are always libcalls (for now)
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops())
    setTargetDAGCombine(ISD::OR);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  if (EnableARMCodePlacement)
    benefitFromCodePlacementOpt = true;
}

std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
  case ARMISD::CMOV: return "ARMISD::CMOV";
  case ARMISD::CNEG: return "ARMISD::CNEG";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
  case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
  case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
  case ARMISD::VSHRN: return "ARMISD::VSHRN";
  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
  case ARMISD::VDUP: return "ARMISD::VDUP";
  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
  case ARMISD::VEXT: return "ARMISD::VEXT";
  case ARMISD::VREV64: return "ARMISD::VREV64";
  case ARMISD::VREV32: return "ARMISD::VREV32";
  case ARMISD::VREV16: return "ARMISD::VREV16";
  case ARMISD::VZIP: return "ARMISD::VZIP";
  case ARMISD::VUZP: return "ARMISD::VUZP";
  case ARMISD::VTRN: return "ARMISD::VTRN";
  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX: return "ARMISD::FMAX";
  case ARMISD::FMIN: return "ARMISD::FMIN";
  case ARMISD::BFI: return "ARMISD::BFI";
  }
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::Latency;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  if (TID.mayLoad())
    return Sched::Latency;

  const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
  if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
    return Sched::Latency;
  return Sched::RegPressure;
}

unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                       MachineFunction &MF) const {
  switch (RC->getID()) {
  default:
    return 0;
  case ARM::tGPRRegClassID:
    return RegInfo->hasFP(MF) ? 4 : 5;
  case ARM::GPRRegClassID: {
    unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
    return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
  }
  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
  case ARM::DPRRegClassID:
    return 32 - 10;
  }
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO: CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

// APCS f64 is in register pairs, possibly split to stack
static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          CCState &State, bool CanFail) {
  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // For the 2nd half of a v2f64, do not fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };

  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  unsigned T = State.AllocateReg(LoRegList[i]);
  (void)T;
  assert(T == LoRegList[i] && "Could not allocate register");

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset,
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                                                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (!IsSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const ARMInstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                                     bool isCalleeStructRet,
                                                     bool isCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall optimize vararg calls unless the call site is not passing
  // any arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them.
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR. This means if we need to reload LR, it takes an extra instruction,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used. Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.
1476 if (Subtarget->isThumb1Only()) 1477 return false; 1478 1479 // For the moment, we can only do this to functions defined in this 1480 // compilation, or to indirect calls. A Thumb B to an ARM function, 1481 // or vice versa, is not easily fixed up in the linker unlike BL. 1482 // (We could do this by loading the address of the callee into a register; 1483 // that is an extra instruction over the direct call and burns a register 1484 // as well, so is not likely to be a win.) 1485 1486 // It might be safe to remove this restriction on non-Darwin. 1487 1488 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1489 // but we need to make sure there are enough registers; the only valid 1490 // registers are the 4 used for parameters. We don't currently do this 1491 // case. 1492 if (isa<ExternalSymbolSDNode>(Callee)) 1493 return false; 1494 1495 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1496 const GlobalValue *GV = G->getGlobal(); 1497 if (GV->isDeclaration() || GV->isWeakForLinker()) 1498 return false; 1499 } 1500 1501 // If the calling conventions do not match, then we'd better make sure the 1502 // results are returned in the same way as what the caller expects. 1503 if (!CCMatch) { 1504 SmallVector<CCValAssign, 16> RVLocs1; 1505 CCState CCInfo1(CalleeCC, false, getTargetMachine(), 1506 RVLocs1, *DAG.getContext()); 1507 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1508 1509 SmallVector<CCValAssign, 16> RVLocs2; 1510 CCState CCInfo2(CallerCC, false, getTargetMachine(), 1511 RVLocs2, *DAG.getContext()); 1512 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1513 1514 if (RVLocs1.size() != RVLocs2.size()) 1515 return false; 1516 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1517 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1518 return false; 1519 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1520 return false; 1521 if (RVLocs1[i].isRegLoc()) { 1522 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1523 return false; 1524 } else { 1525 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1526 return false; 1527 } 1528 } 1529 } 1530 1531 // If the callee takes no arguments then go on to check the results of the 1532 // call. 1533 if (!Outs.empty()) { 1534 // Check if stack adjustment is needed. For now, do not do this if any 1535 // argument is passed on the stack. 1536 SmallVector<CCValAssign, 16> ArgLocs; 1537 CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), 1538 ArgLocs, *DAG.getContext()); 1539 CCInfo.AnalyzeCallOperands(Outs, 1540 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1541 if (CCInfo.getNextStackOffset()) { 1542 MachineFunction &MF = DAG.getMachineFunction(); 1543 1544 // Check if the arguments are already laid out in the right way as 1545 // the caller's fixed stack objects. 
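// For instance, a function that simply forwards one of its own incoming
// stack arguments unchanged (same size and offset) can still be sibcall'ed:
// MatchingStackOffset recognizes the reload from the caller's fixed stack
// slot, so no outgoing store is needed.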
1546 MachineFrameInfo *MFI = MF.getFrameInfo(); 1547 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1548 const ARMInstrInfo *TII = 1549 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1550 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1551 i != e; 1552 ++i, ++realArgIdx) { 1553 CCValAssign &VA = ArgLocs[i]; 1554 EVT RegVT = VA.getLocVT(); 1555 SDValue Arg = OutVals[realArgIdx]; 1556 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1557 if (VA.getLocInfo() == CCValAssign::Indirect) 1558 return false; 1559 if (VA.needsCustom()) { 1560 // f64 and vector types are split into multiple registers or 1561 // register/stack-slot combinations. The types will not match 1562 // the registers; give up on memory f64 refs until we figure 1563 // out what to do about this. 1564 if (!VA.isRegLoc()) 1565 return false; 1566 if (!ArgLocs[++i].isRegLoc()) 1567 return false; 1568 if (RegVT == MVT::v2f64) { 1569 if (!ArgLocs[++i].isRegLoc()) 1570 return false; 1571 if (!ArgLocs[++i].isRegLoc()) 1572 return false; 1573 } 1574 } else if (!VA.isRegLoc()) { 1575 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1576 MFI, MRI, TII)) 1577 return false; 1578 } 1579 } 1580 } 1581 } 1582 1583 return true; 1584} 1585 1586SDValue 1587ARMTargetLowering::LowerReturn(SDValue Chain, 1588 CallingConv::ID CallConv, bool isVarArg, 1589 const SmallVectorImpl<ISD::OutputArg> &Outs, 1590 const SmallVectorImpl<SDValue> &OutVals, 1591 DebugLoc dl, SelectionDAG &DAG) const { 1592 1593 // CCValAssign - represent the assignment of the return value to a location. 1594 SmallVector<CCValAssign, 16> RVLocs; 1595 1596 // CCState - Info about the registers and stack slots. 1597 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, 1598 *DAG.getContext()); 1599 1600 // Analyze outgoing return values. 1601 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1602 isVarArg)); 1603 1604 // If this is the first return lowered for this function, add 1605 // the regs to the liveout set for the function. 1606 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1607 for (unsigned i = 0; i != RVLocs.size(); ++i) 1608 if (RVLocs[i].isRegLoc()) 1609 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1610 } 1611 1612 SDValue Flag; 1613 1614 // Copy the result values into the output registers. 1615 for (unsigned i = 0, realRVLocIdx = 0; 1616 i != RVLocs.size(); 1617 ++i, ++realRVLocIdx) { 1618 CCValAssign &VA = RVLocs[i]; 1619 assert(VA.isRegLoc() && "Can only return in registers!"); 1620 1621 SDValue Arg = OutVals[realRVLocIdx]; 1622 1623 switch (VA.getLocInfo()) { 1624 default: llvm_unreachable("Unknown loc info!"); 1625 case CCValAssign::Full: break; 1626 case CCValAssign::BCvt: 1627 Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); 1628 break; 1629 } 1630 1631 if (VA.needsCustom()) { 1632 if (VA.getLocVT() == MVT::v2f64) { 1633 // Extract the first half and return it in two registers. 
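// e.g. under the soft-float ABI a v2f64 result is returned as four i32
// pieces (typically R0-R3): each f64 half goes through a VMOVRRD into the
// pair of GPRs that RVLocs assigned to it.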
1634 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1635 DAG.getConstant(0, MVT::i32)); 1636 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1637 DAG.getVTList(MVT::i32, MVT::i32), Half); 1638 1639 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1640 Flag = Chain.getValue(1); 1641 VA = RVLocs[++i]; // skip ahead to next loc 1642 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1643 HalfGPRs.getValue(1), Flag); 1644 Flag = Chain.getValue(1); 1645 VA = RVLocs[++i]; // skip ahead to next loc 1646 1647 // Extract the 2nd half and fall through to handle it as an f64 value. 1648 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1649 DAG.getConstant(1, MVT::i32)); 1650 } 1651 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1652 // available. 1653 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1654 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1655 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1656 Flag = Chain.getValue(1); 1657 VA = RVLocs[++i]; // skip ahead to next loc 1658 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1659 Flag); 1660 } else 1661 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1662 1663 // Guarantee that all emitted copies are 1664 // stuck together, avoiding something bad. 1665 Flag = Chain.getValue(1); 1666 } 1667 1668 SDValue result; 1669 if (Flag.getNode()) 1670 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1671 else // Return Void 1672 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1673 1674 return result; 1675} 1676 1677// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1678// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1679// one of the above mentioned nodes. It has to be wrapped because otherwise 1680// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1681// be used to form addressing mode. These wrapped nodes will be selected 1682// into MOVi. 1683static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1684 EVT PtrVT = Op.getValueType(); 1685 // FIXME there is no actual debug info here 1686 DebugLoc dl = Op.getDebugLoc(); 1687 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1688 SDValue Res; 1689 if (CP->isMachineConstantPoolEntry()) 1690 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1691 CP->getAlignment()); 1692 else 1693 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1694 CP->getAlignment()); 1695 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1696} 1697 1698unsigned ARMTargetLowering::getJumpTableEncoding() const { 1699 return MachineJumpTableInfo::EK_Inline; 1700} 1701 1702SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1703 SelectionDAG &DAG) const { 1704 MachineFunction &MF = DAG.getMachineFunction(); 1705 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1706 unsigned ARMPCLabelIndex = 0; 1707 DebugLoc DL = Op.getDebugLoc(); 1708 EVT PtrVT = getPointerTy(); 1709 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1710 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1711 SDValue CPAddr; 1712 if (RelocM == Reloc::Static) { 1713 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1714 } else { 1715 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1716 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1717 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, 1718 ARMCP::CPBlockAddress, 1719 PCAdj); 1720 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1721 } 1722 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1723 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1724 PseudoSourceValue::getConstantPool(), 0, 1725 false, false, 0); 1726 if (RelocM == Reloc::Static) 1727 return Result; 1728 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1729 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1730} 1731 1732// Lower ISD::GlobalTLSAddress using the "general dynamic" model 1733SDValue 1734ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1735 SelectionDAG &DAG) const { 1736 DebugLoc dl = GA->getDebugLoc(); 1737 EVT PtrVT = getPointerTy(); 1738 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1739 MachineFunction &MF = DAG.getMachineFunction(); 1740 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1741 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1742 ARMConstantPoolValue *CPV = 1743 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1744 ARMCP::CPValue, PCAdj, "tlsgd", true); 1745 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1746 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1747 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1748 PseudoSourceValue::getConstantPool(), 0, 1749 false, false, 0); 1750 SDValue Chain = Argument.getValue(1); 1751 1752 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1753 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1754 1755 // call __tls_get_addr. 1756 ArgListTy Args; 1757 ArgListEntry Entry; 1758 Entry.Node = Argument; 1759 Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); 1760 Args.push_back(Entry); 1761 // FIXME: is there useful debug info available here? 1762 std::pair<SDValue, SDValue> CallResult = 1763 LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), 1764 false, false, false, false, 1765 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1766 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 1767 return CallResult.first; 1768} 1769 1770// Lower ISD::GlobalTLSAddress using the "initial exec" or 1771// "local exec" model. 1772SDValue 1773ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 1774 SelectionDAG &DAG) const { 1775 const GlobalValue *GV = GA->getGlobal(); 1776 DebugLoc dl = GA->getDebugLoc(); 1777 SDValue Offset; 1778 SDValue Chain = DAG.getEntryNode(); 1779 EVT PtrVT = getPointerTy(); 1780 // Get the Thread Pointer 1781 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1782 1783 if (GV->isDeclaration()) { 1784 MachineFunction &MF = DAG.getMachineFunction(); 1785 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1786 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1787 // Initial exec model. 1788 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 1789 ARMConstantPoolValue *CPV = 1790 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1791 ARMCP::CPValue, PCAdj, "gottpoff", true); 1792 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1793 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1794 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1795 PseudoSourceValue::getConstantPool(), 0, 1796 false, false, 0); 1797 Chain = Offset.getValue(1); 1798 1799 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1800 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 1801 1802 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1803 PseudoSourceValue::getConstantPool(), 0, 1804 false, false, 0); 1805 } else { 1806 // local exec model 1807 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); 1808 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1809 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1810 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1811 PseudoSourceValue::getConstantPool(), 0, 1812 false, false, 0); 1813 } 1814 1815 // The address of the thread local variable is the add of the thread 1816 // pointer with the offset of the variable. 1817 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 1818} 1819 1820SDValue 1821ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 1822 // TODO: implement the "local dynamic" model 1823 assert(Subtarget->isTargetELF() && 1824 "TLS not implemented for non-ELF targets"); 1825 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1826 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 1827 // otherwise use the "Local Exec" TLS Model 1828 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 1829 return LowerToTLSGeneralDynamicModel(GA, DAG); 1830 else 1831 return LowerToTLSExecModels(GA, DAG); 1832} 1833 1834SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 1835 SelectionDAG &DAG) const { 1836 EVT PtrVT = getPointerTy(); 1837 DebugLoc dl = Op.getDebugLoc(); 1838 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1839 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1840 if (RelocM == Reloc::PIC_) { 1841 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 1842 ARMConstantPoolValue *CPV = 1843 new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT"); 1844 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1845 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1846 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 1847 CPAddr, 1848 PseudoSourceValue::getConstantPool(), 0, 1849 false, false, 0); 1850 SDValue Chain = Result.getValue(1); 1851 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 1852 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 1853 if (!UseGOTOFF) 1854 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1855 PseudoSourceValue::getGOT(), 0, 1856 false, false, 0); 1857 return Result; 1858 } else { 1859 // If we have T2 ops, we can materialize the address directly via movt/movw 1860 // pair. This is always cheaper. 
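// Roughly:
//   movw rN, :lower16:sym
//   movt rN, :upper16:sym
// rather than loading the address from the constant pool.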
1861 if (Subtarget->useMovt()) { 1862 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 1863 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 1864 } else { 1865 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1866 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1867 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1868 PseudoSourceValue::getConstantPool(), 0, 1869 false, false, 0); 1870 } 1871 } 1872} 1873 1874SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 1875 SelectionDAG &DAG) const { 1876 MachineFunction &MF = DAG.getMachineFunction(); 1877 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1878 unsigned ARMPCLabelIndex = 0; 1879 EVT PtrVT = getPointerTy(); 1880 DebugLoc dl = Op.getDebugLoc(); 1881 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1882 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1883 SDValue CPAddr; 1884 if (RelocM == Reloc::Static) 1885 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1886 else { 1887 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1888 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 1889 ARMConstantPoolValue *CPV = 1890 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 1891 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1892 } 1893 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1894 1895 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1896 PseudoSourceValue::getConstantPool(), 0, 1897 false, false, 0); 1898 SDValue Chain = Result.getValue(1); 1899 1900 if (RelocM == Reloc::PIC_) { 1901 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1902 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1903 } 1904 1905 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 1906 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1907 PseudoSourceValue::getGOT(), 0, 1908 false, false, 0); 1909 1910 return Result; 1911} 1912 1913SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 1914 SelectionDAG &DAG) const { 1915 assert(Subtarget->isTargetELF() && 1916 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 1917 MachineFunction &MF = DAG.getMachineFunction(); 1918 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1919 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1920 EVT PtrVT = getPointerTy(); 1921 DebugLoc dl = Op.getDebugLoc(); 1922 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1923 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1924 "_GLOBAL_OFFSET_TABLE_", 1925 ARMPCLabelIndex, PCAdj); 1926 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1927 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1928 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1929 PseudoSourceValue::getConstantPool(), 0, 1930 false, false, 0); 1931 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1932 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1933} 1934 1935SDValue 1936ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 1937 DebugLoc dl = Op.getDebugLoc(); 1938 SDValue Val = DAG.getConstant(0, MVT::i32); 1939 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 1940 Op.getOperand(1), Val); 1941} 1942 1943SDValue 1944ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 1945 DebugLoc dl = Op.getDebugLoc(); 1946 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 1947 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 1948} 1949 1950SDValue 1951ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 1952 const ARMSubtarget *Subtarget) const { 1953 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1954 DebugLoc dl = Op.getDebugLoc(); 1955 switch (IntNo) { 1956 default: return SDValue(); // Don't custom lower most intrinsics. 1957 case Intrinsic::arm_thread_pointer: { 1958 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1959 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1960 } 1961 case Intrinsic::eh_sjlj_lsda: { 1962 MachineFunction &MF = DAG.getMachineFunction(); 1963 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1964 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1965 EVT PtrVT = getPointerTy(); 1966 DebugLoc dl = Op.getDebugLoc(); 1967 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1968 SDValue CPAddr; 1969 unsigned PCAdj = (RelocM != Reloc::PIC_) 1970 ? 0 : (Subtarget->isThumb() ? 4 : 8); 1971 ARMConstantPoolValue *CPV = 1972 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 1973 ARMCP::CPLSDA, PCAdj); 1974 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1975 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1976 SDValue Result = 1977 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1978 PseudoSourceValue::getConstantPool(), 0, 1979 false, false, 0); 1980 1981 if (RelocM == Reloc::PIC_) { 1982 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1983 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1984 } 1985 return Result; 1986 } 1987 } 1988} 1989 1990static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 1991 const ARMSubtarget *Subtarget) { 1992 DebugLoc dl = Op.getDebugLoc(); 1993 SDValue Op5 = Op.getOperand(5); 1994 unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); 1995 // v6 and v7 can both handle barriers directly, but need handled a bit 1996 // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should 1997 // never get here. 1998 unsigned Opc = isDeviceBarrier ? 
ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; 1999 if (Subtarget->hasV7Ops()) 2000 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); 2001 else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) 2002 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), 2003 DAG.getConstant(0, MVT::i32)); 2004 assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2005 return SDValue(); 2006} 2007 2008static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2009 MachineFunction &MF = DAG.getMachineFunction(); 2010 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2011 2012 // vastart just stores the address of the VarArgsFrameIndex slot into the 2013 // memory location argument. 2014 DebugLoc dl = Op.getDebugLoc(); 2015 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2016 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2017 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2018 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, 2019 false, false, 0); 2020} 2021 2022SDValue 2023ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2024 SDValue &Root, SelectionDAG &DAG, 2025 DebugLoc dl) const { 2026 MachineFunction &MF = DAG.getMachineFunction(); 2027 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2028 2029 TargetRegisterClass *RC; 2030 if (AFI->isThumb1OnlyFunction()) 2031 RC = ARM::tGPRRegisterClass; 2032 else 2033 RC = ARM::GPRRegisterClass; 2034 2035 // Transform the arguments stored in physical registers into virtual ones. 2036 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2037 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2038 2039 SDValue ArgValue2; 2040 if (NextVA.isMemLoc()) { 2041 MachineFrameInfo *MFI = MF.getFrameInfo(); 2042 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2043 2044 // Create load node to retrieve arguments from the stack. 2045 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2046 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2047 PseudoSourceValue::getFixedStack(FI), 0, 2048 false, false, 0); 2049 } else { 2050 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2051 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2052 } 2053 2054 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2055} 2056 2057SDValue 2058ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2059 CallingConv::ID CallConv, bool isVarArg, 2060 const SmallVectorImpl<ISD::InputArg> 2061 &Ins, 2062 DebugLoc dl, SelectionDAG &DAG, 2063 SmallVectorImpl<SDValue> &InVals) 2064 const { 2065 2066 MachineFunction &MF = DAG.getMachineFunction(); 2067 MachineFrameInfo *MFI = MF.getFrameInfo(); 2068 2069 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2070 2071 // Assign locations to all of the incoming arguments. 2072 SmallVector<CCValAssign, 16> ArgLocs; 2073 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 2074 *DAG.getContext()); 2075 CCInfo.AnalyzeFormalArguments(Ins, 2076 CCAssignFnForNode(CallConv, /* Return*/ false, 2077 isVarArg)); 2078 2079 SmallVector<SDValue, 16> ArgValues; 2080 2081 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2082 CCValAssign &VA = ArgLocs[i]; 2083 2084 // Arguments stored in registers. 2085 if (VA.isRegLoc()) { 2086 EVT RegVT = VA.getLocVT(); 2087 2088 SDValue ArgValue; 2089 if (VA.needsCustom()) { 2090 // f64 and vector types are split up into multiple registers or 2091 // combinations of registers and stack slots. 
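// e.g. an f64 argument may arrive as a GPR pair (say R2+R3), or split
// between R3 and a stack slot, which is why GetF64FormalArgument below
// accepts either a register or a memory location for its second half.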
2092 if (VA.getLocVT() == MVT::v2f64) { 2093 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2094 Chain, DAG, dl); 2095 VA = ArgLocs[++i]; // skip ahead to next loc 2096 SDValue ArgValue2; 2097 if (VA.isMemLoc()) { 2098 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2099 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2100 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2101 PseudoSourceValue::getFixedStack(FI), 0, 2102 false, false, 0); 2103 } else { 2104 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2105 Chain, DAG, dl); 2106 } 2107 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2108 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2109 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2110 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2111 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2112 } else 2113 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2114 2115 } else { 2116 TargetRegisterClass *RC; 2117 2118 if (RegVT == MVT::f32) 2119 RC = ARM::SPRRegisterClass; 2120 else if (RegVT == MVT::f64) 2121 RC = ARM::DPRRegisterClass; 2122 else if (RegVT == MVT::v2f64) 2123 RC = ARM::QPRRegisterClass; 2124 else if (RegVT == MVT::i32) 2125 RC = (AFI->isThumb1OnlyFunction() ? 2126 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2127 else 2128 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2129 2130 // Transform the arguments in physical registers into virtual ones. 2131 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2132 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2133 } 2134 2135 // If this is an 8 or 16-bit value, it is really passed promoted 2136 // to 32 bits. Insert an assert[sz]ext to capture this, then 2137 // truncate to the right size. 2138 switch (VA.getLocInfo()) { 2139 default: llvm_unreachable("Unknown loc info!"); 2140 case CCValAssign::Full: break; 2141 case CCValAssign::BCvt: 2142 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 2143 break; 2144 case CCValAssign::SExt: 2145 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2146 DAG.getValueType(VA.getValVT())); 2147 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2148 break; 2149 case CCValAssign::ZExt: 2150 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2151 DAG.getValueType(VA.getValVT())); 2152 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2153 break; 2154 } 2155 2156 InVals.push_back(ArgValue); 2157 2158 } else { // VA.isRegLoc() 2159 2160 // sanity check 2161 assert(VA.isMemLoc()); 2162 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2163 2164 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; 2165 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); 2166 2167 // Create load nodes to retrieve arguments from the stack. 
2168 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2169 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2170 PseudoSourceValue::getFixedStack(FI), 0, 2171 false, false, 0)); 2172 } 2173 } 2174 2175 // varargs 2176 if (isVarArg) { 2177 static const unsigned GPRArgRegs[] = { 2178 ARM::R0, ARM::R1, ARM::R2, ARM::R3 2179 }; 2180 2181 unsigned NumGPRs = CCInfo.getFirstUnallocated 2182 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2183 2184 unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); 2185 unsigned VARegSize = (4 - NumGPRs) * 4; 2186 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2187 unsigned ArgOffset = CCInfo.getNextStackOffset(); 2188 if (VARegSaveSize) { 2189 // If this function is vararg, store any remaining integer argument regs 2190 // to their spots on the stack so that they may be loaded by deferencing 2191 // the result of va_next. 2192 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2193 AFI->setVarArgsFrameIndex( 2194 MFI->CreateFixedObject(VARegSaveSize, 2195 ArgOffset + VARegSaveSize - VARegSize, 2196 true)); 2197 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2198 getPointerTy()); 2199 2200 SmallVector<SDValue, 4> MemOps; 2201 for (; NumGPRs < 4; ++NumGPRs) { 2202 TargetRegisterClass *RC; 2203 if (AFI->isThumb1OnlyFunction()) 2204 RC = ARM::tGPRRegisterClass; 2205 else 2206 RC = ARM::GPRRegisterClass; 2207 2208 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); 2209 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2210 SDValue Store = 2211 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2212 PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 2213 0, false, false, 0); 2214 MemOps.push_back(Store); 2215 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2216 DAG.getConstant(4, getPointerTy())); 2217 } 2218 if (!MemOps.empty()) 2219 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2220 &MemOps[0], MemOps.size()); 2221 } else 2222 // This will point to the next argument passed via stack. 2223 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2224 } 2225 2226 return Chain; 2227} 2228 2229/// isFloatingPointZero - Return true if this is +0.0. 2230static bool isFloatingPointZero(SDValue Op) { 2231 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2232 return CFP->getValueAPF().isPosZero(); 2233 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2234 // Maybe this has already been legalized into the constant pool? 2235 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2236 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2237 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2238 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2239 return CFP->getValueAPF().isPosZero(); 2240 } 2241 } 2242 return false; 2243} 2244 2245/// Returns appropriate ARM CMP (cmp) and corresponding condition code for 2246/// the given operands. 2247SDValue 2248ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2249 SDValue &ARMcc, SelectionDAG &DAG, 2250 DebugLoc dl) const { 2251 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2252 unsigned C = RHSC->getZExtValue(); 2253 if (!isLegalICmpImmediate(C)) { 2254 // Constant does not fit, try adjusting it by one? 2255 switch (CC) { 2256 default: break; 2257 case ISD::SETLT: 2258 case ISD::SETGE: 2259 if (isLegalICmpImmediate(C-1)) { 2260 CC = (CC == ISD::SETLT) ? 
ISD::SETLE : ISD::SETGT; 2261 RHS = DAG.getConstant(C-1, MVT::i32); 2262 } 2263 break; 2264 case ISD::SETULT: 2265 case ISD::SETUGE: 2266 if (C > 0 && isLegalICmpImmediate(C-1)) { 2267 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2268 RHS = DAG.getConstant(C-1, MVT::i32); 2269 } 2270 break; 2271 case ISD::SETLE: 2272 case ISD::SETGT: 2273 if (isLegalICmpImmediate(C+1)) { 2274 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2275 RHS = DAG.getConstant(C+1, MVT::i32); 2276 } 2277 break; 2278 case ISD::SETULE: 2279 case ISD::SETUGT: 2280 if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { 2281 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2282 RHS = DAG.getConstant(C+1, MVT::i32); 2283 } 2284 break; 2285 } 2286 } 2287 } 2288 2289 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2290 ARMISD::NodeType CompareType; 2291 switch (CondCode) { 2292 default: 2293 CompareType = ARMISD::CMP; 2294 break; 2295 case ARMCC::EQ: 2296 case ARMCC::NE: 2297 // Uses only Z Flag 2298 CompareType = ARMISD::CMPZ; 2299 break; 2300 } 2301 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2302 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); 2303} 2304 2305/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 2306SDValue 2307ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2308 DebugLoc dl) const { 2309 SDValue Cmp; 2310 if (!isFloatingPointZero(RHS)) 2311 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); 2312 else 2313 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); 2314 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); 2315} 2316 2317SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2318 EVT VT = Op.getValueType(); 2319 SDValue LHS = Op.getOperand(0); 2320 SDValue RHS = Op.getOperand(1); 2321 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2322 SDValue TrueVal = Op.getOperand(2); 2323 SDValue FalseVal = Op.getOperand(3); 2324 DebugLoc dl = Op.getDebugLoc(); 2325 2326 if (LHS.getValueType() == MVT::i32) { 2327 SDValue ARMcc; 2328 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2329 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2330 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2331 } 2332 2333 ARMCC::CondCodes CondCode, CondCode2; 2334 FPCCToARMCC(CC, CondCode, CondCode2); 2335 2336 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2337 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2338 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2339 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2340 ARMcc, CCR, Cmp); 2341 if (CondCode2 != ARMCC::AL) { 2342 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2343 // FIXME: Needs another CMP because flag can have but one use. 2344 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2345 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2346 Result, TrueVal, ARMcc2, CCR, Cmp2); 2347 } 2348 return Result; 2349} 2350 2351/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2352/// to morph to an integer compare sequence. 2353static bool canChangeToInt(SDValue Op, bool &SeenZero, 2354 const ARMSubtarget *Subtarget) { 2355 SDNode *N = Op.getNode(); 2356 if (!N->hasOneUse()) 2357 // Otherwise it requires moving the value from fp to integer registers. 2358 return false; 2359 if (!N->getNumValues()) 2360 return false; 2361 EVT VT = Op.getValueType(); 2362 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2363 // f32 case is generally profitable. 
f64 case only makes sense when vcmpe +
2364 // vmrs are very slow, e.g. cortex-a8.
2365 return false;
2366
2367 if (isFloatingPointZero(Op)) {
2368 SeenZero = true;
2369 return true;
2370 }
2371 return ISD::isNormalLoad(N);
2372}
2373
2374static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2375 if (isFloatingPointZero(Op))
2376 return DAG.getConstant(0, MVT::i32);
2377
2378 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
2379 return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2380 Ld->getChain(), Ld->getBasePtr(),
2381 Ld->getSrcValue(), Ld->getSrcValueOffset(),
2382 Ld->isVolatile(), Ld->isNonTemporal(),
2383 Ld->getAlignment());
2384
2385 llvm_unreachable("Unknown VFP cmp argument!");
2386}
2387
2388static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2389 SDValue &RetVal1, SDValue &RetVal2) {
2390 if (isFloatingPointZero(Op)) {
2391 RetVal1 = DAG.getConstant(0, MVT::i32);
2392 RetVal2 = DAG.getConstant(0, MVT::i32);
2393 return;
2394 }
2395
2396 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
2397 SDValue Ptr = Ld->getBasePtr();
2398 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2399 Ld->getChain(), Ptr,
2400 Ld->getSrcValue(), Ld->getSrcValueOffset(),
2401 Ld->isVolatile(), Ld->isNonTemporal(),
2402 Ld->getAlignment());
2403
2404 EVT PtrType = Ptr.getValueType();
2405 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2406 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2407 PtrType, Ptr, DAG.getConstant(4, PtrType));
2408 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2409 Ld->getChain(), NewPtr,
2410 Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
2411 Ld->isVolatile(), Ld->isNonTemporal(),
2412 NewAlign);
2413 return;
2414 }
2415
2416 llvm_unreachable("Unknown VFP cmp argument!");
2417}
2418
2419/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2420/// f32 and even f64 comparisons to integer ones.
2421SDValue
2422ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2423 SDValue Chain = Op.getOperand(0);
2424 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2425 SDValue LHS = Op.getOperand(2);
2426 SDValue RHS = Op.getOperand(3);
2427 SDValue Dest = Op.getOperand(4);
2428 DebugLoc dl = Op.getDebugLoc();
2429
2430 bool SeenZero = false;
2431 if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2432 canChangeToInt(RHS, SeenZero, Subtarget) &&
2433 // If one of the operands is zero, it's safe to ignore the NaN case since
2434 // we only care about equality comparisons.
2435 (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
2436 // If unsafe fp math optimization is enabled and there are no other uses of
2437 // the CMP operands, and the condition code is EQ or NE, we can optimize it
2438 // to an integer comparison.
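// For example, an equality test of two f32 loads that are known not to be
// NaN can be done by reloading the same bits as i32 and feeding them to a
// plain integer CMP, avoiding the VFP compare and the FMSTAT transfer of
// the flags.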
2439 if (CC == ISD::SETOEQ) 2440 CC = ISD::SETEQ; 2441 else if (CC == ISD::SETUNE) 2442 CC = ISD::SETNE; 2443 2444 SDValue ARMcc; 2445 if (LHS.getValueType() == MVT::f32) { 2446 LHS = bitcastf32Toi32(LHS, DAG); 2447 RHS = bitcastf32Toi32(RHS, DAG); 2448 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2449 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2450 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2451 Chain, Dest, ARMcc, CCR, Cmp); 2452 } 2453 2454 SDValue LHS1, LHS2; 2455 SDValue RHS1, RHS2; 2456 expandf64Toi32(LHS, DAG, LHS1, LHS2); 2457 expandf64Toi32(RHS, DAG, RHS1, RHS2); 2458 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2459 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2460 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 2461 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 2462 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 2463 } 2464 2465 return SDValue(); 2466} 2467 2468SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 2469 SDValue Chain = Op.getOperand(0); 2470 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2471 SDValue LHS = Op.getOperand(2); 2472 SDValue RHS = Op.getOperand(3); 2473 SDValue Dest = Op.getOperand(4); 2474 DebugLoc dl = Op.getDebugLoc(); 2475 2476 if (LHS.getValueType() == MVT::i32) { 2477 SDValue ARMcc; 2478 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2479 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2480 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2481 Chain, Dest, ARMcc, CCR, Cmp); 2482 } 2483 2484 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 2485 2486 if (UnsafeFPMath && 2487 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 2488 CC == ISD::SETNE || CC == ISD::SETUNE)) { 2489 SDValue Result = OptimizeVFPBrcond(Op, DAG); 2490 if (Result.getNode()) 2491 return Result; 2492 } 2493 2494 ARMCC::CondCodes CondCode, CondCode2; 2495 FPCCToARMCC(CC, CondCode, CondCode2); 2496 2497 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2498 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2499 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2500 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 2501 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 2502 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2503 if (CondCode2 != ARMCC::AL) { 2504 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 2505 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 2506 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2507 } 2508 return Res; 2509} 2510 2511SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 2512 SDValue Chain = Op.getOperand(0); 2513 SDValue Table = Op.getOperand(1); 2514 SDValue Index = Op.getOperand(2); 2515 DebugLoc dl = Op.getDebugLoc(); 2516 2517 EVT PTy = getPointerTy(); 2518 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2519 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2520 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2521 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2522 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2523 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2524 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2525 if (Subtarget->isThumb2()) { 2526 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2527 // which does another jump to the destination. 
This also makes it easier 2528 // to translate it to TBB / TBH later. 2529 // FIXME: This might not work if the function is extremely large. 2530 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2531 Addr, Op.getOperand(2), JTI, UId); 2532 } 2533 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2534 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2535 PseudoSourceValue::getJumpTable(), 0, 2536 false, false, 0); 2537 Chain = Addr.getValue(1); 2538 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2539 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2540 } else { 2541 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2542 PseudoSourceValue::getJumpTable(), 0, false, false, 0); 2543 Chain = Addr.getValue(1); 2544 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2545 } 2546} 2547 2548static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 2549 DebugLoc dl = Op.getDebugLoc(); 2550 unsigned Opc; 2551 2552 switch (Op.getOpcode()) { 2553 default: 2554 assert(0 && "Invalid opcode!"); 2555 case ISD::FP_TO_SINT: 2556 Opc = ARMISD::FTOSI; 2557 break; 2558 case ISD::FP_TO_UINT: 2559 Opc = ARMISD::FTOUI; 2560 break; 2561 } 2562 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2563 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); 2564} 2565 2566static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2567 EVT VT = Op.getValueType(); 2568 DebugLoc dl = Op.getDebugLoc(); 2569 unsigned Opc; 2570 2571 switch (Op.getOpcode()) { 2572 default: 2573 assert(0 && "Invalid opcode!"); 2574 case ISD::SINT_TO_FP: 2575 Opc = ARMISD::SITOF; 2576 break; 2577 case ISD::UINT_TO_FP: 2578 Opc = ARMISD::UITOF; 2579 break; 2580 } 2581 2582 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); 2583 return DAG.getNode(Opc, dl, VT, Op); 2584} 2585 2586SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 2587 // Implement fcopysign with a fabs and a conditional fneg. 2588 SDValue Tmp0 = Op.getOperand(0); 2589 SDValue Tmp1 = Op.getOperand(1); 2590 DebugLoc dl = Op.getDebugLoc(); 2591 EVT VT = Op.getValueType(); 2592 EVT SrcVT = Tmp1.getValueType(); 2593 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); 2594 SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); 2595 SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); 2596 SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); 2597 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2598 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); 2599} 2600 2601SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 2602 MachineFunction &MF = DAG.getMachineFunction(); 2603 MachineFrameInfo *MFI = MF.getFrameInfo(); 2604 MFI->setReturnAddressIsTaken(true); 2605 2606 EVT VT = Op.getValueType(); 2607 DebugLoc dl = Op.getDebugLoc(); 2608 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2609 if (Depth) { 2610 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 2611 SDValue Offset = DAG.getConstant(4, MVT::i32); 2612 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 2613 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 2614 NULL, 0, false, false, 0); 2615 } 2616 2617 // Return LR, which contains the return address. Mark it an implicit live-in. 
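// (The Depth != 0 case above instead walked the frame chain via
// LowerFRAMEADDR and loaded the saved return address at FrameAddr + 4.)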
2618 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 2619 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 2620} 2621 2622SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 2623 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 2624 MFI->setFrameAddressIsTaken(true); 2625 2626 EVT VT = Op.getValueType(); 2627 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 2628 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2629 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 2630 ? ARM::R7 : ARM::R11; 2631 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 2632 while (Depth--) 2633 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, 2634 false, false, 0); 2635 return FrameAddr; 2636} 2637 2638/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to 2639/// expand a bit convert where either the source or destination type is i64 to 2640/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 2641/// operand type is illegal (e.g., v2f32 for a target that doesn't support 2642/// vectors), since the legalizer won't know what to do with that. 2643static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { 2644 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2645 DebugLoc dl = N->getDebugLoc(); 2646 SDValue Op = N->getOperand(0); 2647 2648 // This function is only supposed to be called for i64 types, either as the 2649 // source or destination of the bit convert. 2650 EVT SrcVT = Op.getValueType(); 2651 EVT DstVT = N->getValueType(0); 2652 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 2653 "ExpandBIT_CONVERT called for non-i64 type"); 2654 2655 // Turn i64->f64 into VMOVDRR. 2656 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 2657 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2658 DAG.getConstant(0, MVT::i32)); 2659 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2660 DAG.getConstant(1, MVT::i32)); 2661 return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, 2662 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 2663 } 2664 2665 // Turn f64->i64 into VMOVRRD. 2666 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 2667 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 2668 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 2669 // Merge the pieces into a single i64 value. 2670 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 2671 } 2672 2673 return SDValue(); 2674} 2675 2676/// getZeroVector - Returns a vector of specified type with all zero elements. 2677/// Zero vectors are used to represent vector negation and in those cases 2678/// will be implemented with the NEON VNEG instruction. However, VNEG does 2679/// not support i64 elements, so sometimes the zero vectors will need to be 2680/// explicitly constructed. Regardless, use a canonical VMOV to create the 2681/// zero vector. 2682static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2683 assert(VT.isVector() && "Expected a vector type"); 2684 // The canonical modified immediate encoding of a zero vector is....0! 2685 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 2686 EVT VmovVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32;
2687 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
2688 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
2689}
2690
2691/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
2692/// i32 values and take a 2 x i32 value to shift plus a shift amount.
2693SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2694 SelectionDAG &DAG) const {
2695 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2696 EVT VT = Op.getValueType();
2697 unsigned VTBits = VT.getSizeInBits();
2698 DebugLoc dl = Op.getDebugLoc();
2699 SDValue ShOpLo = Op.getOperand(0);
2700 SDValue ShOpHi = Op.getOperand(1);
2701 SDValue ShAmt = Op.getOperand(2);
2702 SDValue ARMcc;
2703 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2704
2705 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2706
2707 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2708 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2709 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2710 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2711 DAG.getConstant(VTBits, MVT::i32));
2712 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2713 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2714 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2715
2716 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2717 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2718 ARMcc, DAG, dl);
2719 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2720 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
2721 CCR, Cmp);
2722
2723 SDValue Ops[2] = { Lo, Hi };
2724 return DAG.getMergeValues(Ops, 2, dl);
2725}
2726
2727/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2728/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
2729SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2730 SelectionDAG &DAG) const {
2731 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2732 EVT VT = Op.getValueType();
2733 unsigned VTBits = VT.getSizeInBits();
2734 DebugLoc dl = Op.getDebugLoc();
2735 SDValue ShOpLo = Op.getOperand(0);
2736 SDValue ShOpHi = Op.getOperand(1);
2737 SDValue ShAmt = Op.getOperand(2);
2738 SDValue ARMcc;
2739
2740 assert(Op.getOpcode() == ISD::SHL_PARTS);
2741 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2742 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2743 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2744 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2745 DAG.getConstant(VTBits, MVT::i32));
2746 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2747 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2748
2749 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2750 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2751 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2752 ARMcc, DAG, dl);
2753 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2754 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
2755 CCR, Cmp);
2756
2757 SDValue Ops[2] = { Lo, Hi };
2758 return DAG.getMergeValues(Ops, 2, dl);
2759}
2760
2761SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2762 SelectionDAG &DAG) const {
2763 // The rounding mode is in bits 23:22 of the FPSCR.
2764 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
2765 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
2766 // so that the shift and the AND get folded into a bitfield extract.
2767 DebugLoc dl = Op.getDebugLoc();
2768 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
2769 DAG.getConstant(Intrinsic::arm_get_fpscr,
2770 MVT::i32));
2771 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
2772 DAG.getConstant(1U << 22, MVT::i32));
2773 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2774 DAG.getConstant(22, MVT::i32));
2775 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2776 DAG.getConstant(3, MVT::i32));
2777}
2778
2779static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2780 const ARMSubtarget *ST) {
2781 EVT VT = N->getValueType(0);
2782 DebugLoc dl = N->getDebugLoc();
2783
2784 if (!ST->hasV6T2Ops())
2785 return SDValue();
2786
2787 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2788 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2789}
2790
2791static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2792 const ARMSubtarget *ST) {
2793 EVT VT = N->getValueType(0);
2794 DebugLoc dl = N->getDebugLoc();
2795
2796 // Lower vector shifts on NEON to use VSHL.
2797 if (VT.isVector()) {
2798 assert(ST->hasNEON() && "unexpected vector shift");
2799
2800 // Left shifts translate directly to the vshiftu intrinsic.
2801 if (N->getOpcode() == ISD::SHL)
2802 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2803 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2804 N->getOperand(0), N->getOperand(1));
2805
2806 assert((N->getOpcode() == ISD::SRA ||
2807 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2808
2809 // NEON uses the same intrinsics for both left and right shifts. For
2810 // right shifts, the shift amounts are negative, so negate the vector of
2811 // shift amounts.
2812 EVT ShiftVT = N->getOperand(1).getValueType();
2813 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2814 getZeroVector(ShiftVT, DAG, dl),
2815 N->getOperand(1));
2816 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2817 Intrinsic::arm_neon_vshifts :
2818 Intrinsic::arm_neon_vshiftu);
2819 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2820 DAG.getConstant(vshiftInt, MVT::i32),
2821 N->getOperand(0), NegatedCount);
2822 }
2823
2824 // We can get here for a node like i32 = ISD::SHL i32, i64
2825 if (VT != MVT::i64)
2826 return SDValue();
2827
2828 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2829 "Unknown shift to lower!");
2830
2831 // We only lower SRA, SRL of 1 here, all others use generic lowering.
2832 if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2833 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2834 return SDValue();
2835
2836 // If we are in thumb mode, we don't have RRX.
2837 if (ST->isThumb1Only()) return SDValue();
2838
2839 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
2840 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2841 DAG.getConstant(0, MVT::i32));
2842 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2843 DAG.getConstant(1, MVT::i32));
2844
2845 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2846 // captures the result into a carry flag.
2847 unsigned Opc = N->getOpcode() == ISD::SRL ?
ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 2848 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); 2849 2850 // The low part is an ARMISD::RRX operand, which shifts the carry in. 2851 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 2852 2853 // Merge the pieces into a single i64 value. 2854 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 2855} 2856 2857static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 2858 SDValue TmpOp0, TmpOp1; 2859 bool Invert = false; 2860 bool Swap = false; 2861 unsigned Opc = 0; 2862 2863 SDValue Op0 = Op.getOperand(0); 2864 SDValue Op1 = Op.getOperand(1); 2865 SDValue CC = Op.getOperand(2); 2866 EVT VT = Op.getValueType(); 2867 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2868 DebugLoc dl = Op.getDebugLoc(); 2869 2870 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 2871 switch (SetCCOpcode) { 2872 default: llvm_unreachable("Illegal FP comparison"); break; 2873 case ISD::SETUNE: 2874 case ISD::SETNE: Invert = true; // Fallthrough 2875 case ISD::SETOEQ: 2876 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2877 case ISD::SETOLT: 2878 case ISD::SETLT: Swap = true; // Fallthrough 2879 case ISD::SETOGT: 2880 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2881 case ISD::SETOLE: 2882 case ISD::SETLE: Swap = true; // Fallthrough 2883 case ISD::SETOGE: 2884 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2885 case ISD::SETUGE: Swap = true; // Fallthrough 2886 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 2887 case ISD::SETUGT: Swap = true; // Fallthrough 2888 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 2889 case ISD::SETUEQ: Invert = true; // Fallthrough 2890 case ISD::SETONE: 2891 // Expand this to (OLT | OGT). 2892 TmpOp0 = Op0; 2893 TmpOp1 = Op1; 2894 Opc = ISD::OR; 2895 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2896 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 2897 break; 2898 case ISD::SETUO: Invert = true; // Fallthrough 2899 case ISD::SETO: 2900 // Expand this to (OLT | OGE). 2901 TmpOp0 = Op0; 2902 TmpOp1 = Op1; 2903 Opc = ISD::OR; 2904 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2905 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 2906 break; 2907 } 2908 } else { 2909 // Integer comparisons. 2910 switch (SetCCOpcode) { 2911 default: llvm_unreachable("Illegal integer comparison"); break; 2912 case ISD::SETNE: Invert = true; 2913 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2914 case ISD::SETLT: Swap = true; 2915 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2916 case ISD::SETLE: Swap = true; 2917 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2918 case ISD::SETULT: Swap = true; 2919 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 2920 case ISD::SETULE: Swap = true; 2921 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 2922 } 2923 2924 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 2925 if (Opc == ARMISD::VCEQ) { 2926 2927 SDValue AndOp; 2928 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 2929 AndOp = Op0; 2930 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 2931 AndOp = Op1; 2932 2933 // Ignore bitconvert. 
2934 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) 2935 AndOp = AndOp.getOperand(0); 2936 2937 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 2938 Opc = ARMISD::VTST; 2939 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); 2940 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); 2941 Invert = !Invert; 2942 } 2943 } 2944 } 2945 2946 if (Swap) 2947 std::swap(Op0, Op1); 2948 2949 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 2950 2951 if (Invert) 2952 Result = DAG.getNOT(dl, Result, VT); 2953 2954 return Result; 2955} 2956 2957/// isNEONModifiedImm - Check if the specified splat value corresponds to a 2958/// valid vector constant for a NEON instruction with a "modified immediate" 2959/// operand (e.g., VMOV). If so, return the encoded value. 2960static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 2961 unsigned SplatBitSize, SelectionDAG &DAG, 2962 EVT &VT, bool is128Bits, bool isVMOV) { 2963 unsigned OpCmode, Imm; 2964 2965 // SplatBitSize is set to the smallest size that splats the vector, so a 2966 // zero vector will always have SplatBitSize == 8. However, NEON modified 2967 // immediate instructions others than VMOV do not support the 8-bit encoding 2968 // of a zero vector, and the default encoding of zero is supposed to be the 2969 // 32-bit version. 2970 if (SplatBits == 0) 2971 SplatBitSize = 32; 2972 2973 switch (SplatBitSize) { 2974 case 8: 2975 if (!isVMOV) 2976 return SDValue(); 2977 // Any 1-byte value is OK. Op=0, Cmode=1110. 2978 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 2979 OpCmode = 0xe; 2980 Imm = SplatBits; 2981 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 2982 break; 2983 2984 case 16: 2985 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 2986 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 2987 if ((SplatBits & ~0xff) == 0) { 2988 // Value = 0x00nn: Op=x, Cmode=100x. 2989 OpCmode = 0x8; 2990 Imm = SplatBits; 2991 break; 2992 } 2993 if ((SplatBits & ~0xff00) == 0) { 2994 // Value = 0xnn00: Op=x, Cmode=101x. 2995 OpCmode = 0xa; 2996 Imm = SplatBits >> 8; 2997 break; 2998 } 2999 return SDValue(); 3000 3001 case 32: 3002 // NEON's 32-bit VMOV supports splat values where: 3003 // * only one byte is nonzero, or 3004 // * the least significant byte is 0xff and the second byte is nonzero, or 3005 // * the least significant 2 bytes are 0xff and the third is nonzero. 3006 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3007 if ((SplatBits & ~0xff) == 0) { 3008 // Value = 0x000000nn: Op=x, Cmode=000x. 3009 OpCmode = 0; 3010 Imm = SplatBits; 3011 break; 3012 } 3013 if ((SplatBits & ~0xff00) == 0) { 3014 // Value = 0x0000nn00: Op=x, Cmode=001x. 3015 OpCmode = 0x2; 3016 Imm = SplatBits >> 8; 3017 break; 3018 } 3019 if ((SplatBits & ~0xff0000) == 0) { 3020 // Value = 0x00nn0000: Op=x, Cmode=010x. 3021 OpCmode = 0x4; 3022 Imm = SplatBits >> 16; 3023 break; 3024 } 3025 if ((SplatBits & ~0xff000000) == 0) { 3026 // Value = 0xnn000000: Op=x, Cmode=011x. 3027 OpCmode = 0x6; 3028 Imm = SplatBits >> 24; 3029 break; 3030 } 3031 3032 if ((SplatBits & ~0xffff) == 0 && 3033 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3034 // Value = 0x0000nnff: Op=x, Cmode=1100. 3035 OpCmode = 0xc; 3036 Imm = SplatBits >> 8; 3037 SplatBits |= 0xff; 3038 break; 3039 } 3040 3041 if ((SplatBits & ~0xffffff) == 0 && 3042 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3043 // Value = 0x00nnffff: Op=x, Cmode=1101. 
3044 OpCmode = 0xd; 3045 Imm = SplatBits >> 16; 3046 SplatBits |= 0xffff; 3047 break; 3048 } 3049 3050 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3051 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3052 // VMOV.I32. A (very) minor optimization would be to replicate the value 3053 // and fall through here to test for a valid 64-bit splat. But, then the 3054 // caller would also need to check and handle the change in size. 3055 return SDValue(); 3056 3057 case 64: { 3058 if (!isVMOV) 3059 return SDValue(); 3060 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 3061 uint64_t BitMask = 0xff; 3062 uint64_t Val = 0; 3063 unsigned ImmMask = 1; 3064 Imm = 0; 3065 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3066 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3067 Val |= BitMask; 3068 Imm |= ImmMask; 3069 } else if ((SplatBits & BitMask) != 0) { 3070 return SDValue(); 3071 } 3072 BitMask <<= 8; 3073 ImmMask <<= 1; 3074 } 3075 // Op=1, Cmode=1110. 3076 OpCmode = 0x1e; 3077 SplatBits = Val; 3078 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3079 break; 3080 } 3081 3082 default: 3083 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3084 return SDValue(); 3085 } 3086 3087 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3088 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3089} 3090 3091static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3092 bool &ReverseVEXT, unsigned &Imm) { 3093 unsigned NumElts = VT.getVectorNumElements(); 3094 ReverseVEXT = false; 3095 Imm = M[0]; 3096 3097 // If this is a VEXT shuffle, the immediate value is the index of the first 3098 // element. The other shuffle indices must be the successive elements after 3099 // the first one. 3100 unsigned ExpectedElt = Imm; 3101 for (unsigned i = 1; i < NumElts; ++i) { 3102 // Increment the expected index. If it wraps around, it may still be 3103 // a VEXT but the source vectors must be swapped. 3104 ExpectedElt += 1; 3105 if (ExpectedElt == NumElts * 2) { 3106 ExpectedElt = 0; 3107 ReverseVEXT = true; 3108 } 3109 3110 if (ExpectedElt != static_cast<unsigned>(M[i])) 3111 return false; 3112 } 3113 3114 // Adjust the index value if the source operands will be swapped. 3115 if (ReverseVEXT) 3116 Imm -= NumElts; 3117 3118 return true; 3119} 3120 3121/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3122/// instruction with the specified blocksize. (The order of the elements 3123/// within each block of the vector is reversed.) 3124static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 3125 unsigned BlockSize) { 3126 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3127 "Only possible block sizes for VREV are: 16, 32, 64"); 3128 3129 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3130 if (EltSz == 64) 3131 return false; 3132 3133 unsigned NumElts = VT.getVectorNumElements(); 3134 unsigned BlockElts = M[0] + 1; 3135 3136 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3137 return false; 3138 3139 for (unsigned i = 0; i < NumElts; ++i) { 3140 if ((unsigned) M[i] != 3141 (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3142 return false; 3143 } 3144 3145 return true; 3146} 3147 3148static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 3149 unsigned &WhichResult) { 3150 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3151 if (EltSz == 64) 3152 return false; 3153 3154 unsigned NumElts = VT.getVectorNumElements(); 3155 WhichResult = (M[0] == 0 ? 
0 : 1); 3156 for (unsigned i = 0; i < NumElts; i += 2) { 3157 if ((unsigned) M[i] != i + WhichResult || 3158 (unsigned) M[i+1] != i + NumElts + WhichResult) 3159 return false; 3160 } 3161 return true; 3162} 3163 3164/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3165/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3166/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3167static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3168 unsigned &WhichResult) { 3169 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3170 if (EltSz == 64) 3171 return false; 3172 3173 unsigned NumElts = VT.getVectorNumElements(); 3174 WhichResult = (M[0] == 0 ? 0 : 1); 3175 for (unsigned i = 0; i < NumElts; i += 2) { 3176 if ((unsigned) M[i] != i + WhichResult || 3177 (unsigned) M[i+1] != i + WhichResult) 3178 return false; 3179 } 3180 return true; 3181} 3182 3183static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 3184 unsigned &WhichResult) { 3185 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3186 if (EltSz == 64) 3187 return false; 3188 3189 unsigned NumElts = VT.getVectorNumElements(); 3190 WhichResult = (M[0] == 0 ? 0 : 1); 3191 for (unsigned i = 0; i != NumElts; ++i) { 3192 if ((unsigned) M[i] != 2 * i + WhichResult) 3193 return false; 3194 } 3195 3196 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3197 if (VT.is64BitVector() && EltSz == 32) 3198 return false; 3199 3200 return true; 3201} 3202 3203/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3204/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3205/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3206static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3207 unsigned &WhichResult) { 3208 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3209 if (EltSz == 64) 3210 return false; 3211 3212 unsigned Half = VT.getVectorNumElements() / 2; 3213 WhichResult = (M[0] == 0 ? 0 : 1); 3214 for (unsigned j = 0; j != 2; ++j) { 3215 unsigned Idx = WhichResult; 3216 for (unsigned i = 0; i != Half; ++i) { 3217 if ((unsigned) M[i + j * Half] != Idx) 3218 return false; 3219 Idx += 2; 3220 } 3221 } 3222 3223 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3224 if (VT.is64BitVector() && EltSz == 32) 3225 return false; 3226 3227 return true; 3228} 3229 3230static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3231 unsigned &WhichResult) { 3232 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3233 if (EltSz == 64) 3234 return false; 3235 3236 unsigned NumElts = VT.getVectorNumElements(); 3237 WhichResult = (M[0] == 0 ? 0 : 1); 3238 unsigned Idx = WhichResult * NumElts / 2; 3239 for (unsigned i = 0; i != NumElts; i += 2) { 3240 if ((unsigned) M[i] != Idx || 3241 (unsigned) M[i+1] != Idx + NumElts) 3242 return false; 3243 Idx += 1; 3244 } 3245 3246 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3247 if (VT.is64BitVector() && EltSz == 32) 3248 return false; 3249 3250 return true; 3251} 3252 3253/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3254/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3255/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 
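/// For a 4-element vector, WhichResult == 0 corresponds to the low-half result <0, 0, 1, 1> and WhichResult == 1 to the high-half result <2, 2, 3, 3>.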
3256static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3257 unsigned &WhichResult) { 3258 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3259 if (EltSz == 64) 3260 return false; 3261 3262 unsigned NumElts = VT.getVectorNumElements(); 3263 WhichResult = (M[0] == 0 ? 0 : 1); 3264 unsigned Idx = WhichResult * NumElts / 2; 3265 for (unsigned i = 0; i != NumElts; i += 2) { 3266 if ((unsigned) M[i] != Idx || 3267 (unsigned) M[i+1] != Idx) 3268 return false; 3269 Idx += 1; 3270 } 3271 3272 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3273 if (VT.is64BitVector() && EltSz == 32) 3274 return false; 3275 3276 return true; 3277} 3278 3279// If N is an integer constant that can be moved into a register in one 3280// instruction, return an SDValue of such a constant (will become a MOV 3281// instruction). Otherwise return null. 3282static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 3283 const ARMSubtarget *ST, DebugLoc dl) { 3284 uint64_t Val; 3285 if (!isa<ConstantSDNode>(N)) 3286 return SDValue(); 3287 Val = cast<ConstantSDNode>(N)->getZExtValue(); 3288 3289 if (ST->isThumb1Only()) { 3290 if (Val <= 255 || ~Val <= 255) 3291 return DAG.getConstant(Val, MVT::i32); 3292 } else { 3293 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 3294 return DAG.getConstant(Val, MVT::i32); 3295 } 3296 return SDValue(); 3297} 3298 3299// If this is a case we can't handle, return null and let the default 3300// expansion code take care of it. 3301static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3302 const ARMSubtarget *ST) { 3303 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3304 DebugLoc dl = Op.getDebugLoc(); 3305 EVT VT = Op.getValueType(); 3306 3307 APInt SplatBits, SplatUndef; 3308 unsigned SplatBitSize; 3309 bool HasAnyUndefs; 3310 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3311 if (SplatBitSize <= 64) { 3312 // Check if an immediate VMOV works. 3313 EVT VmovVT; 3314 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3315 SplatUndef.getZExtValue(), SplatBitSize, 3316 DAG, VmovVT, VT.is128BitVector(), true); 3317 if (Val.getNode()) { 3318 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3319 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3320 } 3321 3322 // Try an immediate VMVN. 3323 uint64_t NegatedImm = (SplatBits.getZExtValue() ^ 3324 ((1LL << SplatBitSize) - 1)); 3325 Val = isNEONModifiedImm(NegatedImm, 3326 SplatUndef.getZExtValue(), SplatBitSize, 3327 DAG, VmovVT, VT.is128BitVector(), false); 3328 if (Val.getNode()) { 3329 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 3330 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3331 } 3332 } 3333 } 3334 3335 // Scan through the operands to see if only one value is used. 
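// For example, <a, a, a, a> and <a, undef, a, a> use only one value (undef elements are ignored), while <a, b, a, b> does not. Whether every element is a constant is also recorded, since an all-constant vector can be materialized from the constant pool below.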
3336 unsigned NumElts = VT.getVectorNumElements(); 3337 bool isOnlyLowElement = true; 3338 bool usesOnlyOneValue = true; 3339 bool isConstant = true; 3340 SDValue Value; 3341 for (unsigned i = 0; i < NumElts; ++i) { 3342 SDValue V = Op.getOperand(i); 3343 if (V.getOpcode() == ISD::UNDEF) 3344 continue; 3345 if (i > 0) 3346 isOnlyLowElement = false; 3347 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 3348 isConstant = false; 3349 3350 if (!Value.getNode()) 3351 Value = V; 3352 else if (V != Value) 3353 usesOnlyOneValue = false; 3354 } 3355 3356 if (!Value.getNode()) 3357 return DAG.getUNDEF(VT); 3358 3359 if (isOnlyLowElement) 3360 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 3361 3362 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3363 3364 if (EnableARMVDUPsplat) { 3365 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 3366 // i32 and try again. 3367 if (usesOnlyOneValue && EltSize <= 32) { 3368 if (!isConstant) 3369 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3370 if (VT.getVectorElementType().isFloatingPoint()) { 3371 SmallVector<SDValue, 8> Ops; 3372 for (unsigned i = 0; i < NumElts; ++i) 3373 Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, 3374 Op.getOperand(i))); 3375 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0], 3376 NumElts); 3377 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, 3378 LowerBUILD_VECTOR(Val, DAG, ST)); 3379 } 3380 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 3381 if (Val.getNode()) 3382 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 3383 } 3384 } 3385 3386 // If all elements are constants and the case above didn't get hit, fall back 3387 // to the default expansion, which will generate a load from the constant 3388 // pool. 3389 if (isConstant) 3390 return SDValue(); 3391 3392 if (!EnableARMVDUPsplat) { 3393 // Use VDUP for non-constant splats. 3394 if (usesOnlyOneValue && EltSize <= 32) 3395 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3396 } 3397 3398 // Vectors with 32- or 64-bit elements can be built by directly assigning 3399 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 3400 // will be legalized. 3401 if (EltSize >= 32) { 3402 // Do the expansion with floating-point types, since that is what the VFP 3403 // registers are defined to use, and since i64 is not legal. 3404 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3405 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3406 SmallVector<SDValue, 8> Ops; 3407 for (unsigned i = 0; i < NumElts; ++i) 3408 Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); 3409 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3410 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3411 } 3412 3413 return SDValue(); 3414} 3415 3416/// isShuffleMaskLegal - Targets can use this to indicate that they only 3417/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3418/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3419/// are assumed to be legal. 3420bool 3421ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 3422 EVT VT) const { 3423 if (VT.getVectorNumElements() == 4 && 3424 (VT.is128BitVector() || VT.is64BitVector())) { 3425 unsigned PFIndexes[4]; 3426 for (unsigned i = 0; i != 4; ++i) { 3427 if (M[i] < 0) 3428 PFIndexes[i] = 8; 3429 else 3430 PFIndexes[i] = M[i]; 3431 } 3432 3433 // Compute the index in the perfect shuffle table. 
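// Each index is a base-9 digit (0-7 selects an element, 8 means undef), so, for example, the mask <0, 4, 1, 5> maps to entry ((0*9+4)*9+1)*9+5 == 338.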
3434 unsigned PFTableIndex = 3435 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3436 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3437 unsigned Cost = (PFEntry >> 30); 3438 3439 if (Cost <= 4) 3440 return true; 3441 } 3442 3443 bool ReverseVEXT; 3444 unsigned Imm, WhichResult; 3445 3446 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3447 return (EltSize >= 32 || 3448 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 3449 isVREVMask(M, VT, 64) || 3450 isVREVMask(M, VT, 32) || 3451 isVREVMask(M, VT, 16) || 3452 isVEXTMask(M, VT, ReverseVEXT, Imm) || 3453 isVTRNMask(M, VT, WhichResult) || 3454 isVUZPMask(M, VT, WhichResult) || 3455 isVZIPMask(M, VT, WhichResult) || 3456 isVTRN_v_undef_Mask(M, VT, WhichResult) || 3457 isVUZP_v_undef_Mask(M, VT, WhichResult) || 3458 isVZIP_v_undef_Mask(M, VT, WhichResult)); 3459} 3460 3461/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 3462/// the specified operations to build the shuffle. 3463static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 3464 SDValue RHS, SelectionDAG &DAG, 3465 DebugLoc dl) { 3466 unsigned OpNum = (PFEntry >> 26) & 0x0F; 3467 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 3468 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 3469 3470 enum { 3471 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 3472 OP_VREV, 3473 OP_VDUP0, 3474 OP_VDUP1, 3475 OP_VDUP2, 3476 OP_VDUP3, 3477 OP_VEXT1, 3478 OP_VEXT2, 3479 OP_VEXT3, 3480 OP_VUZPL, // VUZP, left result 3481 OP_VUZPR, // VUZP, right result 3482 OP_VZIPL, // VZIP, left result 3483 OP_VZIPR, // VZIP, right result 3484 OP_VTRNL, // VTRN, left result 3485 OP_VTRNR // VTRN, right result 3486 }; 3487 3488 if (OpNum == OP_COPY) { 3489 if (LHSID == (1*9+2)*9+3) return LHS; 3490 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 3491 return RHS; 3492 } 3493 3494 SDValue OpLHS, OpRHS; 3495 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 3496 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 3497 EVT VT = OpLHS.getValueType(); 3498 3499 switch (OpNum) { 3500 default: llvm_unreachable("Unknown shuffle opcode!"); 3501 case OP_VREV: 3502 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 3503 case OP_VDUP0: 3504 case OP_VDUP1: 3505 case OP_VDUP2: 3506 case OP_VDUP3: 3507 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 3508 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 3509 case OP_VEXT1: 3510 case OP_VEXT2: 3511 case OP_VEXT3: 3512 return DAG.getNode(ARMISD::VEXT, dl, VT, 3513 OpLHS, OpRHS, 3514 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 3515 case OP_VUZPL: 3516 case OP_VUZPR: 3517 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3518 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 3519 case OP_VZIPL: 3520 case OP_VZIPR: 3521 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3522 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 3523 case OP_VTRNL: 3524 case OP_VTRNR: 3525 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3526 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 3527 } 3528} 3529 3530static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 3531 SDValue V1 = Op.getOperand(0); 3532 SDValue V2 = Op.getOperand(1); 3533 DebugLoc dl = Op.getDebugLoc(); 3534 EVT VT = Op.getValueType(); 3535 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 3536 SmallVector<int, 8> ShuffleMask; 3537 3538 // Convert shuffles that are directly supported on NEON to target-specific 3539 // DAG nodes, 
instead of keeping them as shuffles and matching them again 3540 // during code selection. This is more efficient and avoids the possibility 3541 // of inconsistencies between legalization and selection. 3542 // FIXME: floating-point vectors should be canonicalized to integer vectors 3543 // of the same size so that they get CSEd properly. 3544 SVN->getMask(ShuffleMask); 3545 3546 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3547 if (EltSize <= 32) { 3548 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 3549 int Lane = SVN->getSplatIndex(); 3550 // If this is an undef splat, generate it via "just" vdup, if possible. 3551 if (Lane == -1) Lane = 0; 3552 3553 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 3554 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 3555 } 3556 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 3557 DAG.getConstant(Lane, MVT::i32)); 3558 } 3559 3560 bool ReverseVEXT; 3561 unsigned Imm; 3562 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 3563 if (ReverseVEXT) 3564 std::swap(V1, V2); 3565 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 3566 DAG.getConstant(Imm, MVT::i32)); 3567 } 3568 3569 if (isVREVMask(ShuffleMask, VT, 64)) 3570 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 3571 if (isVREVMask(ShuffleMask, VT, 32)) 3572 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 3573 if (isVREVMask(ShuffleMask, VT, 16)) 3574 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 3575 3576 // Check for Neon shuffles that modify both input vectors in place. 3577 // If both results are used, i.e., if there are two shuffles with the same 3578 // source operands and with masks corresponding to both results of one of 3579 // these operations, DAG memoization will ensure that a single node is 3580 // used for both shuffles. 3581 unsigned WhichResult; 3582 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 3583 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3584 V1, V2).getValue(WhichResult); 3585 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 3586 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3587 V1, V2).getValue(WhichResult); 3588 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 3589 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3590 V1, V2).getValue(WhichResult); 3591 3592 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3593 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3594 V1, V1).getValue(WhichResult); 3595 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3596 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3597 V1, V1).getValue(WhichResult); 3598 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3599 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3600 V1, V1).getValue(WhichResult); 3601 } 3602 3603 // If the shuffle is not directly supported and it has 4 elements, use 3604 // the PerfectShuffle-generated table to synthesize it from other shuffles. 3605 unsigned NumElts = VT.getVectorNumElements(); 3606 if (NumElts == 4) { 3607 unsigned PFIndexes[4]; 3608 for (unsigned i = 0; i != 4; ++i) { 3609 if (ShuffleMask[i] < 0) 3610 PFIndexes[i] = 8; 3611 else 3612 PFIndexes[i] = ShuffleMask[i]; 3613 } 3614 3615 // Compute the index in the perfect shuffle table.
3616 unsigned PFTableIndex = 3617 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3618 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3619 unsigned Cost = (PFEntry >> 30); 3620 3621 if (Cost <= 4) 3622 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 3623 } 3624 3625 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 3626 if (EltSize >= 32) { 3627 // Do the expansion with floating-point types, since that is what the VFP 3628 // registers are defined to use, and since i64 is not legal. 3629 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3630 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3631 V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); 3632 V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); 3633 SmallVector<SDValue, 8> Ops; 3634 for (unsigned i = 0; i < NumElts; ++i) { 3635 if (ShuffleMask[i] < 0) 3636 Ops.push_back(DAG.getUNDEF(EltVT)); 3637 else 3638 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 3639 ShuffleMask[i] < (int)NumElts ? V1 : V2, 3640 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 3641 MVT::i32))); 3642 } 3643 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3644 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3645 } 3646 3647 return SDValue(); 3648} 3649 3650static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 3651 EVT VT = Op.getValueType(); 3652 DebugLoc dl = Op.getDebugLoc(); 3653 SDValue Vec = Op.getOperand(0); 3654 SDValue Lane = Op.getOperand(1); 3655 assert(VT == MVT::i32 && 3656 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && 3657 "unexpected type for custom-lowering vector extract"); 3658 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 3659} 3660 3661static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 3662 // The only time a CONCAT_VECTORS operation can have legal types is when 3663 // two 64-bit vectors are concatenated to a 128-bit vector. 3664 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 3665 "unexpected CONCAT_VECTORS"); 3666 DebugLoc dl = Op.getDebugLoc(); 3667 SDValue Val = DAG.getUNDEF(MVT::v2f64); 3668 SDValue Op0 = Op.getOperand(0); 3669 SDValue Op1 = Op.getOperand(1); 3670 if (Op0.getOpcode() != ISD::UNDEF) 3671 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3672 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), 3673 DAG.getIntPtrConstant(0)); 3674 if (Op1.getOpcode() != ISD::UNDEF) 3675 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3676 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), 3677 DAG.getIntPtrConstant(1)); 3678 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); 3679} 3680 3681SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 3682 switch (Op.getOpcode()) { 3683 default: llvm_unreachable("Don't know how to custom lower this!"); 3684 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3685 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 3686 case ISD::GlobalAddress: 3687 return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : 3688 LowerGlobalAddressELF(Op, DAG); 3689 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 3690 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 3691 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 3692 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 3693 case ISD::VASTART: return LowerVASTART(Op, DAG); 3694 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 3695 case ISD::SINT_TO_FP: 3696 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 3697 case ISD::FP_TO_SINT: 3698 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 3699 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 3700 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 3701 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 3702 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 3703 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 3704 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 3705 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 3706 Subtarget); 3707 case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); 3708 case ISD::SHL: 3709 case ISD::SRL: 3710 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 3711 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 3712 case ISD::SRL_PARTS: 3713 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 3714 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 3715 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 3716 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 3717 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3718 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 3719 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 3720 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 3721 } 3722 return SDValue(); 3723} 3724 3725/// ReplaceNodeResults - Replace the results of node with an illegal result 3726/// type with new values built out of custom code. 3727void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 3728 SmallVectorImpl<SDValue>&Results, 3729 SelectionDAG &DAG) const { 3730 SDValue Res; 3731 switch (N->getOpcode()) { 3732 default: 3733 llvm_unreachable("Don't know how to custom expand this!"); 3734 break; 3735 case ISD::BIT_CONVERT: 3736 Res = ExpandBIT_CONVERT(N, DAG); 3737 break; 3738 case ISD::SRL: 3739 case ISD::SRA: 3740 Res = LowerShift(N, DAG, Subtarget); 3741 break; 3742 } 3743 if (Res.getNode()) 3744 Results.push_back(Res); 3745} 3746 3747//===----------------------------------------------------------------------===// 3748// ARM Scheduler Hooks 3749//===----------------------------------------------------------------------===// 3750 3751MachineBasicBlock * 3752ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 3753 MachineBasicBlock *BB, 3754 unsigned Size) const { 3755 unsigned dest = MI->getOperand(0).getReg(); 3756 unsigned ptr = MI->getOperand(1).getReg(); 3757 unsigned oldval = MI->getOperand(2).getReg(); 3758 unsigned newval = MI->getOperand(3).getReg(); 3759 unsigned scratch = BB->getParent()->getRegInfo() 3760 .createVirtualRegister(ARM::GPRRegisterClass); 3761 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3762 DebugLoc dl = MI->getDebugLoc(); 3763 bool isThumb2 = Subtarget->isThumb2(); 3764 3765 unsigned ldrOpc, strOpc; 3766 switch (Size) { 3767 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 3768 case 1: 3769 ldrOpc = isThumb2 ? 
ARM::t2LDREXB : ARM::LDREXB; 3770 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3771 break; 3772 case 2: 3773 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3774 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3775 break; 3776 case 4: 3777 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3778 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 3779 break; 3780 } 3781 3782 MachineFunction *MF = BB->getParent(); 3783 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3784 MachineFunction::iterator It = BB; 3785 ++It; // insert the new blocks after the current block 3786 3787 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3788 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3789 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3790 MF->insert(It, loop1MBB); 3791 MF->insert(It, loop2MBB); 3792 MF->insert(It, exitMBB); 3793 3794 // Transfer the remainder of BB and its successor edges to exitMBB. 3795 exitMBB->splice(exitMBB->begin(), BB, 3796 llvm::next(MachineBasicBlock::iterator(MI)), 3797 BB->end()); 3798 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 3799 3800 // thisMBB: 3801 // ... 3802 // fallthrough --> loop1MBB 3803 BB->addSuccessor(loop1MBB); 3804 3805 // loop1MBB: 3806 // ldrex dest, [ptr] 3807 // cmp dest, oldval 3808 // bne exitMBB 3809 BB = loop1MBB; 3810 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3811 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 3812 .addReg(dest).addReg(oldval)); 3813 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3814 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3815 BB->addSuccessor(loop2MBB); 3816 BB->addSuccessor(exitMBB); 3817 3818 // loop2MBB: 3819 // strex scratch, newval, [ptr] 3820 // cmp scratch, #0 3821 // bne loop1MBB 3822 BB = loop2MBB; 3823 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) 3824 .addReg(ptr)); 3825 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3826 .addReg(scratch).addImm(0)); 3827 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3828 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3829 BB->addSuccessor(loop1MBB); 3830 BB->addSuccessor(exitMBB); 3831 3832 // exitMBB: 3833 // ... 3834 BB = exitMBB; 3835 3836 MI->eraseFromParent(); // The instruction is gone now. 3837 3838 return BB; 3839} 3840 3841MachineBasicBlock * 3842ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 3843 unsigned Size, unsigned BinOpcode) const { 3844 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 3845 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3846 3847 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3848 MachineFunction *MF = BB->getParent(); 3849 MachineFunction::iterator It = BB; 3850 ++It; 3851 3852 unsigned dest = MI->getOperand(0).getReg(); 3853 unsigned ptr = MI->getOperand(1).getReg(); 3854 unsigned incr = MI->getOperand(2).getReg(); 3855 DebugLoc dl = MI->getDebugLoc(); 3856 3857 bool isThumb2 = Subtarget->isThumb2(); 3858 unsigned ldrOpc, strOpc; 3859 switch (Size) { 3860 default: llvm_unreachable("unsupported size for AtomicBinary!"); 3861 case 1: 3862 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3863 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3864 break; 3865 case 2: 3866 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3867 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3868 break; 3869 case 4: 3870 ldrOpc = isThumb2 ?
ARM::t2LDREX : ARM::LDREX; 3871 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 3872 break; 3873 } 3874 3875 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3876 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3877 MF->insert(It, loopMBB); 3878 MF->insert(It, exitMBB); 3879 3880 // Transfer the remainder of BB and its successor edges to exitMBB. 3881 exitMBB->splice(exitMBB->begin(), BB, 3882 llvm::next(MachineBasicBlock::iterator(MI)), 3883 BB->end()); 3884 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 3885 3886 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3887 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3888 unsigned scratch2 = (!BinOpcode) ? incr : 3889 RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3890 3891 // thisMBB: 3892 // ... 3893 // fallthrough --> loopMBB 3894 BB->addSuccessor(loopMBB); 3895 3896 // loopMBB: 3897 // ldrex dest, ptr 3898 // <binop> scratch2, dest, incr 3899 // strex scratch, scratch2, ptr 3900 // cmp scratch, #0 3901 // bne- loopMBB 3902 // fallthrough --> exitMBB 3903 BB = loopMBB; 3904 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3905 if (BinOpcode) { 3906 // operand order needs to go the other way for NAND 3907 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 3908 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3909 addReg(incr).addReg(dest)).addReg(0); 3910 else 3911 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3912 addReg(dest).addReg(incr)).addReg(0); 3913 } 3914 3915 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 3916 .addReg(ptr)); 3917 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3918 .addReg(scratch).addImm(0)); 3919 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3920 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3921 3922 BB->addSuccessor(loopMBB); 3923 BB->addSuccessor(exitMBB); 3924 3925 // exitMBB: 3926 // ... 3927 BB = exitMBB; 3928 3929 MI->eraseFromParent(); // The instruction is gone now. 3930 3931 return BB; 3932} 3933 3934static 3935MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 3936 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 3937 E = MBB->succ_end(); I != E; ++I) 3938 if (*I != Succ) 3939 return *I; 3940 llvm_unreachable("Expecting a BB with two successors!"); 3941} 3942 3943MachineBasicBlock * 3944ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 3945 MachineBasicBlock *BB) const { 3946 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3947 DebugLoc dl = MI->getDebugLoc(); 3948 bool isThumb2 = Subtarget->isThumb2(); 3949 switch (MI->getOpcode()) { 3950 default: 3951 MI->dump(); 3952 llvm_unreachable("Unexpected instr type to insert"); 3953 3954 case ARM::ATOMIC_LOAD_ADD_I8: 3955 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3956 case ARM::ATOMIC_LOAD_ADD_I16: 3957 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3958 case ARM::ATOMIC_LOAD_ADD_I32: 3959 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3960 3961 case ARM::ATOMIC_LOAD_AND_I8: 3962 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3963 case ARM::ATOMIC_LOAD_AND_I16: 3964 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3965 case ARM::ATOMIC_LOAD_AND_I32: 3966 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2ANDrr : ARM::ANDrr); 3967 3968 case ARM::ATOMIC_LOAD_OR_I8: 3969 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3970 case ARM::ATOMIC_LOAD_OR_I16: 3971 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3972 case ARM::ATOMIC_LOAD_OR_I32: 3973 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3974 3975 case ARM::ATOMIC_LOAD_XOR_I8: 3976 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3977 case ARM::ATOMIC_LOAD_XOR_I16: 3978 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3979 case ARM::ATOMIC_LOAD_XOR_I32: 3980 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3981 3982 case ARM::ATOMIC_LOAD_NAND_I8: 3983 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3984 case ARM::ATOMIC_LOAD_NAND_I16: 3985 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3986 case ARM::ATOMIC_LOAD_NAND_I32: 3987 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3988 3989 case ARM::ATOMIC_LOAD_SUB_I8: 3990 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3991 case ARM::ATOMIC_LOAD_SUB_I16: 3992 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3993 case ARM::ATOMIC_LOAD_SUB_I32: 3994 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3995 3996 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 3997 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 3998 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 3999 4000 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 4001 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 4002 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 4003 4004 case ARM::tMOVCCr_pseudo: { 4005 // To "insert" a SELECT_CC instruction, we actually have to insert the 4006 // diamond control-flow pattern. The incoming instruction knows the 4007 // destination vreg to set, the condition code register to branch on, the 4008 // true/false values to select between, and a branch opcode to use. 4009 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4010 MachineFunction::iterator It = BB; 4011 ++It; 4012 4013 // thisMBB: 4014 // ... 4015 // TrueVal = ... 4016 // cmpTY ccX, r1, r2 4017 // bCC copy1MBB 4018 // fallthrough --> copy0MBB 4019 MachineBasicBlock *thisMBB = BB; 4020 MachineFunction *F = BB->getParent(); 4021 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 4022 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 4023 F->insert(It, copy0MBB); 4024 F->insert(It, sinkMBB); 4025 4026 // Transfer the remainder of BB and its successor edges to sinkMBB. 4027 sinkMBB->splice(sinkMBB->begin(), BB, 4028 llvm::next(MachineBasicBlock::iterator(MI)), 4029 BB->end()); 4030 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 4031 4032 BB->addSuccessor(copy0MBB); 4033 BB->addSuccessor(sinkMBB); 4034 4035 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 4036 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 4037 4038 // copy0MBB: 4039 // %FalseValue = ... 4040 // # fallthrough to sinkMBB 4041 BB = copy0MBB; 4042 4043 // Update machine-CFG edges 4044 BB->addSuccessor(sinkMBB); 4045 4046 // sinkMBB: 4047 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4048 // ... 
4049 BB = sinkMBB; 4050 BuildMI(*BB, BB->begin(), dl, 4051 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 4052 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4053 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4054 4055 MI->eraseFromParent(); // The pseudo instruction is gone now. 4056 return BB; 4057 } 4058 4059 case ARM::BCCi64: 4060 case ARM::BCCZi64: { 4061 // Compare both parts that make up the double comparison separately for 4062 // equality. 4063 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 4064 4065 unsigned LHS1 = MI->getOperand(1).getReg(); 4066 unsigned LHS2 = MI->getOperand(2).getReg(); 4067 if (RHSisZero) { 4068 AddDefaultPred(BuildMI(BB, dl, 4069 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 4070 .addReg(LHS1).addImm(0)); 4071 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 4072 .addReg(LHS2).addImm(0) 4073 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4074 } else { 4075 unsigned RHS1 = MI->getOperand(3).getReg(); 4076 unsigned RHS2 = MI->getOperand(4).getReg(); 4077 AddDefaultPred(BuildMI(BB, dl, 4078 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4079 .addReg(LHS1).addReg(RHS1)); 4080 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4081 .addReg(LHS2).addReg(RHS2) 4082 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4083 } 4084 4085 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 4086 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 4087 if (MI->getOperand(0).getImm() == ARMCC::NE) 4088 std::swap(destMBB, exitMBB); 4089 4090 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4091 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 4092 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) 4093 .addMBB(exitMBB); 4094 4095 MI->eraseFromParent(); // The pseudo instruction is gone now. 4096 return BB; 4097 } 4098 4099 case ARM::tANDsp: 4100 case ARM::tADDspr_: 4101 case ARM::tSUBspi_: 4102 case ARM::t2SUBrSPi_: 4103 case ARM::t2SUBrSPi12_: 4104 case ARM::t2SUBrSPs_: { 4105 MachineFunction *MF = BB->getParent(); 4106 unsigned DstReg = MI->getOperand(0).getReg(); 4107 unsigned SrcReg = MI->getOperand(1).getReg(); 4108 bool DstIsDead = MI->getOperand(0).isDead(); 4109 bool SrcIsKill = MI->getOperand(1).isKill(); 4110 4111 if (SrcReg != ARM::SP) { 4112 // Copy the source to SP from virtual register. 4113 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); 4114 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 4115 ? 
ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; 4116 BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) 4117 .addReg(SrcReg, getKillRegState(SrcIsKill)); 4118 } 4119 4120 unsigned OpOpc = 0; 4121 bool NeedPred = false, NeedCC = false, NeedOp3 = false; 4122 switch (MI->getOpcode()) { 4123 default: 4124 llvm_unreachable("Unexpected pseudo instruction!"); 4125 case ARM::tANDsp: 4126 OpOpc = ARM::tAND; 4127 NeedPred = true; 4128 break; 4129 case ARM::tADDspr_: 4130 OpOpc = ARM::tADDspr; 4131 break; 4132 case ARM::tSUBspi_: 4133 OpOpc = ARM::tSUBspi; 4134 break; 4135 case ARM::t2SUBrSPi_: 4136 OpOpc = ARM::t2SUBrSPi; 4137 NeedPred = true; NeedCC = true; 4138 break; 4139 case ARM::t2SUBrSPi12_: 4140 OpOpc = ARM::t2SUBrSPi12; 4141 NeedPred = true; 4142 break; 4143 case ARM::t2SUBrSPs_: 4144 OpOpc = ARM::t2SUBrSPs; 4145 NeedPred = true; NeedCC = true; NeedOp3 = true; 4146 break; 4147 } 4148 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); 4149 if (OpOpc == ARM::tAND) 4150 AddDefaultT1CC(MIB); 4151 MIB.addReg(ARM::SP); 4152 MIB.addOperand(MI->getOperand(2)); 4153 if (NeedOp3) 4154 MIB.addOperand(MI->getOperand(3)); 4155 if (NeedPred) 4156 AddDefaultPred(MIB); 4157 if (NeedCC) 4158 AddDefaultCC(MIB); 4159 4160 // Copy the result from SP to virtual register. 4161 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); 4162 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 4163 ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; 4164 BuildMI(*BB, MI, dl, TII->get(CopyOpc)) 4165 .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) 4166 .addReg(ARM::SP); 4167 MI->eraseFromParent(); // The pseudo instruction is gone now. 4168 return BB; 4169 } 4170 } 4171} 4172 4173//===----------------------------------------------------------------------===// 4174// ARM Optimization Hooks 4175//===----------------------------------------------------------------------===// 4176 4177static 4178SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 4179 TargetLowering::DAGCombinerInfo &DCI) { 4180 SelectionDAG &DAG = DCI.DAG; 4181 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4182 EVT VT = N->getValueType(0); 4183 unsigned Opc = N->getOpcode(); 4184 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 4185 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 4186 SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); 4187 ISD::CondCode CC = ISD::SETCC_INVALID; 4188 4189 if (isSlctCC) { 4190 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 4191 } else { 4192 SDValue CCOp = Slct.getOperand(0); 4193 if (CCOp.getOpcode() == ISD::SETCC) 4194 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 4195 } 4196 4197 bool DoXform = false; 4198 bool InvCC = false; 4199 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 4200 "Bad input!"); 4201 4202 if (LHS.getOpcode() == ISD::Constant && 4203 cast<ConstantSDNode>(LHS)->isNullValue()) { 4204 DoXform = true; 4205 } else if (CC != ISD::SETCC_INVALID && 4206 RHS.getOpcode() == ISD::Constant && 4207 cast<ConstantSDNode>(RHS)->isNullValue()) { 4208 std::swap(LHS, RHS); 4209 SDValue Op0 = Slct.getOperand(0); 4210 EVT OpVT = isSlctCC ? Op0.getValueType() : 4211 Op0.getOperand(0).getValueType(); 4212 bool isInt = OpVT.isInteger(); 4213 CC = ISD::getSetCCInverse(CC, isInt); 4214 4215 if (!TLI.isCondCodeLegal(CC, OpVT)) 4216 return SDValue(); // Inverse operator isn't legal. 
4217 4218 DoXform = true; 4219 InvCC = true; 4220 } 4221 4222 if (DoXform) { 4223 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 4224 if (isSlctCC) 4225 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 4226 Slct.getOperand(0), Slct.getOperand(1), CC); 4227 SDValue CCOp = Slct.getOperand(0); 4228 if (InvCC) 4229 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 4230 CCOp.getOperand(0), CCOp.getOperand(1), CC); 4231 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 4232 CCOp, OtherOp, Result); 4233 } 4234 return SDValue(); 4235} 4236 4237/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 4238/// operands N0 and N1. This is a helper for PerformADDCombine that is 4239/// called with the default operands, and if that fails, with commuted 4240/// operands. 4241static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 4242 TargetLowering::DAGCombinerInfo &DCI) { 4243 SelectionDAG &DAG = DCI.DAG; 4244 4245 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 4246 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { 4247 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 4248 if (Result.getNode()) return Result; 4249 } 4250 4251 // fold (add (arm_neon_vabd a, b) c) -> (arm_neon_vaba c, a, b) 4252 EVT VT = N->getValueType(0); 4253 if (N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && VT.isInteger()) { 4254 unsigned IntNo = cast<ConstantSDNode>(N0.getOperand(0))->getZExtValue(); 4255 if (IntNo == Intrinsic::arm_neon_vabds) 4256 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT, 4257 DAG.getConstant(Intrinsic::arm_neon_vabas, MVT::i32), 4258 N1, N0.getOperand(1), N0.getOperand(2)); 4259 if (IntNo == Intrinsic::arm_neon_vabdu) 4260 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT, 4261 DAG.getConstant(Intrinsic::arm_neon_vabau, MVT::i32), 4262 N1, N0.getOperand(1), N0.getOperand(2)); 4263 } 4264 4265 return SDValue(); 4266} 4267 4268/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 4269/// 4270static SDValue PerformADDCombine(SDNode *N, 4271 TargetLowering::DAGCombinerInfo &DCI) { 4272 SDValue N0 = N->getOperand(0); 4273 SDValue N1 = N->getOperand(1); 4274 4275 // First try with the default operand order. 4276 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); 4277 if (Result.getNode()) 4278 return Result; 4279 4280 // If that didn't work, try again with the operands commuted. 4281 return PerformADDCombineWithOperands(N, N1, N0, DCI); 4282} 4283 4284/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 4285/// 4286static SDValue PerformSUBCombine(SDNode *N, 4287 TargetLowering::DAGCombinerInfo &DCI) { 4288 SDValue N0 = N->getOperand(0); 4289 SDValue N1 = N->getOperand(1); 4290 4291 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 4292 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 4293 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 4294 if (Result.getNode()) return Result; 4295 } 4296 4297 return SDValue(); 4298} 4299 4300static SDValue PerformMULCombine(SDNode *N, 4301 TargetLowering::DAGCombinerInfo &DCI, 4302 const ARMSubtarget *Subtarget) { 4303 SelectionDAG &DAG = DCI.DAG; 4304 4305 if (Subtarget->isThumb1Only()) 4306 return SDValue(); 4307 4308 if (DAG.getMachineFunction(). 
4309 getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 4310 return SDValue(); 4311 4312 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 4313 return SDValue(); 4314 4315 EVT VT = N->getValueType(0); 4316 if (VT != MVT::i32) 4317 return SDValue(); 4318 4319 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 4320 if (!C) 4321 return SDValue(); 4322 4323 uint64_t MulAmt = C->getZExtValue(); 4324 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 4325 ShiftAmt = ShiftAmt & (32 - 1); 4326 SDValue V = N->getOperand(0); 4327 DebugLoc DL = N->getDebugLoc(); 4328 4329 SDValue Res; 4330 MulAmt >>= ShiftAmt; 4331 if (isPowerOf2_32(MulAmt - 1)) { 4332 // (mul x, 2^N + 1) => (add (shl x, N), x) 4333 Res = DAG.getNode(ISD::ADD, DL, VT, 4334 V, DAG.getNode(ISD::SHL, DL, VT, 4335 V, DAG.getConstant(Log2_32(MulAmt-1), 4336 MVT::i32))); 4337 } else if (isPowerOf2_32(MulAmt + 1)) { 4338 // (mul x, 2^N - 1) => (sub (shl x, N), x) 4339 Res = DAG.getNode(ISD::SUB, DL, VT, 4340 DAG.getNode(ISD::SHL, DL, VT, 4341 V, DAG.getConstant(Log2_32(MulAmt+1), 4342 MVT::i32)), 4343 V); 4344 } else 4345 return SDValue(); 4346 4347 if (ShiftAmt != 0) 4348 Res = DAG.getNode(ISD::SHL, DL, VT, Res, 4349 DAG.getConstant(ShiftAmt, MVT::i32)); 4350 4351 // Do not add new nodes to DAG combiner worklist. 4352 DCI.CombineTo(N, Res, false); 4353 return SDValue(); 4354} 4355 4356/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 4357static SDValue PerformORCombine(SDNode *N, 4358 TargetLowering::DAGCombinerInfo &DCI, 4359 const ARMSubtarget *Subtarget) { 4360 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 4361 // reasonable. 4362 4363 // BFI is only available on V6T2+ 4364 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 4365 return SDValue(); 4366 4367 SelectionDAG &DAG = DCI.DAG; 4368 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 4369 DebugLoc DL = N->getDebugLoc(); 4370 // 1) or (and A, mask), val => ARMbfi A, val, mask 4371 // iff (val & mask) == val 4372 // 4373 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 4374 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 4375 // && CountPopulation_32(mask) == CountPopulation_32(~mask2) 4376 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 4377 // && CountPopulation_32(mask) == CountPopulation_32(~mask2) 4378 // (i.e., copy a bitfield value into another bitfield of the same width) 4379 if (N0.getOpcode() != ISD::AND) 4380 return SDValue(); 4381 4382 EVT VT = N->getValueType(0); 4383 if (VT != MVT::i32) 4384 return SDValue(); 4385 4386 4387 // The value and the mask need to be constants so we can verify this is 4388 // actually a bitfield set. If the mask is 0xffff, we can do better 4389 // via a movt instruction, so don't use BFI in that case. 4390 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4391 if (!C) 4392 return SDValue(); 4393 unsigned Mask = C->getZExtValue(); 4394 if (Mask == 0xffff) 4395 return SDValue(); 4396 SDValue Res; 4397 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 4398 if ((C = dyn_cast<ConstantSDNode>(N1))) { 4399 unsigned Val = C->getZExtValue(); 4400 if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val) 4401 return SDValue(); 4402 Val >>= CountTrailingZeros_32(~Mask); 4403 4404 Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), 4405 DAG.getConstant(Val, MVT::i32), 4406 DAG.getConstant(Mask, MVT::i32)); 4407 4408 // Do not add new nodes to DAG combiner worklist. 
4409 DCI.CombineTo(N, Res, false); 4410 } else if (N1.getOpcode() == ISD::AND) { 4411 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 4412 C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 4413 if (!C) 4414 return SDValue(); 4415 unsigned Mask2 = C->getZExtValue(); 4416 4417 if (ARM::isBitFieldInvertedMask(Mask) && 4418 ARM::isBitFieldInvertedMask(~Mask2) && 4419 (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { 4420 // The pack halfword instruction works better for masks that fit it, 4421 // so use that when it's available. 4422 if (Subtarget->hasT2ExtractPack() && 4423 (Mask == 0xffff || Mask == 0xffff0000)) 4424 return SDValue(); 4425 // 2a 4426 unsigned lsb = CountTrailingZeros_32(Mask2); 4427 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 4428 DAG.getConstant(lsb, MVT::i32)); 4429 Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res, 4430 DAG.getConstant(Mask, MVT::i32)); 4431 // Do not add new nodes to DAG combiner worklist. 4432 DCI.CombineTo(N, Res, false); 4433 } else if (ARM::isBitFieldInvertedMask(~Mask) && 4434 ARM::isBitFieldInvertedMask(Mask2) && 4435 (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { 4436 // The pack halfword instruction works better for masks that fit it, 4437 // so use that when it's available. 4438 if (Subtarget->hasT2ExtractPack() && 4439 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 4440 return SDValue(); 4441 // 2b 4442 unsigned lsb = CountTrailingZeros_32(Mask); 4443 Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4444 DAG.getConstant(lsb, MVT::i32)); 4445 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 4446 DAG.getConstant(Mask2, MVT::i32)); 4447 // Do not add new nodes to DAG combiner worklist. 4448 DCI.CombineTo(N, Res, false); 4449 } 4450 } 4451 4452 return SDValue(); 4453} 4454 4455/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 4456/// ARMISD::VMOVRRD. 4457static SDValue PerformVMOVRRDCombine(SDNode *N, 4458 TargetLowering::DAGCombinerInfo &DCI) { 4459 // fmrrd(fmdrr x, y) -> x,y 4460 SDValue InDouble = N->getOperand(0); 4461 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 4462 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 4463 return SDValue(); 4464} 4465 4466/// PerformVDUPLANECombine - Target-specific dag combine xforms for 4467/// ARMISD::VDUPLANE. 4468static SDValue PerformVDUPLANECombine(SDNode *N, 4469 TargetLowering::DAGCombinerInfo &DCI) { 4470 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 4471 // redundant. 4472 SDValue Op = N->getOperand(0); 4473 EVT VT = N->getValueType(0); 4474 4475 // Ignore bit_converts. 4476 while (Op.getOpcode() == ISD::BIT_CONVERT) 4477 Op = Op.getOperand(0); 4478 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 4479 return SDValue(); 4480 4481 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 4482 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 4483 // The canonical VMOV for a zero vector uses a 32-bit element size. 
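// A splat of zero is valid at any element size, so if the immediate decodes to zero, treat the element size as 8 bits to make the following check as permissive as possible.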
4484 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4485 unsigned EltBits; 4486 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 4487 EltSize = 8; 4488 if (EltSize > VT.getVectorElementType().getSizeInBits()) 4489 return SDValue(); 4490 4491 SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); 4492 return DCI.CombineTo(N, Res, false); 4493} 4494 4495/// getVShiftImm - Check if this is a valid build_vector for the immediate 4496/// operand of a vector shift operation, where all the elements of the 4497/// build_vector must have the same constant integer value. 4498static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 4499 // Ignore bit_converts. 4500 while (Op.getOpcode() == ISD::BIT_CONVERT) 4501 Op = Op.getOperand(0); 4502 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 4503 APInt SplatBits, SplatUndef; 4504 unsigned SplatBitSize; 4505 bool HasAnyUndefs; 4506 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 4507 HasAnyUndefs, ElementBits) || 4508 SplatBitSize > ElementBits) 4509 return false; 4510 Cnt = SplatBits.getSExtValue(); 4511 return true; 4512} 4513 4514/// isVShiftLImm - Check if this is a valid build_vector for the immediate 4515/// operand of a vector shift left operation. That value must be in the range: 4516/// 0 <= Value < ElementBits for a left shift; or 4517/// 0 <= Value <= ElementBits for a long left shift. 4518static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 4519 assert(VT.isVector() && "vector shift count is not a vector type"); 4520 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 4521 if (! getVShiftImm(Op, ElementBits, Cnt)) 4522 return false; 4523 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 4524} 4525 4526/// isVShiftRImm - Check if this is a valid build_vector for the immediate 4527/// operand of a vector shift right operation. For a shift opcode, the value 4528/// is positive, but for an intrinsic the value count must be negative. The 4529/// absolute value must be in the range: 4530/// 1 <= |Value| <= ElementBits for a right shift; or 4531/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 4532static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 4533 int64_t &Cnt) { 4534 assert(VT.isVector() && "vector shift count is not a vector type"); 4535 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 4536 if (! getVShiftImm(Op, ElementBits, Cnt)) 4537 return false; 4538 if (isIntrinsic) 4539 Cnt = -Cnt; 4540 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 4541} 4542 4543/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 4544static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 4545 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4546 switch (IntNo) { 4547 default: 4548 // Don't do anything for most intrinsics. 4549 break; 4550 4551 // Vector shifts: check for immediate versions and lower them. 4552 // Note: This is done during DAG combining instead of DAG legalizing because 4553 // the build_vectors for 64-bit vector element shift counts are generally 4554 // not legal, and it is hard to see their values after they get legalized to 4555 // loads from a constant pool. 
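// For example, with a constant splat shift amount, (arm_neon_vshiftu x, <3,3,3,3>) becomes (ARMISD::VSHL x, 3), while (arm_neon_vshifts x, <-3,-3,-3,-3>) becomes (ARMISD::VSHRs x, 3).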
4556 case Intrinsic::arm_neon_vshifts: 4557 case Intrinsic::arm_neon_vshiftu: 4558 case Intrinsic::arm_neon_vshiftls: 4559 case Intrinsic::arm_neon_vshiftlu: 4560 case Intrinsic::arm_neon_vshiftn: 4561 case Intrinsic::arm_neon_vrshifts: 4562 case Intrinsic::arm_neon_vrshiftu: 4563 case Intrinsic::arm_neon_vrshiftn: 4564 case Intrinsic::arm_neon_vqshifts: 4565 case Intrinsic::arm_neon_vqshiftu: 4566 case Intrinsic::arm_neon_vqshiftsu: 4567 case Intrinsic::arm_neon_vqshiftns: 4568 case Intrinsic::arm_neon_vqshiftnu: 4569 case Intrinsic::arm_neon_vqshiftnsu: 4570 case Intrinsic::arm_neon_vqrshiftns: 4571 case Intrinsic::arm_neon_vqrshiftnu: 4572 case Intrinsic::arm_neon_vqrshiftnsu: { 4573 EVT VT = N->getOperand(1).getValueType(); 4574 int64_t Cnt; 4575 unsigned VShiftOpc = 0; 4576 4577 switch (IntNo) { 4578 case Intrinsic::arm_neon_vshifts: 4579 case Intrinsic::arm_neon_vshiftu: 4580 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 4581 VShiftOpc = ARMISD::VSHL; 4582 break; 4583 } 4584 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 4585 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 4586 ARMISD::VSHRs : ARMISD::VSHRu); 4587 break; 4588 } 4589 return SDValue(); 4590 4591 case Intrinsic::arm_neon_vshiftls: 4592 case Intrinsic::arm_neon_vshiftlu: 4593 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 4594 break; 4595 llvm_unreachable("invalid shift count for vshll intrinsic"); 4596 4597 case Intrinsic::arm_neon_vrshifts: 4598 case Intrinsic::arm_neon_vrshiftu: 4599 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 4600 break; 4601 return SDValue(); 4602 4603 case Intrinsic::arm_neon_vqshifts: 4604 case Intrinsic::arm_neon_vqshiftu: 4605 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4606 break; 4607 return SDValue(); 4608 4609 case Intrinsic::arm_neon_vqshiftsu: 4610 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4611 break; 4612 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 4613 4614 case Intrinsic::arm_neon_vshiftn: 4615 case Intrinsic::arm_neon_vrshiftn: 4616 case Intrinsic::arm_neon_vqshiftns: 4617 case Intrinsic::arm_neon_vqshiftnu: 4618 case Intrinsic::arm_neon_vqshiftnsu: 4619 case Intrinsic::arm_neon_vqrshiftns: 4620 case Intrinsic::arm_neon_vqrshiftnu: 4621 case Intrinsic::arm_neon_vqrshiftnsu: 4622 // Narrowing shifts require an immediate right shift. 4623 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 4624 break; 4625 llvm_unreachable("invalid shift count for narrowing vector shift " 4626 "intrinsic"); 4627 4628 default: 4629 llvm_unreachable("unhandled vector shift"); 4630 } 4631 4632 switch (IntNo) { 4633 case Intrinsic::arm_neon_vshifts: 4634 case Intrinsic::arm_neon_vshiftu: 4635 // Opcode already set above. 4636 break; 4637 case Intrinsic::arm_neon_vshiftls: 4638 case Intrinsic::arm_neon_vshiftlu: 4639 if (Cnt == VT.getVectorElementType().getSizeInBits()) 4640 VShiftOpc = ARMISD::VSHLLi; 4641 else 4642 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
4643 ARMISD::VSHLLs : ARMISD::VSHLLu); 4644 break; 4645 case Intrinsic::arm_neon_vshiftn: 4646 VShiftOpc = ARMISD::VSHRN; break; 4647 case Intrinsic::arm_neon_vrshifts: 4648 VShiftOpc = ARMISD::VRSHRs; break; 4649 case Intrinsic::arm_neon_vrshiftu: 4650 VShiftOpc = ARMISD::VRSHRu; break; 4651 case Intrinsic::arm_neon_vrshiftn: 4652 VShiftOpc = ARMISD::VRSHRN; break; 4653 case Intrinsic::arm_neon_vqshifts: 4654 VShiftOpc = ARMISD::VQSHLs; break; 4655 case Intrinsic::arm_neon_vqshiftu: 4656 VShiftOpc = ARMISD::VQSHLu; break; 4657 case Intrinsic::arm_neon_vqshiftsu: 4658 VShiftOpc = ARMISD::VQSHLsu; break; 4659 case Intrinsic::arm_neon_vqshiftns: 4660 VShiftOpc = ARMISD::VQSHRNs; break; 4661 case Intrinsic::arm_neon_vqshiftnu: 4662 VShiftOpc = ARMISD::VQSHRNu; break; 4663 case Intrinsic::arm_neon_vqshiftnsu: 4664 VShiftOpc = ARMISD::VQSHRNsu; break; 4665 case Intrinsic::arm_neon_vqrshiftns: 4666 VShiftOpc = ARMISD::VQRSHRNs; break; 4667 case Intrinsic::arm_neon_vqrshiftnu: 4668 VShiftOpc = ARMISD::VQRSHRNu; break; 4669 case Intrinsic::arm_neon_vqrshiftnsu: 4670 VShiftOpc = ARMISD::VQRSHRNsu; break; 4671 } 4672 4673 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4674 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 4675 } 4676 4677 case Intrinsic::arm_neon_vshiftins: { 4678 EVT VT = N->getOperand(1).getValueType(); 4679 int64_t Cnt; 4680 unsigned VShiftOpc = 0; 4681 4682 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 4683 VShiftOpc = ARMISD::VSLI; 4684 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 4685 VShiftOpc = ARMISD::VSRI; 4686 else { 4687 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 4688 } 4689 4690 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4691 N->getOperand(1), N->getOperand(2), 4692 DAG.getConstant(Cnt, MVT::i32)); 4693 } 4694 4695 case Intrinsic::arm_neon_vqrshifts: 4696 case Intrinsic::arm_neon_vqrshiftu: 4697 // No immediate versions of these to check for. 4698 break; 4699 } 4700 4701 return SDValue(); 4702} 4703 4704/// PerformShiftCombine - Checks for immediate versions of vector shifts and 4705/// lowers them. As with the vector shift intrinsics, this is done during DAG 4706/// combining instead of DAG legalizing because the build_vectors for 64-bit 4707/// vector element shift counts are generally not legal, and it is hard to see 4708/// their values after they get legalized to loads from a constant pool. 4709static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 4710 const ARMSubtarget *ST) { 4711 EVT VT = N->getValueType(0); 4712 4713 // Nothing to be done for scalar shifts. 4714 if (! VT.isVector()) 4715 return SDValue(); 4716 4717 assert(ST->hasNEON() && "unexpected vector shift"); 4718 int64_t Cnt; 4719 4720 switch (N->getOpcode()) { 4721 default: llvm_unreachable("unexpected shift opcode"); 4722 4723 case ISD::SHL: 4724 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 4725 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 4726 DAG.getConstant(Cnt, MVT::i32)); 4727 break; 4728 4729 case ISD::SRA: 4730 case ISD::SRL: 4731 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 4732 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
4733 ARMISD::VSHRs : ARMISD::VSHRu); 4734 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 4735 DAG.getConstant(Cnt, MVT::i32)); 4736 } 4737 } 4738 return SDValue(); 4739} 4740 4741/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 4742/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 4743static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 4744 const ARMSubtarget *ST) { 4745 SDValue N0 = N->getOperand(0); 4746 4747 // Check for sign- and zero-extensions of vector extract operations of 8- 4748 // and 16-bit vector elements. NEON supports these directly. They are 4749 // handled during DAG combining because type legalization will promote them 4750 // to 32-bit types and it is messy to recognize the operations after that. 4751 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 4752 SDValue Vec = N0.getOperand(0); 4753 SDValue Lane = N0.getOperand(1); 4754 EVT VT = N->getValueType(0); 4755 EVT EltVT = N0.getValueType(); 4756 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4757 4758 if (VT == MVT::i32 && 4759 (EltVT == MVT::i8 || EltVT == MVT::i16) && 4760 TLI.isTypeLegal(Vec.getValueType())) { 4761 4762 unsigned Opc = 0; 4763 switch (N->getOpcode()) { 4764 default: llvm_unreachable("unexpected opcode"); 4765 case ISD::SIGN_EXTEND: 4766 Opc = ARMISD::VGETLANEs; 4767 break; 4768 case ISD::ZERO_EXTEND: 4769 case ISD::ANY_EXTEND: 4770 Opc = ARMISD::VGETLANEu; 4771 break; 4772 } 4773 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 4774 } 4775 } 4776 4777 return SDValue(); 4778} 4779 4780/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 4781/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 4782static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 4783 const ARMSubtarget *ST) { 4784 // If the target supports NEON, try to use vmax/vmin instructions for f32 4785 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 4786 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 4787 // a NaN; only do the transformation when it matches that behavior. 4788 4789 // For now only do this when using NEON for FP operations; if using VFP, it 4790 // is not obvious that the benefit outweighs the cost of switching to the 4791 // NEON pipeline. 4792 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 4793 N->getValueType(0) != MVT::f32) 4794 return SDValue(); 4795 4796 SDValue CondLHS = N->getOperand(0); 4797 SDValue CondRHS = N->getOperand(1); 4798 SDValue LHS = N->getOperand(2); 4799 SDValue RHS = N->getOperand(3); 4800 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 4801 4802 unsigned Opcode = 0; 4803 bool IsReversed; 4804 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 4805 IsReversed = false; // x CC y ? x : y 4806 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 4807 IsReversed = true ; // x CC y ? y : x 4808 } else { 4809 return SDValue(); 4810 } 4811 4812 bool IsUnordered; 4813 switch (CC) { 4814 default: break; 4815 case ISD::SETOLT: 4816 case ISD::SETOLE: 4817 case ISD::SETLT: 4818 case ISD::SETLE: 4819 case ISD::SETULT: 4820 case ISD::SETULE: 4821 // If LHS is NaN, an ordered comparison will be false and the result will 4822 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 4823 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
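    // (e.g. "x < y ? x : y" with both operands known to be non-NaN becomes
    // ARMISD::FMIN x, y; the reversed form "x < y ? y : x" becomes FMAX.)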
4824 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 4825 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4826 break; 4827 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 4828 // will return -0, so vmin can only be used for unsafe math or if one of 4829 // the operands is known to be nonzero. 4830 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 4831 !UnsafeFPMath && 4832 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4833 break; 4834 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 4835 break; 4836 4837 case ISD::SETOGT: 4838 case ISD::SETOGE: 4839 case ISD::SETGT: 4840 case ISD::SETGE: 4841 case ISD::SETUGT: 4842 case ISD::SETUGE: 4843 // If LHS is NaN, an ordered comparison will be false and the result will 4844 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 4845 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 4846 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 4847 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4848 break; 4849 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 4850 // will return +0, so vmax can only be used for unsafe math or if one of 4851 // the operands is known to be nonzero. 4852 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 4853 !UnsafeFPMath && 4854 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4855 break; 4856 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 4857 break; 4858 } 4859 4860 if (!Opcode) 4861 return SDValue(); 4862 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 4863} 4864 4865SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 4866 DAGCombinerInfo &DCI) const { 4867 switch (N->getOpcode()) { 4868 default: break; 4869 case ISD::ADD: return PerformADDCombine(N, DCI); 4870 case ISD::SUB: return PerformSUBCombine(N, DCI); 4871 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 4872 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 4873 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 4874 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 4875 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 4876 case ISD::SHL: 4877 case ISD::SRA: 4878 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 4879 case ISD::SIGN_EXTEND: 4880 case ISD::ZERO_EXTEND: 4881 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 4882 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 4883 } 4884 return SDValue(); 4885} 4886 4887bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 4888 if (!Subtarget->hasV6Ops()) 4889 // Pre-v6 does not support unaligned mem access. 4890 return false; 4891 4892 // v6+ may or may not support unaligned mem access depending on the system 4893 // configuration. 4894 // FIXME: This is pretty conservative. Should we provide cmdline option to 4895 // control the behaviour? 4896 if (!Subtarget->isTargetDarwin()) 4897 return false; 4898 4899 switch (VT.getSimpleVT().SimpleTy) { 4900 default: 4901 return false; 4902 case MVT::i8: 4903 case MVT::i16: 4904 case MVT::i32: 4905 return true; 4906 // FIXME: VLD1 etc with standard alignment is legal. 
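  // Vector types currently fall through to the default case and report false,
  // even though NEON VLD1/VST1 can handle element-aligned accesses.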
4907 } 4908} 4909 4910static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 4911 if (V < 0) 4912 return false; 4913 4914 unsigned Scale = 1; 4915 switch (VT.getSimpleVT().SimpleTy) { 4916 default: return false; 4917 case MVT::i1: 4918 case MVT::i8: 4919 // Scale == 1; 4920 break; 4921 case MVT::i16: 4922 // Scale == 2; 4923 Scale = 2; 4924 break; 4925 case MVT::i32: 4926 // Scale == 4; 4927 Scale = 4; 4928 break; 4929 } 4930 4931 if ((V & (Scale - 1)) != 0) 4932 return false; 4933 V /= Scale; 4934 return V == (V & ((1LL << 5) - 1)); 4935} 4936 4937static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 4938 const ARMSubtarget *Subtarget) { 4939 bool isNeg = false; 4940 if (V < 0) { 4941 isNeg = true; 4942 V = - V; 4943 } 4944 4945 switch (VT.getSimpleVT().SimpleTy) { 4946 default: return false; 4947 case MVT::i1: 4948 case MVT::i8: 4949 case MVT::i16: 4950 case MVT::i32: 4951 // + imm12 or - imm8 4952 if (isNeg) 4953 return V == (V & ((1LL << 8) - 1)); 4954 return V == (V & ((1LL << 12) - 1)); 4955 case MVT::f32: 4956 case MVT::f64: 4957 // Same as ARM mode. FIXME: NEON? 4958 if (!Subtarget->hasVFP2()) 4959 return false; 4960 if ((V & 3) != 0) 4961 return false; 4962 V >>= 2; 4963 return V == (V & ((1LL << 8) - 1)); 4964 } 4965} 4966 4967/// isLegalAddressImmediate - Return true if the integer value can be used 4968/// as the offset of the target addressing mode for load / store of the 4969/// given type. 4970static bool isLegalAddressImmediate(int64_t V, EVT VT, 4971 const ARMSubtarget *Subtarget) { 4972 if (V == 0) 4973 return true; 4974 4975 if (!VT.isSimple()) 4976 return false; 4977 4978 if (Subtarget->isThumb1Only()) 4979 return isLegalT1AddressImmediate(V, VT); 4980 else if (Subtarget->isThumb2()) 4981 return isLegalT2AddressImmediate(V, VT, Subtarget); 4982 4983 // ARM mode. 4984 if (V < 0) 4985 V = - V; 4986 switch (VT.getSimpleVT().SimpleTy) { 4987 default: return false; 4988 case MVT::i1: 4989 case MVT::i8: 4990 case MVT::i32: 4991 // +- imm12 4992 return V == (V & ((1LL << 12) - 1)); 4993 case MVT::i16: 4994 // +- imm8 4995 return V == (V & ((1LL << 8) - 1)); 4996 case MVT::f32: 4997 case MVT::f64: 4998 if (!Subtarget->hasVFP2()) // FIXME: NEON? 4999 return false; 5000 if ((V & 3) != 0) 5001 return false; 5002 V >>= 2; 5003 return V == (V & ((1LL << 8) - 1)); 5004 } 5005} 5006 5007bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 5008 EVT VT) const { 5009 int Scale = AM.Scale; 5010 if (Scale < 0) 5011 return false; 5012 5013 switch (VT.getSimpleVT().SimpleTy) { 5014 default: return false; 5015 case MVT::i1: 5016 case MVT::i8: 5017 case MVT::i16: 5018 case MVT::i32: 5019 if (Scale == 1) 5020 return true; 5021 // r + r << imm 5022 Scale = Scale & ~1; 5023 return Scale == 2 || Scale == 4 || Scale == 8; 5024 case MVT::i64: 5025 // r + r 5026 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 5027 return true; 5028 return false; 5029 case MVT::isVoid: 5030 // Note, we allow "void" uses (basically, uses that aren't loads or 5031 // stores), because arm allows folding a scale into many arithmetic 5032 // operations. This should be made more precise and revisited later. 5033 5034 // Allow r << imm, but the imm has to be a multiple of two. 5035 if (Scale & 1) return false; 5036 return isPowerOf2_32(Scale); 5037 } 5038} 5039 5040/// isLegalAddressingMode - Return true if the addressing mode represented 5041/// by AM is legal for this target, for a load/store of the specified type. 
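/// For example (illustrative): an ARM-mode i32 load from "[r0, r1, lsl #2]"
/// (AM.Scale == 4 with no base offset) is accepted, whereas adding a nonzero
/// immediate on top of the scaled register is rejected because ARM has no
/// reg + scaled-reg + imm addressing mode.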
5042bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 5043 const Type *Ty) const { 5044 EVT VT = getValueType(Ty, true); 5045 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 5046 return false; 5047 5048 // Can never fold addr of global into load/store. 5049 if (AM.BaseGV) 5050 return false; 5051 5052 switch (AM.Scale) { 5053 case 0: // no scale reg, must be "r+i" or "r", or "i". 5054 break; 5055 case 1: 5056 if (Subtarget->isThumb1Only()) 5057 return false; 5058 // FALL THROUGH. 5059 default: 5060 // ARM doesn't support any R+R*scale+imm addr modes. 5061 if (AM.BaseOffs) 5062 return false; 5063 5064 if (!VT.isSimple()) 5065 return false; 5066 5067 if (Subtarget->isThumb2()) 5068 return isLegalT2ScaledAddressingMode(AM, VT); 5069 5070 int Scale = AM.Scale; 5071 switch (VT.getSimpleVT().SimpleTy) { 5072 default: return false; 5073 case MVT::i1: 5074 case MVT::i8: 5075 case MVT::i32: 5076 if (Scale < 0) Scale = -Scale; 5077 if (Scale == 1) 5078 return true; 5079 // r + r << imm 5080 return isPowerOf2_32(Scale & ~1); 5081 case MVT::i16: 5082 case MVT::i64: 5083 // r + r 5084 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 5085 return true; 5086 return false; 5087 5088 case MVT::isVoid: 5089 // Note, we allow "void" uses (basically, uses that aren't loads or 5090 // stores), because arm allows folding a scale into many arithmetic 5091 // operations. This should be made more precise and revisited later. 5092 5093 // Allow r << imm, but the imm has to be a multiple of two. 5094 if (Scale & 1) return false; 5095 return isPowerOf2_32(Scale); 5096 } 5097 break; 5098 } 5099 return true; 5100} 5101 5102/// isLegalICmpImmediate - Return true if the specified immediate is legal 5103/// icmp immediate, that is the target has icmp instructions which can compare 5104/// a register against the immediate without having to materialize the 5105/// immediate into a register. 
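/// For example (illustrative): "cmp r0, #255" and "cmp r0, #0xff00" are both
/// legal in ARM mode (an 8-bit value rotated right by an even amount), whereas
/// Thumb1 only accepts immediates in the range 0-255.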
5106bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 5107 if (!Subtarget->isThumb()) 5108 return ARM_AM::getSOImmVal(Imm) != -1; 5109 if (Subtarget->isThumb2()) 5110 return ARM_AM::getT2SOImmVal(Imm) != -1; 5111 return Imm >= 0 && Imm <= 255; 5112} 5113 5114static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 5115 bool isSEXTLoad, SDValue &Base, 5116 SDValue &Offset, bool &isInc, 5117 SelectionDAG &DAG) { 5118 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 5119 return false; 5120 5121 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 5122 // AddressingMode 3 5123 Base = Ptr->getOperand(0); 5124 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5125 int RHSC = (int)RHS->getZExtValue(); 5126 if (RHSC < 0 && RHSC > -256) { 5127 assert(Ptr->getOpcode() == ISD::ADD); 5128 isInc = false; 5129 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5130 return true; 5131 } 5132 } 5133 isInc = (Ptr->getOpcode() == ISD::ADD); 5134 Offset = Ptr->getOperand(1); 5135 return true; 5136 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 5137 // AddressingMode 2 5138 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5139 int RHSC = (int)RHS->getZExtValue(); 5140 if (RHSC < 0 && RHSC > -0x1000) { 5141 assert(Ptr->getOpcode() == ISD::ADD); 5142 isInc = false; 5143 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5144 Base = Ptr->getOperand(0); 5145 return true; 5146 } 5147 } 5148 5149 if (Ptr->getOpcode() == ISD::ADD) { 5150 isInc = true; 5151 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); 5152 if (ShOpcVal != ARM_AM::no_shift) { 5153 Base = Ptr->getOperand(1); 5154 Offset = Ptr->getOperand(0); 5155 } else { 5156 Base = Ptr->getOperand(0); 5157 Offset = Ptr->getOperand(1); 5158 } 5159 return true; 5160 } 5161 5162 isInc = (Ptr->getOpcode() == ISD::ADD); 5163 Base = Ptr->getOperand(0); 5164 Offset = Ptr->getOperand(1); 5165 return true; 5166 } 5167 5168 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 5169 return false; 5170} 5171 5172static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 5173 bool isSEXTLoad, SDValue &Base, 5174 SDValue &Offset, bool &isInc, 5175 SelectionDAG &DAG) { 5176 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 5177 return false; 5178 5179 Base = Ptr->getOperand(0); 5180 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5181 int RHSC = (int)RHS->getZExtValue(); 5182 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 5183 assert(Ptr->getOpcode() == ISD::ADD); 5184 isInc = false; 5185 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5186 return true; 5187 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 5188 isInc = Ptr->getOpcode() == ISD::ADD; 5189 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 5190 return true; 5191 } 5192 } 5193 5194 return false; 5195} 5196 5197/// getPreIndexedAddressParts - returns true by value, base pointer and 5198/// offset pointer and addressing mode by reference if the node's address 5199/// can be legally represented as pre-indexed load / store address. 
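/// For example (illustrative): "ldr r0, [r1, #4]!" computes r1+4, loads from
/// that address, and writes the new address back to r1, so a (load (add r1, 4))
/// whose incremented address is used again can become one pre-indexed LDR.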
5200bool 5201ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 5202 SDValue &Offset, 5203 ISD::MemIndexedMode &AM, 5204 SelectionDAG &DAG) const { 5205 if (Subtarget->isThumb1Only()) 5206 return false; 5207 5208 EVT VT; 5209 SDValue Ptr; 5210 bool isSEXTLoad = false; 5211 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 5212 Ptr = LD->getBasePtr(); 5213 VT = LD->getMemoryVT(); 5214 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 5215 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 5216 Ptr = ST->getBasePtr(); 5217 VT = ST->getMemoryVT(); 5218 } else 5219 return false; 5220 5221 bool isInc; 5222 bool isLegal = false; 5223 if (Subtarget->isThumb2()) 5224 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 5225 Offset, isInc, DAG); 5226 else 5227 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 5228 Offset, isInc, DAG); 5229 if (!isLegal) 5230 return false; 5231 5232 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 5233 return true; 5234} 5235 5236/// getPostIndexedAddressParts - returns true by value, base pointer and 5237/// offset pointer and addressing mode by reference if this node can be 5238/// combined with a load / store to form a post-indexed load / store. 5239bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 5240 SDValue &Base, 5241 SDValue &Offset, 5242 ISD::MemIndexedMode &AM, 5243 SelectionDAG &DAG) const { 5244 if (Subtarget->isThumb1Only()) 5245 return false; 5246 5247 EVT VT; 5248 SDValue Ptr; 5249 bool isSEXTLoad = false; 5250 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 5251 VT = LD->getMemoryVT(); 5252 Ptr = LD->getBasePtr(); 5253 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 5254 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 5255 VT = ST->getMemoryVT(); 5256 Ptr = ST->getBasePtr(); 5257 } else 5258 return false; 5259 5260 bool isInc; 5261 bool isLegal = false; 5262 if (Subtarget->isThumb2()) 5263 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 5264 isInc, DAG); 5265 else 5266 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 5267 isInc, DAG); 5268 if (!isLegal) 5269 return false; 5270 5271 if (Ptr != Base) { 5272 // Swap base ptr and offset to catch more post-index load / store when 5273 // it's legal. In Thumb2 mode, offset must be an immediate. 5274 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 5275 !Subtarget->isThumb2()) 5276 std::swap(Base, Offset); 5277 5278 // Post-indexed load / store update the base pointer. 5279 if (Ptr != Base) 5280 return false; 5281 } 5282 5283 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 5284 return true; 5285} 5286 5287void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 5288 const APInt &Mask, 5289 APInt &KnownZero, 5290 APInt &KnownOne, 5291 const SelectionDAG &DAG, 5292 unsigned Depth) const { 5293 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 5294 switch (Op.getOpcode()) { 5295 default: break; 5296 case ARMISD::CMOV: { 5297 // Bits are known zero/one if known on the LHS and RHS. 
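    // (ARMISD::CMOV picks one of its first two operands at run time, so only
    // bits known in both operands are known in the result.)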
5298 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 5299 if (KnownZero == 0 && KnownOne == 0) return; 5300 5301 APInt KnownZeroRHS, KnownOneRHS; 5302 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 5303 KnownZeroRHS, KnownOneRHS, Depth+1); 5304 KnownZero &= KnownZeroRHS; 5305 KnownOne &= KnownOneRHS; 5306 return; 5307 } 5308 } 5309} 5310 5311//===----------------------------------------------------------------------===// 5312// ARM Inline Assembly Support 5313//===----------------------------------------------------------------------===// 5314 5315/// getConstraintType - Given a constraint letter, return the type of 5316/// constraint it is for this target. 5317ARMTargetLowering::ConstraintType 5318ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 5319 if (Constraint.size() == 1) { 5320 switch (Constraint[0]) { 5321 default: break; 5322 case 'l': return C_RegisterClass; 5323 case 'w': return C_RegisterClass; 5324 } 5325 } 5326 return TargetLowering::getConstraintType(Constraint); 5327} 5328 5329std::pair<unsigned, const TargetRegisterClass*> 5330ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5331 EVT VT) const { 5332 if (Constraint.size() == 1) { 5333 // GCC ARM Constraint Letters 5334 switch (Constraint[0]) { 5335 case 'l': 5336 if (Subtarget->isThumb()) 5337 return std::make_pair(0U, ARM::tGPRRegisterClass); 5338 else 5339 return std::make_pair(0U, ARM::GPRRegisterClass); 5340 case 'r': 5341 return std::make_pair(0U, ARM::GPRRegisterClass); 5342 case 'w': 5343 if (VT == MVT::f32) 5344 return std::make_pair(0U, ARM::SPRRegisterClass); 5345 if (VT.getSizeInBits() == 64) 5346 return std::make_pair(0U, ARM::DPRRegisterClass); 5347 if (VT.getSizeInBits() == 128) 5348 return std::make_pair(0U, ARM::QPRRegisterClass); 5349 break; 5350 } 5351 } 5352 if (StringRef("{cc}").equals_lower(Constraint)) 5353 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 5354 5355 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5356} 5357 5358std::vector<unsigned> ARMTargetLowering:: 5359getRegClassForInlineAsmConstraint(const std::string &Constraint, 5360 EVT VT) const { 5361 if (Constraint.size() != 1) 5362 return std::vector<unsigned>(); 5363 5364 switch (Constraint[0]) { // GCC ARM Constraint Letters 5365 default: break; 5366 case 'l': 5367 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 5368 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 5369 0); 5370 case 'r': 5371 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 5372 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 5373 ARM::R8, ARM::R9, ARM::R10, ARM::R11, 5374 ARM::R12, ARM::LR, 0); 5375 case 'w': 5376 if (VT == MVT::f32) 5377 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, 5378 ARM::S4, ARM::S5, ARM::S6, ARM::S7, 5379 ARM::S8, ARM::S9, ARM::S10, ARM::S11, 5380 ARM::S12,ARM::S13,ARM::S14,ARM::S15, 5381 ARM::S16,ARM::S17,ARM::S18,ARM::S19, 5382 ARM::S20,ARM::S21,ARM::S22,ARM::S23, 5383 ARM::S24,ARM::S25,ARM::S26,ARM::S27, 5384 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); 5385 if (VT.getSizeInBits() == 64) 5386 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, 5387 ARM::D4, ARM::D5, ARM::D6, ARM::D7, 5388 ARM::D8, ARM::D9, ARM::D10,ARM::D11, 5389 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); 5390 if (VT.getSizeInBits() == 128) 5391 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, 5392 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); 5393 break; 5394 } 5395 5396 return std::vector<unsigned>(); 5397} 
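// Illustrative use of these constraints from C source (not part of this file):
//   float f, x, y; int r, a, b;
//   asm("add %0, %1, %2"      : "=l"(r) : "l"(a), "l"(b)); // 'l': R0-R7
//   asm("vadd.f32 %0, %1, %2" : "=w"(f) : "w"(x), "w"(y)); // 'w': VFP/NEON regs
// The register lists returned above are the candidates those letters expand to.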
5398 5399/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5400/// vector. If it is invalid, don't add anything to Ops. 5401void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 5402 char Constraint, 5403 std::vector<SDValue>&Ops, 5404 SelectionDAG &DAG) const { 5405 SDValue Result(0, 0); 5406 5407 switch (Constraint) { 5408 default: break; 5409 case 'I': case 'J': case 'K': case 'L': 5410 case 'M': case 'N': case 'O': 5411 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 5412 if (!C) 5413 return; 5414 5415 int64_t CVal64 = C->getSExtValue(); 5416 int CVal = (int) CVal64; 5417 // None of these constraints allow values larger than 32 bits. Check 5418 // that the value fits in an int. 5419 if (CVal != CVal64) 5420 return; 5421 5422 switch (Constraint) { 5423 case 'I': 5424 if (Subtarget->isThumb1Only()) { 5425 // This must be a constant between 0 and 255, for ADD 5426 // immediates. 5427 if (CVal >= 0 && CVal <= 255) 5428 break; 5429 } else if (Subtarget->isThumb2()) { 5430 // A constant that can be used as an immediate value in a 5431 // data-processing instruction. 5432 if (ARM_AM::getT2SOImmVal(CVal) != -1) 5433 break; 5434 } else { 5435 // A constant that can be used as an immediate value in a 5436 // data-processing instruction. 5437 if (ARM_AM::getSOImmVal(CVal) != -1) 5438 break; 5439 } 5440 return; 5441 5442 case 'J': 5443 if (Subtarget->isThumb()) { // FIXME thumb2 5444 // This must be a constant between -255 and -1, for negated ADD 5445 // immediates. This can be used in GCC with an "n" modifier that 5446 // prints the negated value, for use with SUB instructions. It is 5447 // not useful otherwise but is implemented for compatibility. 5448 if (CVal >= -255 && CVal <= -1) 5449 break; 5450 } else { 5451 // This must be a constant between -4095 and 4095. It is not clear 5452 // what this constraint is intended for. Implemented for 5453 // compatibility with GCC. 5454 if (CVal >= -4095 && CVal <= 4095) 5455 break; 5456 } 5457 return; 5458 5459 case 'K': 5460 if (Subtarget->isThumb1Only()) { 5461 // A 32-bit value where only one byte has a nonzero value. Exclude 5462 // zero to match GCC. This constraint is used by GCC internally for 5463 // constants that can be loaded with a move/shift combination. 5464 // It is not useful otherwise but is implemented for compatibility. 5465 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 5466 break; 5467 } else if (Subtarget->isThumb2()) { 5468 // A constant whose bitwise inverse can be used as an immediate 5469 // value in a data-processing instruction. This can be used in GCC 5470 // with a "B" modifier that prints the inverted value, for use with 5471 // BIC and MVN instructions. It is not useful otherwise but is 5472 // implemented for compatibility. 5473 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 5474 break; 5475 } else { 5476 // A constant whose bitwise inverse can be used as an immediate 5477 // value in a data-processing instruction. This can be used in GCC 5478 // with a "B" modifier that prints the inverted value, for use with 5479 // BIC and MVN instructions. It is not useful otherwise but is 5480 // implemented for compatibility. 5481 if (ARM_AM::getSOImmVal(~CVal) != -1) 5482 break; 5483 } 5484 return; 5485 5486 case 'L': 5487 if (Subtarget->isThumb1Only()) { 5488 // This must be a constant between -7 and 7, 5489 // for 3-operand ADD/SUB immediate instructions. 
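        // (e.g. "adds r0, r1, #5"; the 3-operand Thumb1 encoding only has a
        // 3-bit immediate field.)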
5490 if (CVal >= -7 && CVal < 7) 5491 break; 5492 } else if (Subtarget->isThumb2()) { 5493 // A constant whose negation can be used as an immediate value in a 5494 // data-processing instruction. This can be used in GCC with an "n" 5495 // modifier that prints the negated value, for use with SUB 5496 // instructions. It is not useful otherwise but is implemented for 5497 // compatibility. 5498 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 5499 break; 5500 } else { 5501 // A constant whose negation can be used as an immediate value in a 5502 // data-processing instruction. This can be used in GCC with an "n" 5503 // modifier that prints the negated value, for use with SUB 5504 // instructions. It is not useful otherwise but is implemented for 5505 // compatibility. 5506 if (ARM_AM::getSOImmVal(-CVal) != -1) 5507 break; 5508 } 5509 return; 5510 5511 case 'M': 5512 if (Subtarget->isThumb()) { // FIXME thumb2 5513 // This must be a multiple of 4 between 0 and 1020, for 5514 // ADD sp + immediate. 5515 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 5516 break; 5517 } else { 5518 // A power of two or a constant between 0 and 32. This is used in 5519 // GCC for the shift amount on shifted register operands, but it is 5520 // useful in general for any shift amounts. 5521 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 5522 break; 5523 } 5524 return; 5525 5526 case 'N': 5527 if (Subtarget->isThumb()) { // FIXME thumb2 5528 // This must be a constant between 0 and 31, for shift amounts. 5529 if (CVal >= 0 && CVal <= 31) 5530 break; 5531 } 5532 return; 5533 5534 case 'O': 5535 if (Subtarget->isThumb()) { // FIXME thumb2 5536 // This must be a multiple of 4 between -508 and 508, for 5537 // ADD/SUB sp = sp + immediate. 5538 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 5539 break; 5540 } 5541 return; 5542 } 5543 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 5544 break; 5545 } 5546 5547 if (Result.getNode()) { 5548 Ops.push_back(Result); 5549 return; 5550 } 5551 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5552} 5553 5554bool 5555ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 5556 // The ARM target isn't yet aware of offsets. 5557 return false; 5558} 5559 5560int ARM::getVFPf32Imm(const APFloat &FPImm) { 5561 APInt Imm = FPImm.bitcastToAPInt(); 5562 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; 5563 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 5564 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits 5565 5566 // We can handle 4 bits of mantissa. 5567 // mantissa = (16+UInt(e:f:g:h))/16. 5568 if (Mantissa & 0x7ffff) 5569 return -1; 5570 Mantissa >>= 19; 5571 if ((Mantissa & 0xf) != Mantissa) 5572 return -1; 5573 5574 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 5575 if (Exp < -3 || Exp > 4) 5576 return -1; 5577 Exp = ((Exp+3) & 0x7) ^ 4; 5578 5579 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 5580} 5581 5582int ARM::getVFPf64Imm(const APFloat &FPImm) { 5583 APInt Imm = FPImm.bitcastToAPInt(); 5584 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; 5585 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 5586 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; 5587 5588 // We can handle 4 bits of mantissa. 5589 // mantissa = (16+UInt(e:f:g:h))/16. 
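  // For example, +1.0 (sign 0, unbiased exponent 0, zero fraction bits) passes
  // the checks below and encodes as abcdefgh = 0b01110000 (0x70).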
5590   if (Mantissa & 0xffffffffffffLL)
5591     return -1;
5592   Mantissa >>= 48;
5593   if ((Mantissa & 0xf) != Mantissa)
5594     return -1;
5595
5596   // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5597   if (Exp < -3 || Exp > 4)
5598     return -1;
5599   Exp = ((Exp+3) & 0x7) ^ 4;
5600
5601   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5602 }
5603
5604 bool ARM::isBitFieldInvertedMask(unsigned v) {
5605   if (v == 0xffffffff)
5606     return false;
5607   // There can be 1's on either or both "outsides"; all the "inside"
5608   // bits must be 0's.
5609   unsigned int lsb = 0, msb = 31;
5610   while (v & (1 << msb)) --msb;
5611   while (v & (1 << lsb)) ++lsb;
5612   for (unsigned int i = lsb; i <= msb; ++i) {
5613     if (v & (1 << i))
5614       return false;
5615   }
5616   return true;
5617 }
5618
5619 /// isFPImmLegal - Returns true if the target can instruction select the
5620 /// specified FP immediate natively. If false, the legalizer will
5621 /// materialize the FP immediate as a load from a constant pool.
5622 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5623   if (!Subtarget->hasVFP3())
5624     return false;
5625   if (VT == MVT::f32)
5626     return ARM::getVFPf32Imm(Imm) != -1;
5627   if (VT == MVT::f64)
5628     return ARM::getVFPf64Imm(Imm) != -1;
5629   return false;
5630 }
5631
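// For example (illustrative), with VFP3 available constants such as 1.0, 0.5,
// -2.5 and 31.0 can be selected directly as VMOV.F32/VMOV.F64 immediates,
// whereas 0.0 and 1.0e10 do not fit the 8-bit encoding and are still loaded
// from the constant pool.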