ARMISelLowering.cpp revision c2723a57f35dd69bd261faaa71ee7aa05f40a87d
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(true));

static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

static cl::opt<bool>
EnableARMCodePlacement("arm-code-placement", cl::Hidden,
  cl::desc("Enable code placement pass for ARM"),
  cl::init(false));

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
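  // (NEON bitwise instructions are type-agnostic, so funneling every integer
  // vector type through one representative type per register width keeps the
  // instruction-selection patterns small.)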
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
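      // As with the single-precision helpers above, these routines return a
      // nonzero value when the condition holds, so the libcall result is
      // tested against zero using the condition codes set below.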
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
  // use the default expansion.
  bool canHandleAtomics =
    (Subtarget->hasV7Ops() ||
     (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
  if (canHandleAtomics) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
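    // (When expanded, these become calls to the __sync_* runtime routines,
    // which is also why the fences can be folded in below.)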
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }
  // 64-bit versions are always libcalls (for now)
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  }

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops())
    setTargetDAGCombine(ISD::OR);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  maxStoresPerMemcpy = 1;   // temporary - rewrite interface to use type

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  if (EnableARMCodePlacement)
    benefitFromCodePlacementOpt = true;
}

std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CNEG:          return "ARMISD::CNEG";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
"ARMISD::VSHRs"; 643 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 644 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 645 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 646 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 647 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 648 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 649 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 650 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 651 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 652 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 653 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 654 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 655 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 656 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 657 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 658 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 659 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 660 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 661 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 662 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 663 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 664 case ARMISD::VDUP: return "ARMISD::VDUP"; 665 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 666 case ARMISD::VEXT: return "ARMISD::VEXT"; 667 case ARMISD::VREV64: return "ARMISD::VREV64"; 668 case ARMISD::VREV32: return "ARMISD::VREV32"; 669 case ARMISD::VREV16: return "ARMISD::VREV16"; 670 case ARMISD::VZIP: return "ARMISD::VZIP"; 671 case ARMISD::VUZP: return "ARMISD::VUZP"; 672 case ARMISD::VTRN: return "ARMISD::VTRN"; 673 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 674 case ARMISD::FMAX: return "ARMISD::FMAX"; 675 case ARMISD::FMIN: return "ARMISD::FMIN"; 676 case ARMISD::BFI: return "ARMISD::BFI"; 677 } 678} 679 680/// getRegClassFor - Return the register class that should be used for the 681/// specified value type. 682TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { 683 // Map v4i64 to QQ registers but do not make the type legal. Similarly map 684 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to 685 // load / store 4 to 8 consecutive D registers. 686 if (Subtarget->hasNEON()) { 687 if (VT == MVT::v4i64) 688 return ARM::QQPRRegisterClass; 689 else if (VT == MVT::v8i64) 690 return ARM::QQQQPRRegisterClass; 691 } 692 return TargetLowering::getRegClassFor(VT); 693} 694 695// Create a fast isel object. 696FastISel * 697ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { 698 return ARM::createFastISel(funcInfo); 699} 700 701/// getFunctionAlignment - Return the Log2 alignment of this function. 702unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { 703 return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; 704} 705 706Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 707 unsigned NumVals = N->getNumValues(); 708 if (!NumVals) 709 return Sched::RegPressure; 710 711 for (unsigned i = 0; i != NumVals; ++i) { 712 EVT VT = N->getValueType(i); 713 if (VT.isFloatingPoint() || VT.isVector()) 714 return Sched::Latency; 715 } 716 717 if (!N->isMachineOpcode()) 718 return Sched::RegPressure; 719 720 // Load are scheduled for latency even if there instruction itinerary 721 // is not available. 
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  if (TID.mayLoad())
    return Sched::Latency;

  const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
  if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
    return Sched::Latency;
  return Sched::RegPressure;
}

unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                       MachineFunction &MF) const {
  unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case ARM::tGPRRegClassID:
    return 5 - FPDiff;
  case ARM::GPRRegClassID:
    return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0);
  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
  case ARM::DPRRegClassID:
    return 32 - 10;
  }
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

// APCS f64 is in register pairs, possibly split to stack
static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          CCState &State, bool CanFail) {
  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
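  // (An APCS f64 is passed as two consecutive words; each AllocateReg call
  // below claims one word from r0-r3, falling back to the stack when the
  // registers run out.)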
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // For the 2nd half of a v2f64, do not fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };

  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  unsigned T = State.AllocateReg(LoRegList[i]);
  (void)T;
  assert(T == LoRegList[i] && "Could not allocate register");

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false;  // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
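      // The value comes back as two i32 halves in GPRs; rebuild the f64 with
      // VMOVDRR. A v2f64 arrives as two such pairs and is reassembled one
      // lane at a time below.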
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack.
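/// byval arguments are emitted as a memcpy into the outgoing argument area
/// rather than as a plain store.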
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset,
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Temporarily disable tail calls so things don't break.
  if (!EnableARMTailCalls)
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                   IsStructRet,
                                   MF.getFunction()->hasStructRetAttr(),
                                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (!IsSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex,
                                                           0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
    // tBX takes a register source operand.
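    // Pre-v5T Thumb has no BLX immediate, so the callee address is
    // materialized via a constant-pool load (plus a PIC add when needed)
    // rather than encoded directly in the call instruction.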
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex,
                                                           4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const ARMInstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                     CallingConv::ID CalleeCC,
                     bool isVarArg,
                     bool isCalleeStructRet,
                     bool isCallerStructRet,
                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                     const SmallVectorImpl<SDValue> &OutVals,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SelectionDAG& DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall optimize vararg calls unless the call site is not passing
  // any arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them.
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR. This means if we need to reload LR, it takes an extra instruction,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used. Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.
1469 if (Subtarget->isThumb1Only()) 1470 return false; 1471 1472 // For the moment, we can only do this to functions defined in this 1473 // compilation, or to indirect calls. A Thumb B to an ARM function, 1474 // or vice versa, is not easily fixed up in the linker, unlike BL. 1475 // (We could do this by loading the address of the callee into a register; 1476 // that is an extra instruction over the direct call and burns a register 1477 // as well, so is not likely to be a win.) 1478 1479 // It might be safe to remove this restriction on non-Darwin. 1480 1481 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1482 // but we need to make sure there are enough registers; the only valid 1483 // registers are the 4 used for parameters. We don't currently do this 1484 // case. 1485 if (isa<ExternalSymbolSDNode>(Callee)) 1486 return false; 1487 1488 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1489 const GlobalValue *GV = G->getGlobal(); 1490 if (GV->isDeclaration() || GV->isWeakForLinker()) 1491 return false; 1492 } 1493 1494 // If the calling conventions do not match, then we'd better make sure the 1495 // results are returned in the same way as what the caller expects. 1496 if (!CCMatch) { 1497 SmallVector<CCValAssign, 16> RVLocs1; 1498 CCState CCInfo1(CalleeCC, false, getTargetMachine(), 1499 RVLocs1, *DAG.getContext()); 1500 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1501 1502 SmallVector<CCValAssign, 16> RVLocs2; 1503 CCState CCInfo2(CallerCC, false, getTargetMachine(), 1504 RVLocs2, *DAG.getContext()); 1505 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1506 1507 if (RVLocs1.size() != RVLocs2.size()) 1508 return false; 1509 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1510 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1511 return false; 1512 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1513 return false; 1514 if (RVLocs1[i].isRegLoc()) { 1515 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1516 return false; 1517 } else { 1518 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1519 return false; 1520 } 1521 } 1522 } 1523 1524 // If the callee takes no arguments then go on to check the results of the 1525 // call. 1526 if (!Outs.empty()) { 1527 // Check if stack adjustment is needed. For now, do not do this if any 1528 // argument is passed on the stack. 1529 SmallVector<CCValAssign, 16> ArgLocs; 1530 CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), 1531 ArgLocs, *DAG.getContext()); 1532 CCInfo.AnalyzeCallOperands(Outs, 1533 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1534 if (CCInfo.getNextStackOffset()) { 1535 MachineFunction &MF = DAG.getMachineFunction(); 1536 1537 // Check if the arguments are already laid out in the same way as 1538 // the caller's fixed stack objects.
1539 MachineFrameInfo *MFI = MF.getFrameInfo(); 1540 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1541 const ARMInstrInfo *TII = 1542 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1543 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1544 i != e; 1545 ++i, ++realArgIdx) { 1546 CCValAssign &VA = ArgLocs[i]; 1547 EVT RegVT = VA.getLocVT(); 1548 SDValue Arg = OutVals[realArgIdx]; 1549 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1550 if (VA.getLocInfo() == CCValAssign::Indirect) 1551 return false; 1552 if (VA.needsCustom()) { 1553 // f64 and vector types are split into multiple registers or 1554 // register/stack-slot combinations. The types will not match 1555 // the registers; give up on memory f64 refs until we figure 1556 // out what to do about this. 1557 if (!VA.isRegLoc()) 1558 return false; 1559 if (!ArgLocs[++i].isRegLoc()) 1560 return false; 1561 if (RegVT == MVT::v2f64) { 1562 if (!ArgLocs[++i].isRegLoc()) 1563 return false; 1564 if (!ArgLocs[++i].isRegLoc()) 1565 return false; 1566 } 1567 } else if (!VA.isRegLoc()) { 1568 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1569 MFI, MRI, TII)) 1570 return false; 1571 } 1572 } 1573 } 1574 } 1575 1576 return true; 1577} 1578 1579SDValue 1580ARMTargetLowering::LowerReturn(SDValue Chain, 1581 CallingConv::ID CallConv, bool isVarArg, 1582 const SmallVectorImpl<ISD::OutputArg> &Outs, 1583 const SmallVectorImpl<SDValue> &OutVals, 1584 DebugLoc dl, SelectionDAG &DAG) const { 1585 1586 // CCValAssign - represent the assignment of the return value to a location. 1587 SmallVector<CCValAssign, 16> RVLocs; 1588 1589 // CCState - Info about the registers and stack slots. 1590 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, 1591 *DAG.getContext()); 1592 1593 // Analyze outgoing return values. 1594 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1595 isVarArg)); 1596 1597 // If this is the first return lowered for this function, add 1598 // the regs to the liveout set for the function. 1599 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1600 for (unsigned i = 0; i != RVLocs.size(); ++i) 1601 if (RVLocs[i].isRegLoc()) 1602 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1603 } 1604 1605 SDValue Flag; 1606 1607 // Copy the result values into the output registers. 1608 for (unsigned i = 0, realRVLocIdx = 0; 1609 i != RVLocs.size(); 1610 ++i, ++realRVLocIdx) { 1611 CCValAssign &VA = RVLocs[i]; 1612 assert(VA.isRegLoc() && "Can only return in registers!"); 1613 1614 SDValue Arg = OutVals[realRVLocIdx]; 1615 1616 switch (VA.getLocInfo()) { 1617 default: llvm_unreachable("Unknown loc info!"); 1618 case CCValAssign::Full: break; 1619 case CCValAssign::BCvt: 1620 Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); 1621 break; 1622 } 1623 1624 if (VA.needsCustom()) { 1625 if (VA.getLocVT() == MVT::v2f64) { 1626 // Extract the first half and return it in two registers. 
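 // (Illustrative note, assuming the usual AAPCS core-register assignment.)
 // A v2f64 return value is decomposed below into four i32 copies: each f64
 // lane is split into a GPR pair via VMOVRRD, so the vector typically comes
 // back in r0-r3.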
1627 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1628 DAG.getConstant(0, MVT::i32)); 1629 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1630 DAG.getVTList(MVT::i32, MVT::i32), Half); 1631 1632 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1633 Flag = Chain.getValue(1); 1634 VA = RVLocs[++i]; // skip ahead to next loc 1635 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1636 HalfGPRs.getValue(1), Flag); 1637 Flag = Chain.getValue(1); 1638 VA = RVLocs[++i]; // skip ahead to next loc 1639 1640 // Extract the 2nd half and fall through to handle it as an f64 value. 1641 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1642 DAG.getConstant(1, MVT::i32)); 1643 } 1644 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1645 // available. 1646 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1647 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1648 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1649 Flag = Chain.getValue(1); 1650 VA = RVLocs[++i]; // skip ahead to next loc 1651 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1652 Flag); 1653 } else 1654 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1655 1656 // Guarantee that all emitted copies are 1657 // stuck together, so that nothing else is scheduled in between them. 1658 Flag = Chain.getValue(1); 1659 } 1660 1661 SDValue result; 1662 if (Flag.getNode()) 1663 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1664 else // Return Void 1665 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1666 1667 return result; 1668} 1669 1670// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1671// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1672// one of the above-mentioned nodes. It has to be wrapped because otherwise 1673// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1674// be used to form addressing modes. These wrapped nodes will be selected 1675// into MOVi. 1676static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1677 EVT PtrVT = Op.getValueType(); 1678 // FIXME there is no actual debug info here 1679 DebugLoc dl = Op.getDebugLoc(); 1680 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1681 SDValue Res; 1682 if (CP->isMachineConstantPoolEntry()) 1683 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1684 CP->getAlignment()); 1685 else 1686 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1687 CP->getAlignment()); 1688 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1689} 1690 1691unsigned ARMTargetLowering::getJumpTableEncoding() const { 1692 return MachineJumpTableInfo::EK_Inline; 1693} 1694 1695SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1696 SelectionDAG &DAG) const { 1697 MachineFunction &MF = DAG.getMachineFunction(); 1698 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1699 unsigned ARMPCLabelIndex = 0; 1700 DebugLoc DL = Op.getDebugLoc(); 1701 EVT PtrVT = getPointerTy(); 1702 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1703 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1704 SDValue CPAddr; 1705 if (RelocM == Reloc::Static) { 1706 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1707 } else { 1708 unsigned PCAdj = Subtarget->isThumb() ?
4 : 8; 1709 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1710 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, 1711 ARMCP::CPBlockAddress, 1712 PCAdj); 1713 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1714 } 1715 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1716 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1717 PseudoSourceValue::getConstantPool(), 0, 1718 false, false, 0); 1719 if (RelocM == Reloc::Static) 1720 return Result; 1721 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1722 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1723} 1724 1725// Lower ISD::GlobalTLSAddress using the "general dynamic" model 1726SDValue 1727ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1728 SelectionDAG &DAG) const { 1729 DebugLoc dl = GA->getDebugLoc(); 1730 EVT PtrVT = getPointerTy(); 1731 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1732 MachineFunction &MF = DAG.getMachineFunction(); 1733 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1734 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1735 ARMConstantPoolValue *CPV = 1736 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1737 ARMCP::CPValue, PCAdj, "tlsgd", true); 1738 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1739 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1740 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1741 PseudoSourceValue::getConstantPool(), 0, 1742 false, false, 0); 1743 SDValue Chain = Argument.getValue(1); 1744 1745 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1746 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1747 1748 // call __tls_get_addr. 1749 ArgListTy Args; 1750 ArgListEntry Entry; 1751 Entry.Node = Argument; 1752 Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); 1753 Args.push_back(Entry); 1754 // FIXME: is there useful debug info available here? 1755 std::pair<SDValue, SDValue> CallResult = 1756 LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), 1757 false, false, false, false, 1758 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1759 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 1760 return CallResult.first; 1761} 1762 1763// Lower ISD::GlobalTLSAddress using the "initial exec" or 1764// "local exec" model. 1765SDValue 1766ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 1767 SelectionDAG &DAG) const { 1768 const GlobalValue *GV = GA->getGlobal(); 1769 DebugLoc dl = GA->getDebugLoc(); 1770 SDValue Offset; 1771 SDValue Chain = DAG.getEntryNode(); 1772 EVT PtrVT = getPointerTy(); 1773 // Get the Thread Pointer 1774 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1775 1776 if (GV->isDeclaration()) { 1777 MachineFunction &MF = DAG.getMachineFunction(); 1778 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1779 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1780 // Initial exec model. 1781 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 1782 ARMConstantPoolValue *CPV = 1783 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1784 ARMCP::CPValue, PCAdj, "gottpoff", true); 1785 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1786 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1787 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1788 PseudoSourceValue::getConstantPool(), 0, 1789 false, false, 0); 1790 Chain = Offset.getValue(1); 1791 1792 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1793 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 1794 1795 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1796 PseudoSourceValue::getConstantPool(), 0, 1797 false, false, 0); 1798 } else { 1799 // local exec model 1800 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); 1801 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1802 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1803 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1804 PseudoSourceValue::getConstantPool(), 0, 1805 false, false, 0); 1806 } 1807 1808 // The address of the thread-local variable is the sum of the thread 1809 // pointer and the offset of the variable. 1810 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 1811} 1812 1813SDValue 1814ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 1815 // TODO: implement the "local dynamic" model 1816 assert(Subtarget->isTargetELF() && 1817 "TLS not implemented for non-ELF targets"); 1818 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1819 // If the relocation model is PIC, use the "General Dynamic" TLS model; 1820 // otherwise use the "Initial Exec" or "Local Exec" TLS models. 1821 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 1822 return LowerToTLSGeneralDynamicModel(GA, DAG); 1823 else 1824 return LowerToTLSExecModels(GA, DAG); 1825} 1826 1827SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 1828 SelectionDAG &DAG) const { 1829 EVT PtrVT = getPointerTy(); 1830 DebugLoc dl = Op.getDebugLoc(); 1831 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1832 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1833 if (RelocM == Reloc::PIC_) { 1834 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 1835 ARMConstantPoolValue *CPV = 1836 new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT"); 1837 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1838 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1839 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 1840 CPAddr, 1841 PseudoSourceValue::getConstantPool(), 0, 1842 false, false, 0); 1843 SDValue Chain = Result.getValue(1); 1844 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 1845 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 1846 if (!UseGOTOFF) 1847 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1848 PseudoSourceValue::getGOT(), 0, 1849 false, false, 0); 1850 return Result; 1851 } else { 1852 // If we have T2 ops, we can materialize the address directly via movt/movw 1853 // pair. This is always cheaper.
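 // (Illustrative sketch.) With movw/movt the address is materialized in two
 // instructions instead of a PC-relative constant-pool load:
 //   movw r0, :lower16:sym
 //   movt r0, :upper16:sym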
1854 if (Subtarget->useMovt()) { 1855 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 1856 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 1857 } else { 1858 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1859 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1860 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1861 PseudoSourceValue::getConstantPool(), 0, 1862 false, false, 0); 1863 } 1864 } 1865} 1866 1867SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 1868 SelectionDAG &DAG) const { 1869 MachineFunction &MF = DAG.getMachineFunction(); 1870 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1871 unsigned ARMPCLabelIndex = 0; 1872 EVT PtrVT = getPointerTy(); 1873 DebugLoc dl = Op.getDebugLoc(); 1874 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1875 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1876 SDValue CPAddr; 1877 if (RelocM == Reloc::Static) 1878 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1879 else { 1880 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1881 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 1882 ARMConstantPoolValue *CPV = 1883 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 1884 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1885 } 1886 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1887 1888 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1889 PseudoSourceValue::getConstantPool(), 0, 1890 false, false, 0); 1891 SDValue Chain = Result.getValue(1); 1892 1893 if (RelocM == Reloc::PIC_) { 1894 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1895 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1896 } 1897 1898 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 1899 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1900 PseudoSourceValue::getGOT(), 0, 1901 false, false, 0); 1902 1903 return Result; 1904} 1905 1906SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 1907 SelectionDAG &DAG) const { 1908 assert(Subtarget->isTargetELF() && 1909 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 1910 MachineFunction &MF = DAG.getMachineFunction(); 1911 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1912 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1913 EVT PtrVT = getPointerTy(); 1914 DebugLoc dl = Op.getDebugLoc(); 1915 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1916 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1917 "_GLOBAL_OFFSET_TABLE_", 1918 ARMPCLabelIndex, PCAdj); 1919 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1920 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1921 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1922 PseudoSourceValue::getConstantPool(), 0, 1923 false, false, 0); 1924 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1925 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1926} 1927 1928SDValue 1929ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 1930 DebugLoc dl = Op.getDebugLoc(); 1931 SDValue Val = DAG.getConstant(0, MVT::i32); 1932 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 1933 Op.getOperand(1), Val); 1934} 1935 1936SDValue 1937ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 1938 DebugLoc dl = Op.getDebugLoc(); 1939 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 1940 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 1941} 1942 1943SDValue 1944ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 1945 const ARMSubtarget *Subtarget) const { 1946 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1947 DebugLoc dl = Op.getDebugLoc(); 1948 switch (IntNo) { 1949 default: return SDValue(); // Don't custom lower most intrinsics. 1950 case Intrinsic::arm_thread_pointer: { 1951 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1952 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1953 } 1954 case Intrinsic::eh_sjlj_lsda: { 1955 MachineFunction &MF = DAG.getMachineFunction(); 1956 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1957 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1958 EVT PtrVT = getPointerTy(); 1959 DebugLoc dl = Op.getDebugLoc(); 1960 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1961 SDValue CPAddr; 1962 unsigned PCAdj = (RelocM != Reloc::PIC_) 1963 ? 0 : (Subtarget->isThumb() ? 4 : 8); 1964 ARMConstantPoolValue *CPV = 1965 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 1966 ARMCP::CPLSDA, PCAdj); 1967 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1968 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1969 SDValue Result = 1970 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1971 PseudoSourceValue::getConstantPool(), 0, 1972 false, false, 0); 1973 1974 if (RelocM == Reloc::PIC_) { 1975 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1976 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1977 } 1978 return Result; 1979 } 1980 } 1981} 1982 1983static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 1984 const ARMSubtarget *Subtarget) { 1985 DebugLoc dl = Op.getDebugLoc(); 1986 SDValue Op5 = Op.getOperand(5); 1987 unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); 1988 // v6 and v7 can both handle barriers directly, but need to be handled a 1989 // bit differently. Thumb1 and pre-v6 ARM mode use a libcall instead and 1990 // should never get here. 1991 unsigned Opc = isDeviceBarrier ?
ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; 1992 if (Subtarget->hasV7Ops()) 1993 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); 1994 else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) 1995 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), 1996 DAG.getConstant(0, MVT::i32)); 1997 assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 1998 return SDValue(); 1999} 2000 2001static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2002 MachineFunction &MF = DAG.getMachineFunction(); 2003 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2004 2005 // vastart just stores the address of the VarArgsFrameIndex slot into the 2006 // memory location argument. 2007 DebugLoc dl = Op.getDebugLoc(); 2008 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2009 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2010 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2011 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, 2012 false, false, 0); 2013} 2014 2015SDValue 2016ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2017 SDValue &Root, SelectionDAG &DAG, 2018 DebugLoc dl) const { 2019 MachineFunction &MF = DAG.getMachineFunction(); 2020 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2021 2022 TargetRegisterClass *RC; 2023 if (AFI->isThumb1OnlyFunction()) 2024 RC = ARM::tGPRRegisterClass; 2025 else 2026 RC = ARM::GPRRegisterClass; 2027 2028 // Transform the arguments stored in physical registers into virtual ones. 2029 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2030 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2031 2032 SDValue ArgValue2; 2033 if (NextVA.isMemLoc()) { 2034 MachineFrameInfo *MFI = MF.getFrameInfo(); 2035 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2036 2037 // Create load node to retrieve arguments from the stack. 2038 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2039 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2040 PseudoSourceValue::getFixedStack(FI), 0, 2041 false, false, 0); 2042 } else { 2043 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2044 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2045 } 2046 2047 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2048} 2049 2050SDValue 2051ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2052 CallingConv::ID CallConv, bool isVarArg, 2053 const SmallVectorImpl<ISD::InputArg> 2054 &Ins, 2055 DebugLoc dl, SelectionDAG &DAG, 2056 SmallVectorImpl<SDValue> &InVals) 2057 const { 2058 2059 MachineFunction &MF = DAG.getMachineFunction(); 2060 MachineFrameInfo *MFI = MF.getFrameInfo(); 2061 2062 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2063 2064 // Assign locations to all of the incoming arguments. 2065 SmallVector<CCValAssign, 16> ArgLocs; 2066 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 2067 *DAG.getContext()); 2068 CCInfo.AnalyzeFormalArguments(Ins, 2069 CCAssignFnForNode(CallConv, /* Return*/ false, 2070 isVarArg)); 2071 2072 SmallVector<SDValue, 16> ArgValues; 2073 2074 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2075 CCValAssign &VA = ArgLocs[i]; 2076 2077 // Arguments stored in registers. 2078 if (VA.isRegLoc()) { 2079 EVT RegVT = VA.getLocVT(); 2080 2081 SDValue ArgValue; 2082 if (VA.needsCustom()) { 2083 // f64 and vector types are split up into multiple registers or 2084 // combinations of registers and stack slots. 
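 // (Illustrative note, assuming the core-register f64 passing handled by the
 // CC_ARM_*_Custom_f64 helpers.) An f64 formal argument may arrive as a GPR
 // pair such as r0+r1, or as r3 plus a 4-byte stack slot once the registers
 // run out; a v2f64 is handled below as two such f64 halves inserted back
 // into one vector value.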
2085 if (VA.getLocVT() == MVT::v2f64) { 2086 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2087 Chain, DAG, dl); 2088 VA = ArgLocs[++i]; // skip ahead to next loc 2089 SDValue ArgValue2; 2090 if (VA.isMemLoc()) { 2091 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2092 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2093 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2094 PseudoSourceValue::getFixedStack(FI), 0, 2095 false, false, 0); 2096 } else { 2097 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2098 Chain, DAG, dl); 2099 } 2100 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2101 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2102 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2103 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2104 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2105 } else 2106 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2107 2108 } else { 2109 TargetRegisterClass *RC; 2110 2111 if (RegVT == MVT::f32) 2112 RC = ARM::SPRRegisterClass; 2113 else if (RegVT == MVT::f64) 2114 RC = ARM::DPRRegisterClass; 2115 else if (RegVT == MVT::v2f64) 2116 RC = ARM::QPRRegisterClass; 2117 else if (RegVT == MVT::i32) 2118 RC = (AFI->isThumb1OnlyFunction() ? 2119 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2120 else 2121 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2122 2123 // Transform the arguments in physical registers into virtual ones. 2124 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2125 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2126 } 2127 2128 // If this is an 8 or 16-bit value, it is really passed promoted 2129 // to 32 bits. Insert an assert[sz]ext to capture this, then 2130 // truncate to the right size. 2131 switch (VA.getLocInfo()) { 2132 default: llvm_unreachable("Unknown loc info!"); 2133 case CCValAssign::Full: break; 2134 case CCValAssign::BCvt: 2135 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 2136 break; 2137 case CCValAssign::SExt: 2138 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2139 DAG.getValueType(VA.getValVT())); 2140 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2141 break; 2142 case CCValAssign::ZExt: 2143 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2144 DAG.getValueType(VA.getValVT())); 2145 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2146 break; 2147 } 2148 2149 InVals.push_back(ArgValue); 2150 2151 } else { // VA.isRegLoc() 2152 2153 // sanity check 2154 assert(VA.isMemLoc()); 2155 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2156 2157 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; 2158 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); 2159 2160 // Create load nodes to retrieve arguments from the stack. 
2161 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2162 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2163 PseudoSourceValue::getFixedStack(FI), 0, 2164 false, false, 0)); 2165 } 2166 } 2167 2168 // varargs 2169 if (isVarArg) { 2170 static const unsigned GPRArgRegs[] = { 2171 ARM::R0, ARM::R1, ARM::R2, ARM::R3 2172 }; 2173 2174 unsigned NumGPRs = CCInfo.getFirstUnallocated 2175 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2176 2177 unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); 2178 unsigned VARegSize = (4 - NumGPRs) * 4; 2179 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2180 unsigned ArgOffset = CCInfo.getNextStackOffset(); 2181 if (VARegSaveSize) { 2182 // If this function is vararg, store any remaining integer argument regs 2183 // to their spots on the stack so that they may be loaded by dereferencing 2184 // the result of va_arg. 2185 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2186 AFI->setVarArgsFrameIndex( 2187 MFI->CreateFixedObject(VARegSaveSize, 2188 ArgOffset + VARegSaveSize - VARegSize, 2189 true)); 2190 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2191 getPointerTy()); 2192 2193 SmallVector<SDValue, 4> MemOps; 2194 for (; NumGPRs < 4; ++NumGPRs) { 2195 TargetRegisterClass *RC; 2196 if (AFI->isThumb1OnlyFunction()) 2197 RC = ARM::tGPRRegisterClass; 2198 else 2199 RC = ARM::GPRRegisterClass; 2200 2201 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); 2202 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2203 SDValue Store = 2204 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2205 PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 2206 0, false, false, 0); 2207 MemOps.push_back(Store); 2208 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2209 DAG.getConstant(4, getPointerTy())); 2210 } 2211 if (!MemOps.empty()) 2212 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2213 &MemOps[0], MemOps.size()); 2214 } else 2215 // This will point to the next argument passed via stack. 2216 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2217 } 2218 2219 return Chain; 2220} 2221 2222/// isFloatingPointZero - Return true if this is +0.0. 2223static bool isFloatingPointZero(SDValue Op) { 2224 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2225 return CFP->getValueAPF().isPosZero(); 2226 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2227 // Maybe this has already been legalized into the constant pool? 2228 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2229 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2230 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2231 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2232 return CFP->getValueAPF().isPosZero(); 2233 } 2234 } 2235 return false; 2236} 2237 2238/// Returns the appropriate ARM CMP (cmp) and corresponding condition code for 2239/// the given operands. 2240SDValue 2241ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2242 SDValue &ARMcc, SelectionDAG &DAG, 2243 DebugLoc dl) const { 2244 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2245 unsigned C = RHSC->getZExtValue(); 2246 if (!isLegalICmpImmediate(C)) { 2247 // Constant does not fit, try adjusting it by one? 2248 switch (CC) { 2249 default: break; 2250 case ISD::SETLT: 2251 case ISD::SETGE: 2252 if (isLegalICmpImmediate(C-1)) { 2253 CC = (CC == ISD::SETLT) ?
ISD::SETLE : ISD::SETGT; 2254 RHS = DAG.getConstant(C-1, MVT::i32); 2255 } 2256 break; 2257 case ISD::SETULT: 2258 case ISD::SETUGE: 2259 if (C > 0 && isLegalICmpImmediate(C-1)) { 2260 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2261 RHS = DAG.getConstant(C-1, MVT::i32); 2262 } 2263 break; 2264 case ISD::SETLE: 2265 case ISD::SETGT: 2266 if (isLegalICmpImmediate(C+1)) { 2267 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2268 RHS = DAG.getConstant(C+1, MVT::i32); 2269 } 2270 break; 2271 case ISD::SETULE: 2272 case ISD::SETUGT: 2273 if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { 2274 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2275 RHS = DAG.getConstant(C+1, MVT::i32); 2276 } 2277 break; 2278 } 2279 } 2280 } 2281 2282 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2283 ARMISD::NodeType CompareType; 2284 switch (CondCode) { 2285 default: 2286 CompareType = ARMISD::CMP; 2287 break; 2288 case ARMCC::EQ: 2289 case ARMCC::NE: 2290 // Uses only Z Flag 2291 CompareType = ARMISD::CMPZ; 2292 break; 2293 } 2294 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2295 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); 2296} 2297 2298/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 2299SDValue 2300ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2301 DebugLoc dl) const { 2302 SDValue Cmp; 2303 if (!isFloatingPointZero(RHS)) 2304 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); 2305 else 2306 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); 2307 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); 2308} 2309 2310SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2311 EVT VT = Op.getValueType(); 2312 SDValue LHS = Op.getOperand(0); 2313 SDValue RHS = Op.getOperand(1); 2314 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2315 SDValue TrueVal = Op.getOperand(2); 2316 SDValue FalseVal = Op.getOperand(3); 2317 DebugLoc dl = Op.getDebugLoc(); 2318 2319 if (LHS.getValueType() == MVT::i32) { 2320 SDValue ARMcc; 2321 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2322 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2323 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); 2324 } 2325 2326 ARMCC::CondCodes CondCode, CondCode2; 2327 FPCCToARMCC(CC, CondCode, CondCode2); 2328 2329 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2330 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2331 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2332 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2333 ARMcc, CCR, Cmp); 2334 if (CondCode2 != ARMCC::AL) { 2335 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2336 // FIXME: Needs another CMP because flag can have but one use. 2337 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2338 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2339 Result, TrueVal, ARMcc2, CCR, Cmp2); 2340 } 2341 return Result; 2342} 2343 2344/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2345/// to morph to an integer compare sequence. 2346static bool canChangeToInt(SDValue Op, bool &SeenZero, 2347 const ARMSubtarget *Subtarget) { 2348 SDNode *N = Op.getNode(); 2349 if (!N->hasOneUse()) 2350 // Otherwise it requires moving the value from fp to integer registers. 2351 return false; 2352 if (!N->getNumValues()) 2353 return false; 2354 EVT VT = Op.getValueType(); 2355 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2356 // f32 case is generally profitable.
f64 case only makes sense when vcmpe + 2357 // vmrs are very slow, e.g. cortex-a8. 2358 return false; 2359 2360 if (isFloatingPointZero(Op)) { 2361 SeenZero = true; 2362 return true; 2363 } 2364 return ISD::isNormalLoad(N); 2365} 2366 2367static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2368 if (isFloatingPointZero(Op)) 2369 return DAG.getConstant(0, MVT::i32); 2370 2371 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2372 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2373 Ld->getChain(), Ld->getBasePtr(), 2374 Ld->getSrcValue(), Ld->getSrcValueOffset(), 2375 Ld->isVolatile(), Ld->isNonTemporal(), 2376 Ld->getAlignment()); 2377 2378 llvm_unreachable("Unknown VFP cmp argument!"); 2379} 2380 2381static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2382 SDValue &RetVal1, SDValue &RetVal2) { 2383 if (isFloatingPointZero(Op)) { 2384 RetVal1 = DAG.getConstant(0, MVT::i32); 2385 RetVal2 = DAG.getConstant(0, MVT::i32); 2386 return; 2387 } 2388 2389 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2390 SDValue Ptr = Ld->getBasePtr(); 2391 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2392 Ld->getChain(), Ptr, 2393 Ld->getSrcValue(), Ld->getSrcValueOffset(), 2394 Ld->isVolatile(), Ld->isNonTemporal(), 2395 Ld->getAlignment()); 2396 2397 EVT PtrType = Ptr.getValueType(); 2398 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2399 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2400 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2401 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2402 Ld->getChain(), NewPtr, 2403 Ld->getSrcValue(), Ld->getSrcValueOffset() + 4, 2404 Ld->isVolatile(), Ld->isNonTemporal(), 2405 NewAlign); 2406 return; 2407 } 2408 2409 llvm_unreachable("Unknown VFP cmp argument!"); 2410} 2411 2412/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2413/// f32 and even f64 comparisons to integer ones. 2414SDValue 2415ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 2416 SDValue Chain = Op.getOperand(0); 2417 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2418 SDValue LHS = Op.getOperand(2); 2419 SDValue RHS = Op.getOperand(3); 2420 SDValue Dest = Op.getOperand(4); 2421 DebugLoc dl = Op.getDebugLoc(); 2422 2423 bool SeenZero = false; 2424 if (canChangeToInt(LHS, SeenZero, Subtarget) && 2425 canChangeToInt(RHS, SeenZero, Subtarget) && 2426 // If one of the operands is zero, it's safe to ignore the NaN case since 2427 // we only care about equality comparisons. 2428 (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { 2429 // If unsafe fp math optimization is enabled and there are no other uses of 2430 // the CMP operands, and the condition code is EQ or NE, we can optimize it 2431 // to an integer comparison.
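 // (Editorial sketch.) For f32 equality this turns a VFP compare into a plain
 // integer compare on the raw bit patterns, roughly:
 //   vcmpe.f32 s0, s1 ; vmrs APSR_nzcv, fpscr   ->   cmp r0, r1
 // Bit equality and IEEE equality disagree on +0.0 vs -0.0 and on NaNs,
 // which is why this is gated on unsafe-fp-math plus the SeenZero /
 // isKnownNeverNaN checks above.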
2432 if (CC == ISD::SETOEQ) 2433 CC = ISD::SETEQ; 2434 else if (CC == ISD::SETUNE) 2435 CC = ISD::SETNE; 2436 2437 SDValue ARMcc; 2438 if (LHS.getValueType() == MVT::f32) { 2439 LHS = bitcastf32Toi32(LHS, DAG); 2440 RHS = bitcastf32Toi32(RHS, DAG); 2441 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2442 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2443 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2444 Chain, Dest, ARMcc, CCR, Cmp); 2445 } 2446 2447 SDValue LHS1, LHS2; 2448 SDValue RHS1, RHS2; 2449 expandf64Toi32(LHS, DAG, LHS1, LHS2); 2450 expandf64Toi32(RHS, DAG, RHS1, RHS2); 2451 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2452 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2453 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 2454 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 2455 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 2456 } 2457 2458 return SDValue(); 2459} 2460 2461SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 2462 SDValue Chain = Op.getOperand(0); 2463 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2464 SDValue LHS = Op.getOperand(2); 2465 SDValue RHS = Op.getOperand(3); 2466 SDValue Dest = Op.getOperand(4); 2467 DebugLoc dl = Op.getDebugLoc(); 2468 2469 if (LHS.getValueType() == MVT::i32) { 2470 SDValue ARMcc; 2471 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2472 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2473 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2474 Chain, Dest, ARMcc, CCR, Cmp); 2475 } 2476 2477 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 2478 2479 if (UnsafeFPMath && 2480 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 2481 CC == ISD::SETNE || CC == ISD::SETUNE)) { 2482 SDValue Result = OptimizeVFPBrcond(Op, DAG); 2483 if (Result.getNode()) 2484 return Result; 2485 } 2486 2487 ARMCC::CondCodes CondCode, CondCode2; 2488 FPCCToARMCC(CC, CondCode, CondCode2); 2489 2490 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2491 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2492 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2493 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 2494 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 2495 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2496 if (CondCode2 != ARMCC::AL) { 2497 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 2498 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 2499 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2500 } 2501 return Res; 2502} 2503 2504SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 2505 SDValue Chain = Op.getOperand(0); 2506 SDValue Table = Op.getOperand(1); 2507 SDValue Index = Op.getOperand(2); 2508 DebugLoc dl = Op.getDebugLoc(); 2509 2510 EVT PTy = getPointerTy(); 2511 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2512 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2513 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2514 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2515 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2516 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2517 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2518 if (Subtarget->isThumb2()) { 2519 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2520 // which does another jump to the destination. 
This also makes it easier 2521 // to translate it to TBB / TBH later. 2522 // FIXME: This might not work if the function is extremely large. 2523 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2524 Addr, Op.getOperand(2), JTI, UId); 2525 } 2526 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2527 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2528 PseudoSourceValue::getJumpTable(), 0, 2529 false, false, 0); 2530 Chain = Addr.getValue(1); 2531 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2532 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2533 } else { 2534 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2535 PseudoSourceValue::getJumpTable(), 0, false, false, 0); 2536 Chain = Addr.getValue(1); 2537 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2538 } 2539} 2540 2541static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 2542 DebugLoc dl = Op.getDebugLoc(); 2543 unsigned Opc; 2544 2545 switch (Op.getOpcode()) { 2546 default: 2547 assert(0 && "Invalid opcode!"); 2548 case ISD::FP_TO_SINT: 2549 Opc = ARMISD::FTOSI; 2550 break; 2551 case ISD::FP_TO_UINT: 2552 Opc = ARMISD::FTOUI; 2553 break; 2554 } 2555 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2556 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); 2557} 2558 2559static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2560 EVT VT = Op.getValueType(); 2561 DebugLoc dl = Op.getDebugLoc(); 2562 unsigned Opc; 2563 2564 switch (Op.getOpcode()) { 2565 default: 2566 assert(0 && "Invalid opcode!"); 2567 case ISD::SINT_TO_FP: 2568 Opc = ARMISD::SITOF; 2569 break; 2570 case ISD::UINT_TO_FP: 2571 Opc = ARMISD::UITOF; 2572 break; 2573 } 2574 2575 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); 2576 return DAG.getNode(Opc, dl, VT, Op); 2577} 2578 2579SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 2580 // Implement fcopysign with a fabs and a conditional fneg. 2581 SDValue Tmp0 = Op.getOperand(0); 2582 SDValue Tmp1 = Op.getOperand(1); 2583 DebugLoc dl = Op.getDebugLoc(); 2584 EVT VT = Op.getValueType(); 2585 EVT SrcVT = Tmp1.getValueType(); 2586 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); 2587 SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); 2588 SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); 2589 SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); 2590 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2591 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); 2592} 2593 2594SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 2595 MachineFunction &MF = DAG.getMachineFunction(); 2596 MachineFrameInfo *MFI = MF.getFrameInfo(); 2597 MFI->setReturnAddressIsTaken(true); 2598 2599 EVT VT = Op.getValueType(); 2600 DebugLoc dl = Op.getDebugLoc(); 2601 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2602 if (Depth) { 2603 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 2604 SDValue Offset = DAG.getConstant(4, MVT::i32); 2605 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 2606 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 2607 NULL, 0, false, false, 0); 2608 } 2609 2610 // Return LR, which contains the return address. Mark it an implicit live-in. 
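 // (Descriptive note.) Depth 0 is simply a copy of LR; greater depths were
 // handled above by chasing the frame pointer and loading the saved LR at
 // FP+4.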
2611 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 2612 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 2613} 2614 2615SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 2616 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 2617 MFI->setFrameAddressIsTaken(true); 2618 2619 EVT VT = Op.getValueType(); 2620 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 2621 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2622 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 2623 ? ARM::R7 : ARM::R11; 2624 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 2625 while (Depth--) 2626 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, 2627 false, false, 0); 2628 return FrameAddr; 2629} 2630 2631/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to 2632/// expand a bit convert where either the source or destination type is i64 to 2633/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 2634/// operand type is illegal (e.g., v2f32 for a target that doesn't support 2635/// vectors), since the legalizer won't know what to do with that. 2636static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { 2637 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2638 DebugLoc dl = N->getDebugLoc(); 2639 SDValue Op = N->getOperand(0); 2640 2641 // This function is only supposed to be called for i64 types, either as the 2642 // source or destination of the bit convert. 2643 EVT SrcVT = Op.getValueType(); 2644 EVT DstVT = N->getValueType(0); 2645 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 2646 "ExpandBIT_CONVERT called for non-i64 type"); 2647 2648 // Turn i64->f64 into VMOVDRR. 2649 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 2650 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2651 DAG.getConstant(0, MVT::i32)); 2652 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2653 DAG.getConstant(1, MVT::i32)); 2654 return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, 2655 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 2656 } 2657 2658 // Turn f64->i64 into VMOVRRD. 2659 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 2660 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 2661 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 2662 // Merge the pieces into a single i64 value. 2663 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 2664 } 2665 2666 return SDValue(); 2667} 2668 2669/// getZeroVector - Returns a vector of specified type with all zero elements. 2670/// Zero vectors are used to represent vector negation and in those cases 2671/// will be implemented with the NEON VNEG instruction. However, VNEG does 2672/// not support i64 elements, so sometimes the zero vectors will need to be 2673/// explicitly constructed. Regardless, use a canonical VMOV to create the 2674/// zero vector. 2675static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2676 assert(VT.isVector() && "Expected a vector type"); 2677 // The canonical modified immediate encoding of a zero vector is....0! 2678 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 2679 EVT VmovVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; 2680 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 2681 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 2682} 2683 2684/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two 2685/// i32 values and take a 2 x i32 value to shift plus a shift amount. 2686SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 2687 SelectionDAG &DAG) const { 2688 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2689 EVT VT = Op.getValueType(); 2690 unsigned VTBits = VT.getSizeInBits(); 2691 DebugLoc dl = Op.getDebugLoc(); 2692 SDValue ShOpLo = Op.getOperand(0); 2693 SDValue ShOpHi = Op.getOperand(1); 2694 SDValue ShAmt = Op.getOperand(2); 2695 SDValue ARMcc; 2696 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 2697 2698 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 2699 2700 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2701 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2702 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 2703 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2704 DAG.getConstant(VTBits, MVT::i32)); 2705 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 2706 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2707 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 2708 2709 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2710 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2711 ARMcc, DAG, dl); 2712 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 2713 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 2714 CCR, Cmp); 2715 2716 SDValue Ops[2] = { Lo, Hi }; 2717 return DAG.getMergeValues(Ops, 2, dl); 2718} 2719 2720/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 2721/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
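 // (Editorial sketch of the semantics, with VTBits == 32 and inputs {Hi,Lo}
 // shifted left by n):
 //   n <  32:  Hi = (Hi << n) | (Lo >> (32 - n)),  Lo = Lo << n
 //   n >= 32:  Hi = Lo << (n - 32),                Lo = 0
 // The CMOV below picks between the two Hi computations by testing
 // (n - 32) >= 0; the Lo result relies on ARM register-specified shifts
 // producing 0 for amounts >= 32.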
2722SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 2723 SelectionDAG &DAG) const { 2724 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2725 EVT VT = Op.getValueType(); 2726 unsigned VTBits = VT.getSizeInBits(); 2727 DebugLoc dl = Op.getDebugLoc(); 2728 SDValue ShOpLo = Op.getOperand(0); 2729 SDValue ShOpHi = Op.getOperand(1); 2730 SDValue ShAmt = Op.getOperand(2); 2731 SDValue ARMcc; 2732 2733 assert(Op.getOpcode() == ISD::SHL_PARTS); 2734 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2735 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2736 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 2737 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2738 DAG.getConstant(VTBits, MVT::i32)); 2739 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 2740 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 2741 2742 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2743 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2744 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2745 ARMcc, DAG, dl); 2746 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 2747 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 2748 CCR, Cmp); 2749 2750 SDValue Ops[2] = { Lo, Hi }; 2751 return DAG.getMergeValues(Ops, 2, dl); 2752} 2753 2754static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 2755 const ARMSubtarget *ST) { 2756 EVT VT = N->getValueType(0); 2757 DebugLoc dl = N->getDebugLoc(); 2758 2759 if (!ST->hasV6T2Ops()) 2760 return SDValue(); 2761 2762 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 2763 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 2764} 2765 2766static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 2767 const ARMSubtarget *ST) { 2768 EVT VT = N->getValueType(0); 2769 DebugLoc dl = N->getDebugLoc(); 2770 2771 // Lower vector shifts on NEON to use VSHL. 2772 if (VT.isVector()) { 2773 assert(ST->hasNEON() && "unexpected vector shift"); 2774 2775 // Left shifts translate directly to the vshiftu intrinsic. 2776 if (N->getOpcode() == ISD::SHL) 2777 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2778 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 2779 N->getOperand(0), N->getOperand(1)); 2780 2781 assert((N->getOpcode() == ISD::SRA || 2782 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 2783 2784 // NEON uses the same intrinsics for both left and right shifts. For 2785 // right shifts, the shift amounts are negative, so negate the vector of 2786 // shift amounts. 2787 EVT ShiftVT = N->getOperand(1).getValueType(); 2788 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 2789 getZeroVector(ShiftVT, DAG, dl), 2790 N->getOperand(1)); 2791 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 2792 Intrinsic::arm_neon_vshifts : 2793 Intrinsic::arm_neon_vshiftu); 2794 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2795 DAG.getConstant(vshiftInt, MVT::i32), 2796 N->getOperand(0), NegatedCount); 2797 } 2798 2799 // We can get here for a node like i32 = ISD::SHL i32, i64 2800 if (VT != MVT::i64) 2801 return SDValue(); 2802 2803 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 2804 "Unknown shift to lower!"); 2805 2806 // We only lower SRA, SRL of 1 here, all others use generic lowering. 2807 if (!isa<ConstantSDNode>(N->getOperand(1)) || 2808 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 2809 return SDValue(); 2810 2811 // If we are in Thumb1 mode, we don't have RRX.
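 // (Illustrative expansion, UAL syntax, with r1:r0 = hi:lo.) A 64-bit logical
 // shift right by one becomes a flag-setting shift of the high word followed
 // by RRX on the low word, which rotates the carry back in at bit 31:
 //   lsrs r1, r1, #1
 //   rrx  r0, r0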
2812 if (ST->isThumb1Only()) return SDValue(); 2813 2814 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 2815 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2816 DAG.getConstant(0, MVT::i32)); 2817 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2818 DAG.getConstant(1, MVT::i32)); 2819 2820 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 2821 // captures the result into a carry flag. 2822 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 2823 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); 2824 2825 // The low part is an ARMISD::RRX operand, which shifts the carry in. 2826 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 2827 2828 // Merge the pieces into a single i64 value. 2829 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 2830} 2831 2832static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 2833 SDValue TmpOp0, TmpOp1; 2834 bool Invert = false; 2835 bool Swap = false; 2836 unsigned Opc = 0; 2837 2838 SDValue Op0 = Op.getOperand(0); 2839 SDValue Op1 = Op.getOperand(1); 2840 SDValue CC = Op.getOperand(2); 2841 EVT VT = Op.getValueType(); 2842 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2843 DebugLoc dl = Op.getDebugLoc(); 2844 2845 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 2846 switch (SetCCOpcode) { 2847 default: llvm_unreachable("Illegal FP comparison"); break; 2848 case ISD::SETUNE: 2849 case ISD::SETNE: Invert = true; // Fallthrough 2850 case ISD::SETOEQ: 2851 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2852 case ISD::SETOLT: 2853 case ISD::SETLT: Swap = true; // Fallthrough 2854 case ISD::SETOGT: 2855 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2856 case ISD::SETOLE: 2857 case ISD::SETLE: Swap = true; // Fallthrough 2858 case ISD::SETOGE: 2859 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2860 case ISD::SETUGE: Swap = true; // Fallthrough 2861 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 2862 case ISD::SETUGT: Swap = true; // Fallthrough 2863 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 2864 case ISD::SETUEQ: Invert = true; // Fallthrough 2865 case ISD::SETONE: 2866 // Expand this to (OLT | OGT). 2867 TmpOp0 = Op0; 2868 TmpOp1 = Op1; 2869 Opc = ISD::OR; 2870 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2871 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 2872 break; 2873 case ISD::SETUO: Invert = true; // Fallthrough 2874 case ISD::SETO: 2875 // Expand this to (OLT | OGE). 2876 TmpOp0 = Op0; 2877 TmpOp1 = Op1; 2878 Opc = ISD::OR; 2879 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2880 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 2881 break; 2882 } 2883 } else { 2884 // Integer comparisons. 2885 switch (SetCCOpcode) { 2886 default: llvm_unreachable("Illegal integer comparison"); break; 2887 case ISD::SETNE: Invert = true; 2888 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2889 case ISD::SETLT: Swap = true; 2890 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2891 case ISD::SETLE: Swap = true; 2892 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2893 case ISD::SETULT: Swap = true; 2894 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 2895 case ISD::SETULE: Swap = true; 2896 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 2897 } 2898 2899 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 
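 // (Editorial sketch.) Without this, icmp ne (and a, b), 0 would lower to
 // VAND + VCEQ-against-zero + VMVN; VTST answers "do the operands share any
 // set bits?" in one instruction, so Invert is flipped below to account for
 // the changed sense of the comparison.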
2900 if (Opc == ARMISD::VCEQ) { 2901 2902 SDValue AndOp; 2903 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 2904 AndOp = Op0; 2905 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 2906 AndOp = Op1; 2907 2908 // Ignore bitconvert. 2909 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) 2910 AndOp = AndOp.getOperand(0); 2911 2912 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 2913 Opc = ARMISD::VTST; 2914 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); 2915 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); 2916 Invert = !Invert; 2917 } 2918 } 2919 } 2920 2921 if (Swap) 2922 std::swap(Op0, Op1); 2923 2924 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 2925 2926 if (Invert) 2927 Result = DAG.getNOT(dl, Result, VT); 2928 2929 return Result; 2930} 2931 2932/// isNEONModifiedImm - Check if the specified splat value corresponds to a 2933/// valid vector constant for a NEON instruction with a "modified immediate" 2934/// operand (e.g., VMOV). If so, return the encoded value. 2935static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 2936 unsigned SplatBitSize, SelectionDAG &DAG, 2937 EVT &VT, bool is128Bits, bool isVMOV) { 2938 unsigned OpCmode, Imm; 2939 2940 // SplatBitSize is set to the smallest size that splats the vector, so a 2941 // zero vector will always have SplatBitSize == 8. However, NEON modified 2942 // immediate instructions other than VMOV do not support the 8-bit encoding 2943 // of a zero vector, and the default encoding of zero is supposed to be the 2944 // 32-bit version. 2945 if (SplatBits == 0) 2946 SplatBitSize = 32; 2947 2948 switch (SplatBitSize) { 2949 case 8: 2950 if (!isVMOV) 2951 return SDValue(); 2952 // Any 1-byte value is OK. Op=0, Cmode=1110. 2953 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 2954 OpCmode = 0xe; 2955 Imm = SplatBits; 2956 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 2957 break; 2958 2959 case 16: 2960 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 2961 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 2962 if ((SplatBits & ~0xff) == 0) { 2963 // Value = 0x00nn: Op=x, Cmode=100x. 2964 OpCmode = 0x8; 2965 Imm = SplatBits; 2966 break; 2967 } 2968 if ((SplatBits & ~0xff00) == 0) { 2969 // Value = 0xnn00: Op=x, Cmode=101x. 2970 OpCmode = 0xa; 2971 Imm = SplatBits >> 8; 2972 break; 2973 } 2974 return SDValue(); 2975 2976 case 32: 2977 // NEON's 32-bit VMOV supports splat values where: 2978 // * only one byte is nonzero, or 2979 // * the least significant byte is 0xff and the second byte is nonzero, or 2980 // * the least significant 2 bytes are 0xff and the third is nonzero. 2981 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 2982 if ((SplatBits & ~0xff) == 0) { 2983 // Value = 0x000000nn: Op=x, Cmode=000x. 2984 OpCmode = 0; 2985 Imm = SplatBits; 2986 break; 2987 } 2988 if ((SplatBits & ~0xff00) == 0) { 2989 // Value = 0x0000nn00: Op=x, Cmode=001x. 2990 OpCmode = 0x2; 2991 Imm = SplatBits >> 8; 2992 break; 2993 } 2994 if ((SplatBits & ~0xff0000) == 0) { 2995 // Value = 0x00nn0000: Op=x, Cmode=010x. 2996 OpCmode = 0x4; 2997 Imm = SplatBits >> 16; 2998 break; 2999 } 3000 if ((SplatBits & ~0xff000000) == 0) { 3001 // Value = 0xnn000000: Op=x, Cmode=011x. 3002 OpCmode = 0x6; 3003 Imm = SplatBits >> 24; 3004 break; 3005 } 3006 3007 if ((SplatBits & ~0xffff) == 0 && 3008 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3009 // Value = 0x0000nnff: Op=x, Cmode=1100.
3010 OpCmode = 0xc; 3011 Imm = SplatBits >> 8; 3012 SplatBits |= 0xff; 3013 break; 3014 } 3015 3016 if ((SplatBits & ~0xffffff) == 0 && 3017 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3018 // Value = 0x00nnffff: Op=x, Cmode=1101. 3019 OpCmode = 0xd; 3020 Imm = SplatBits >> 16; 3021 SplatBits |= 0xffff; 3022 break; 3023 } 3024 3025 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3026 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3027 // VMOV.I32. A (very) minor optimization would be to replicate the value 3028 // and fall through here to test for a valid 64-bit splat. But, then the 3029 // caller would also need to check and handle the change in size. 3030 return SDValue(); 3031 3032 case 64: { 3033 if (!isVMOV) 3034 return SDValue(); 3035 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 3036 uint64_t BitMask = 0xff; 3037 uint64_t Val = 0; 3038 unsigned ImmMask = 1; 3039 Imm = 0; 3040 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3041 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3042 Val |= BitMask; 3043 Imm |= ImmMask; 3044 } else if ((SplatBits & BitMask) != 0) { 3045 return SDValue(); 3046 } 3047 BitMask <<= 8; 3048 ImmMask <<= 1; 3049 } 3050 // Op=1, Cmode=1110. 3051 OpCmode = 0x1e; 3052 SplatBits = Val; 3053 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3054 break; 3055 } 3056 3057 default: 3058 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3059 return SDValue(); 3060 } 3061 3062 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3063 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3064} 3065 3066static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3067 bool &ReverseVEXT, unsigned &Imm) { 3068 unsigned NumElts = VT.getVectorNumElements(); 3069 ReverseVEXT = false; 3070 Imm = M[0]; 3071 3072 // If this is a VEXT shuffle, the immediate value is the index of the first 3073 // element. The other shuffle indices must be the successive elements after 3074 // the first one. 3075 unsigned ExpectedElt = Imm; 3076 for (unsigned i = 1; i < NumElts; ++i) { 3077 // Increment the expected index. If it wraps around, it may still be 3078 // a VEXT but the source vectors must be swapped. 3079 ExpectedElt += 1; 3080 if (ExpectedElt == NumElts * 2) { 3081 ExpectedElt = 0; 3082 ReverseVEXT = true; 3083 } 3084 3085 if (ExpectedElt != static_cast<unsigned>(M[i])) 3086 return false; 3087 } 3088 3089 // Adjust the index value if the source operands will be swapped. 3090 if (ReverseVEXT) 3091 Imm -= NumElts; 3092 3093 return true; 3094} 3095 3096/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3097/// instruction with the specified blocksize. (The order of the elements 3098/// within each block of the vector is reversed.) 
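/// For example, with VT = v8i8 and BlockSize == 32 (so BlockElts == 4), the
/// mask <3, 2, 1, 0, 7, 6, 5, 4> reverses the bytes within each 32-bit block
/// and is matched as VREV32.8.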
3099static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
3100                        unsigned BlockSize) {
3101   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3102          "Only possible block sizes for VREV are: 16, 32, 64");
3103
3104   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3105   if (EltSz == 64)
3106     return false;
3107
3108   unsigned NumElts = VT.getVectorNumElements();
3109   unsigned BlockElts = M[0] + 1;
3110
3111   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3112     return false;
3113
3114   for (unsigned i = 0; i < NumElts; ++i) {
3115     if ((unsigned) M[i] !=
3116         (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3117       return false;
3118   }
3119
3120   return true;
3121}
3122
3123static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
3124                        unsigned &WhichResult) {
3125   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3126   if (EltSz == 64)
3127     return false;
3128
3129   unsigned NumElts = VT.getVectorNumElements();
3130   WhichResult = (M[0] == 0 ? 0 : 1);
3131   for (unsigned i = 0; i < NumElts; i += 2) {
3132     if ((unsigned) M[i] != i + WhichResult ||
3133         (unsigned) M[i+1] != i + NumElts + WhichResult)
3134       return false;
3135   }
3136   return true;
3137}
3138
3139/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3140/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3141/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3142static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3143                                 unsigned &WhichResult) {
3144   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3145   if (EltSz == 64)
3146     return false;
3147
3148   unsigned NumElts = VT.getVectorNumElements();
3149   WhichResult = (M[0] == 0 ? 0 : 1);
3150   for (unsigned i = 0; i < NumElts; i += 2) {
3151     if ((unsigned) M[i] != i + WhichResult ||
3152         (unsigned) M[i+1] != i + WhichResult)
3153       return false;
3154   }
3155   return true;
3156}
3157
3158static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
3159                        unsigned &WhichResult) {
3160   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3161   if (EltSz == 64)
3162     return false;
3163
3164   unsigned NumElts = VT.getVectorNumElements();
3165   WhichResult = (M[0] == 0 ? 0 : 1);
3166   for (unsigned i = 0; i != NumElts; ++i) {
3167     if ((unsigned) M[i] != 2 * i + WhichResult)
3168       return false;
3169   }
3170
3171   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3172   if (VT.is64BitVector() && EltSz == 32)
3173     return false;
3174
3175   return true;
3176}
3177
3178/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
3179/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3180/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
3181static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3182                                 unsigned &WhichResult) {
3183   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3184   if (EltSz == 64)
3185     return false;
3186
3187   unsigned Half = VT.getVectorNumElements() / 2;
3188   WhichResult = (M[0] == 0 ? 0 : 1);
3189   for (unsigned j = 0; j != 2; ++j) {
3190     unsigned Idx = WhichResult;
3191     for (unsigned i = 0; i != Half; ++i) {
3192       if ((unsigned) M[i + j * Half] != Idx)
3193         return false;
3194       Idx += 2;
3195     }
3196   }
3197
3198   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
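  // (With only two 32-bit lanes in a 64-bit vector there is nothing left to
  // unzip: the operation degenerates to a transpose, and the VTRN checks
  // above already accept such masks.)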
3199 if (VT.is64BitVector() && EltSz == 32) 3200 return false; 3201 3202 return true; 3203} 3204 3205static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3206 unsigned &WhichResult) { 3207 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3208 if (EltSz == 64) 3209 return false; 3210 3211 unsigned NumElts = VT.getVectorNumElements(); 3212 WhichResult = (M[0] == 0 ? 0 : 1); 3213 unsigned Idx = WhichResult * NumElts / 2; 3214 for (unsigned i = 0; i != NumElts; i += 2) { 3215 if ((unsigned) M[i] != Idx || 3216 (unsigned) M[i+1] != Idx + NumElts) 3217 return false; 3218 Idx += 1; 3219 } 3220 3221 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3222 if (VT.is64BitVector() && EltSz == 32) 3223 return false; 3224 3225 return true; 3226} 3227 3228/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3229/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3230/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 3231static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3232 unsigned &WhichResult) { 3233 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3234 if (EltSz == 64) 3235 return false; 3236 3237 unsigned NumElts = VT.getVectorNumElements(); 3238 WhichResult = (M[0] == 0 ? 0 : 1); 3239 unsigned Idx = WhichResult * NumElts / 2; 3240 for (unsigned i = 0; i != NumElts; i += 2) { 3241 if ((unsigned) M[i] != Idx || 3242 (unsigned) M[i+1] != Idx) 3243 return false; 3244 Idx += 1; 3245 } 3246 3247 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3248 if (VT.is64BitVector() && EltSz == 32) 3249 return false; 3250 3251 return true; 3252} 3253 3254// If this is a case we can't handle, return null and let the default 3255// expansion code take care of it. 3256static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 3257 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3258 DebugLoc dl = Op.getDebugLoc(); 3259 EVT VT = Op.getValueType(); 3260 3261 APInt SplatBits, SplatUndef; 3262 unsigned SplatBitSize; 3263 bool HasAnyUndefs; 3264 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3265 if (SplatBitSize <= 64) { 3266 // Check if an immediate VMOV works. 3267 EVT VmovVT; 3268 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3269 SplatUndef.getZExtValue(), SplatBitSize, 3270 DAG, VmovVT, VT.is128BitVector(), true); 3271 if (Val.getNode()) { 3272 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3273 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3274 } 3275 3276 // Try an immediate VMVN. 3277 uint64_t NegatedImm = (SplatBits.getZExtValue() ^ 3278 ((1LL << SplatBitSize) - 1)); 3279 Val = isNEONModifiedImm(NegatedImm, 3280 SplatUndef.getZExtValue(), SplatBitSize, 3281 DAG, VmovVT, VT.is128BitVector(), false); 3282 if (Val.getNode()) { 3283 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 3284 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3285 } 3286 } 3287 } 3288 3289 // Scan through the operands to see if only one value is used. 
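  // For example, <x, x, x, x> with a non-constant x sets usesOnlyOneValue and
  // is lowered to a VDUP below (for elements of 32 bits or less), while
  // <x, undef, undef, undef> sets isOnlyLowElement and becomes a
  // scalar_to_vector.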
3290 unsigned NumElts = VT.getVectorNumElements(); 3291 bool isOnlyLowElement = true; 3292 bool usesOnlyOneValue = true; 3293 bool isConstant = true; 3294 SDValue Value; 3295 for (unsigned i = 0; i < NumElts; ++i) { 3296 SDValue V = Op.getOperand(i); 3297 if (V.getOpcode() == ISD::UNDEF) 3298 continue; 3299 if (i > 0) 3300 isOnlyLowElement = false; 3301 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 3302 isConstant = false; 3303 3304 if (!Value.getNode()) 3305 Value = V; 3306 else if (V != Value) 3307 usesOnlyOneValue = false; 3308 } 3309 3310 if (!Value.getNode()) 3311 return DAG.getUNDEF(VT); 3312 3313 if (isOnlyLowElement) 3314 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 3315 3316 // If all elements are constants, fall back to the default expansion, which 3317 // will generate a load from the constant pool. 3318 if (isConstant) 3319 return SDValue(); 3320 3321 // Use VDUP for non-constant splats. 3322 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3323 if (usesOnlyOneValue && EltSize <= 32) 3324 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3325 3326 // Vectors with 32- or 64-bit elements can be built by directly assigning 3327 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 3328 // will be legalized. 3329 if (EltSize >= 32) { 3330 // Do the expansion with floating-point types, since that is what the VFP 3331 // registers are defined to use, and since i64 is not legal. 3332 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3333 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3334 SmallVector<SDValue, 8> Ops; 3335 for (unsigned i = 0; i < NumElts; ++i) 3336 Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); 3337 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3338 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3339 } 3340 3341 return SDValue(); 3342} 3343 3344/// isShuffleMaskLegal - Targets can use this to indicate that they only 3345/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3346/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3347/// are assumed to be legal. 3348bool 3349ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 3350 EVT VT) const { 3351 if (VT.getVectorNumElements() == 4 && 3352 (VT.is128BitVector() || VT.is64BitVector())) { 3353 unsigned PFIndexes[4]; 3354 for (unsigned i = 0; i != 4; ++i) { 3355 if (M[i] < 0) 3356 PFIndexes[i] = 8; 3357 else 3358 PFIndexes[i] = M[i]; 3359 } 3360 3361 // Compute the index in the perfect shuffle table. 
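    // Each lane index is one base-9 digit (0-7 for a source lane, 8 for
    // undef), so e.g. the identity mask <0, 1, 2, 3> maps to
    // 0*729 + 1*81 + 2*9 + 3 = 102.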
3362 unsigned PFTableIndex = 3363 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3364 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3365 unsigned Cost = (PFEntry >> 30); 3366 3367 if (Cost <= 4) 3368 return true; 3369 } 3370 3371 bool ReverseVEXT; 3372 unsigned Imm, WhichResult; 3373 3374 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3375 return (EltSize >= 32 || 3376 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 3377 isVREVMask(M, VT, 64) || 3378 isVREVMask(M, VT, 32) || 3379 isVREVMask(M, VT, 16) || 3380 isVEXTMask(M, VT, ReverseVEXT, Imm) || 3381 isVTRNMask(M, VT, WhichResult) || 3382 isVUZPMask(M, VT, WhichResult) || 3383 isVZIPMask(M, VT, WhichResult) || 3384 isVTRN_v_undef_Mask(M, VT, WhichResult) || 3385 isVUZP_v_undef_Mask(M, VT, WhichResult) || 3386 isVZIP_v_undef_Mask(M, VT, WhichResult)); 3387} 3388 3389/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 3390/// the specified operations to build the shuffle. 3391static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 3392 SDValue RHS, SelectionDAG &DAG, 3393 DebugLoc dl) { 3394 unsigned OpNum = (PFEntry >> 26) & 0x0F; 3395 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 3396 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 3397 3398 enum { 3399 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 3400 OP_VREV, 3401 OP_VDUP0, 3402 OP_VDUP1, 3403 OP_VDUP2, 3404 OP_VDUP3, 3405 OP_VEXT1, 3406 OP_VEXT2, 3407 OP_VEXT3, 3408 OP_VUZPL, // VUZP, left result 3409 OP_VUZPR, // VUZP, right result 3410 OP_VZIPL, // VZIP, left result 3411 OP_VZIPR, // VZIP, right result 3412 OP_VTRNL, // VTRN, left result 3413 OP_VTRNR // VTRN, right result 3414 }; 3415 3416 if (OpNum == OP_COPY) { 3417 if (LHSID == (1*9+2)*9+3) return LHS; 3418 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 3419 return RHS; 3420 } 3421 3422 SDValue OpLHS, OpRHS; 3423 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 3424 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 3425 EVT VT = OpLHS.getValueType(); 3426 3427 switch (OpNum) { 3428 default: llvm_unreachable("Unknown shuffle opcode!"); 3429 case OP_VREV: 3430 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 3431 case OP_VDUP0: 3432 case OP_VDUP1: 3433 case OP_VDUP2: 3434 case OP_VDUP3: 3435 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 3436 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 3437 case OP_VEXT1: 3438 case OP_VEXT2: 3439 case OP_VEXT3: 3440 return DAG.getNode(ARMISD::VEXT, dl, VT, 3441 OpLHS, OpRHS, 3442 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 3443 case OP_VUZPL: 3444 case OP_VUZPR: 3445 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3446 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 3447 case OP_VZIPL: 3448 case OP_VZIPR: 3449 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3450 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 3451 case OP_VTRNL: 3452 case OP_VTRNR: 3453 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3454 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 3455 } 3456} 3457 3458static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 3459 SDValue V1 = Op.getOperand(0); 3460 SDValue V2 = Op.getOperand(1); 3461 DebugLoc dl = Op.getDebugLoc(); 3462 EVT VT = Op.getValueType(); 3463 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 3464 SmallVector<int, 8> ShuffleMask; 3465 3466 // Convert shuffles that are directly supported on NEON to target-specific 3467 // DAG nodes, 
instead of keeping them as shuffles and matching them again
3468   // during code selection.  This is more efficient and avoids the possibility
3469   // of inconsistencies between legalization and selection.
3470   // FIXME: floating-point vectors should be canonicalized to integer vectors
3471   // of the same type so that they get CSEd properly.
3472   SVN->getMask(ShuffleMask);
3473
3474   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3475   if (EltSize <= 32) {
3476     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3477       int Lane = SVN->getSplatIndex();
3478       // If this is an undef splat, generate it via "just" vdup, if possible.
3479       if (Lane == -1) Lane = 0;
3480
3481       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3482         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3483       }
3484       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3485                          DAG.getConstant(Lane, MVT::i32));
3486     }
3487
3488     bool ReverseVEXT;
3489     unsigned Imm;
3490     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3491       if (ReverseVEXT)
3492         std::swap(V1, V2);
3493       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3494                          DAG.getConstant(Imm, MVT::i32));
3495     }
3496
3497     if (isVREVMask(ShuffleMask, VT, 64))
3498       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3499     if (isVREVMask(ShuffleMask, VT, 32))
3500       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3501     if (isVREVMask(ShuffleMask, VT, 16))
3502       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3503
3504     // Check for NEON shuffles that modify both input vectors in place.
3505     // If both results are used, i.e., if there are two shuffles with the same
3506     // source operands and with masks corresponding to both results of one of
3507     // these operations, DAG memoization will ensure that a single node is
3508     // used for both shuffles.
3509     unsigned WhichResult;
3510     if (isVTRNMask(ShuffleMask, VT, WhichResult))
3511       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3512                          V1, V2).getValue(WhichResult);
3513     if (isVUZPMask(ShuffleMask, VT, WhichResult))
3514       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3515                          V1, V2).getValue(WhichResult);
3516     if (isVZIPMask(ShuffleMask, VT, WhichResult))
3517       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3518                          V1, V2).getValue(WhichResult);
3519
3520     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3521       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3522                          V1, V1).getValue(WhichResult);
3523     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3524       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3525                          V1, V1).getValue(WhichResult);
3526     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3527       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3528                          V1, V1).getValue(WhichResult);
3529   }
3530
3531   // If the shuffle is not directly supported and it has 4 elements, use
3532   // the PerfectShuffle-generated table to synthesize it from other shuffles.
3533   unsigned NumElts = VT.getVectorNumElements();
3534   if (NumElts == 4) {
3535     unsigned PFIndexes[4];
3536     for (unsigned i = 0; i != 4; ++i) {
3537       if (ShuffleMask[i] < 0)
3538         PFIndexes[i] = 8;
3539       else
3540         PFIndexes[i] = ShuffleMask[i];
3541     }
3542
3543     // Compute the index in the perfect shuffle table.
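    // (This uses the same base-9 packing as isShuffleMaskLegal above.)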
3544 unsigned PFTableIndex = 3545 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3546 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3547 unsigned Cost = (PFEntry >> 30); 3548 3549 if (Cost <= 4) 3550 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 3551 } 3552 3553 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 3554 if (EltSize >= 32) { 3555 // Do the expansion with floating-point types, since that is what the VFP 3556 // registers are defined to use, and since i64 is not legal. 3557 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3558 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3559 V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); 3560 V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); 3561 SmallVector<SDValue, 8> Ops; 3562 for (unsigned i = 0; i < NumElts; ++i) { 3563 if (ShuffleMask[i] < 0) 3564 Ops.push_back(DAG.getUNDEF(EltVT)); 3565 else 3566 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 3567 ShuffleMask[i] < (int)NumElts ? V1 : V2, 3568 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 3569 MVT::i32))); 3570 } 3571 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3572 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3573 } 3574 3575 return SDValue(); 3576} 3577 3578static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 3579 EVT VT = Op.getValueType(); 3580 DebugLoc dl = Op.getDebugLoc(); 3581 SDValue Vec = Op.getOperand(0); 3582 SDValue Lane = Op.getOperand(1); 3583 assert(VT == MVT::i32 && 3584 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && 3585 "unexpected type for custom-lowering vector extract"); 3586 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 3587} 3588 3589static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 3590 // The only time a CONCAT_VECTORS operation can have legal types is when 3591 // two 64-bit vectors are concatenated to a 128-bit vector. 3592 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 3593 "unexpected CONCAT_VECTORS"); 3594 DebugLoc dl = Op.getDebugLoc(); 3595 SDValue Val = DAG.getUNDEF(MVT::v2f64); 3596 SDValue Op0 = Op.getOperand(0); 3597 SDValue Op1 = Op.getOperand(1); 3598 if (Op0.getOpcode() != ISD::UNDEF) 3599 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3600 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), 3601 DAG.getIntPtrConstant(0)); 3602 if (Op1.getOpcode() != ISD::UNDEF) 3603 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3604 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), 3605 DAG.getIntPtrConstant(1)); 3606 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); 3607} 3608 3609SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 3610 switch (Op.getOpcode()) { 3611 default: llvm_unreachable("Don't know how to custom lower this!"); 3612 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3613 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 3614 case ISD::GlobalAddress: 3615 return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : 3616 LowerGlobalAddressELF(Op, DAG); 3617 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 3618 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 3619 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 3620 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 3621 case ISD::VASTART: return LowerVASTART(Op, DAG); 3622 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 3623 case ISD::SINT_TO_FP: 3624 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 3625 case ISD::FP_TO_SINT: 3626 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 3627 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 3628 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 3629 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 3630 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 3631 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 3632 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 3633 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 3634 Subtarget); 3635 case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); 3636 case ISD::SHL: 3637 case ISD::SRL: 3638 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 3639 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 3640 case ISD::SRL_PARTS: 3641 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 3642 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 3643 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 3644 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 3645 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3646 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 3647 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 3648 } 3649 return SDValue(); 3650} 3651 3652/// ReplaceNodeResults - Replace the results of node with an illegal result 3653/// type with new values built out of custom code. 3654void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 3655 SmallVectorImpl<SDValue>&Results, 3656 SelectionDAG &DAG) const { 3657 SDValue Res; 3658 switch (N->getOpcode()) { 3659 default: 3660 llvm_unreachable("Don't know how to custom expand this!"); 3661 break; 3662 case ISD::BIT_CONVERT: 3663 Res = ExpandBIT_CONVERT(N, DAG); 3664 break; 3665 case ISD::SRL: 3666 case ISD::SRA: 3667 Res = LowerShift(N, DAG, Subtarget); 3668 break; 3669 } 3670 if (Res.getNode()) 3671 Results.push_back(Res); 3672} 3673 3674//===----------------------------------------------------------------------===// 3675// ARM Scheduler Hooks 3676//===----------------------------------------------------------------------===// 3677 3678MachineBasicBlock * 3679ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 3680 MachineBasicBlock *BB, 3681 unsigned Size) const { 3682 unsigned dest = MI->getOperand(0).getReg(); 3683 unsigned ptr = MI->getOperand(1).getReg(); 3684 unsigned oldval = MI->getOperand(2).getReg(); 3685 unsigned newval = MI->getOperand(3).getReg(); 3686 unsigned scratch = BB->getParent()->getRegInfo() 3687 .createVirtualRegister(ARM::GPRRegisterClass); 3688 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3689 DebugLoc dl = MI->getDebugLoc(); 3690 bool isThumb2 = Subtarget->isThumb2(); 3691 3692 unsigned ldrOpc, strOpc; 3693 switch (Size) { 3694 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 3695 case 1: 3696 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3697 strOpc = isThumb2 ? 
ARM::t2STREXB : ARM::STREXB;
3698     break;
3699   case 2:
3700     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3701     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3702     break;
3703   case 4:
3704     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3705     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3706     break;
3707   }
3708
3709   MachineFunction *MF = BB->getParent();
3710   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3711   MachineFunction::iterator It = BB;
3712   ++It; // insert the new blocks after the current block
3713
3714   MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3715   MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3716   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3717   MF->insert(It, loop1MBB);
3718   MF->insert(It, loop2MBB);
3719   MF->insert(It, exitMBB);
3720
3721   // Transfer the remainder of BB and its successor edges to exitMBB.
3722   exitMBB->splice(exitMBB->begin(), BB,
3723                   llvm::next(MachineBasicBlock::iterator(MI)),
3724                   BB->end());
3725   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
3726
3727   //  thisMBB:
3728   //   ...
3729   //   fallthrough --> loop1MBB
3730   BB->addSuccessor(loop1MBB);
3731
3732   // loop1MBB:
3733   //   ldrex dest, [ptr]
3734   //   cmp dest, oldval
3735   //   bne exitMBB
3736   BB = loop1MBB;
3737   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3738   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3739                  .addReg(dest).addReg(oldval));
3740   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3741     .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3742   BB->addSuccessor(loop2MBB);
3743   BB->addSuccessor(exitMBB);
3744
3745   // loop2MBB:
3746   //   strex scratch, newval, [ptr]
3747   //   cmp scratch, #0
3748   //   bne loop1MBB
3749   BB = loop2MBB;
3750   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3751                  .addReg(ptr));
3752   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3753                  .addReg(scratch).addImm(0));
3754   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3755     .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3756   BB->addSuccessor(loop1MBB);
3757   BB->addSuccessor(exitMBB);
3758
3759   //  exitMBB:
3760   //   ...
3761   BB = exitMBB;
3762
3763   MI->eraseFromParent();   // The instruction is gone now.
3764
3765   return BB;
3766}
3767
3768MachineBasicBlock *
3769ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3770                                     unsigned Size, unsigned BinOpcode) const {
3771   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3772   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3773
3774   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3775   MachineFunction *MF = BB->getParent();
3776   MachineFunction::iterator It = BB;
3777   ++It;
3778
3779   unsigned dest = MI->getOperand(0).getReg();
3780   unsigned ptr = MI->getOperand(1).getReg();
3781   unsigned incr = MI->getOperand(2).getReg();
3782   DebugLoc dl = MI->getDebugLoc();
3783
3784   bool isThumb2 = Subtarget->isThumb2();
3785   unsigned ldrOpc, strOpc;
3786   switch (Size) {
3787   default: llvm_unreachable("unsupported size for AtomicBinary!");
3788   case 1:
3789     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3790     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3791     break;
3792   case 2:
3793     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3794     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3795     break;
3796   case 4:
3797     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3798     strOpc = isThumb2 ?
ARM::t2STREX : ARM::STREX; 3799 break; 3800 } 3801 3802 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3803 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3804 MF->insert(It, loopMBB); 3805 MF->insert(It, exitMBB); 3806 3807 // Transfer the remainder of BB and its successor edges to exitMBB. 3808 exitMBB->splice(exitMBB->begin(), BB, 3809 llvm::next(MachineBasicBlock::iterator(MI)), 3810 BB->end()); 3811 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 3812 3813 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3814 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3815 unsigned scratch2 = (!BinOpcode) ? incr : 3816 RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3817 3818 // thisMBB: 3819 // ... 3820 // fallthrough --> loopMBB 3821 BB->addSuccessor(loopMBB); 3822 3823 // loopMBB: 3824 // ldrex dest, ptr 3825 // <binop> scratch2, dest, incr 3826 // strex scratch, scratch2, ptr 3827 // cmp scratch, #0 3828 // bne- loopMBB 3829 // fallthrough --> exitMBB 3830 BB = loopMBB; 3831 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3832 if (BinOpcode) { 3833 // operand order needs to go the other way for NAND 3834 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 3835 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3836 addReg(incr).addReg(dest)).addReg(0); 3837 else 3838 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3839 addReg(dest).addReg(incr)).addReg(0); 3840 } 3841 3842 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 3843 .addReg(ptr)); 3844 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3845 .addReg(scratch).addImm(0)); 3846 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3847 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3848 3849 BB->addSuccessor(loopMBB); 3850 BB->addSuccessor(exitMBB); 3851 3852 // exitMBB: 3853 // ... 3854 BB = exitMBB; 3855 3856 MI->eraseFromParent(); // The instruction is gone now. 3857 3858 return BB; 3859} 3860 3861static 3862MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 3863 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 3864 E = MBB->succ_end(); I != E; ++I) 3865 if (*I != Succ) 3866 return *I; 3867 llvm_unreachable("Expecting a BB with two successors!"); 3868} 3869 3870MachineBasicBlock * 3871ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 3872 MachineBasicBlock *BB) const { 3873 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3874 DebugLoc dl = MI->getDebugLoc(); 3875 bool isThumb2 = Subtarget->isThumb2(); 3876 switch (MI->getOpcode()) { 3877 default: 3878 MI->dump(); 3879 llvm_unreachable("Unexpected instr type to insert"); 3880 3881 case ARM::ATOMIC_LOAD_ADD_I8: 3882 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3883 case ARM::ATOMIC_LOAD_ADD_I16: 3884 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3885 case ARM::ATOMIC_LOAD_ADD_I32: 3886 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 3887 3888 case ARM::ATOMIC_LOAD_AND_I8: 3889 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3890 case ARM::ATOMIC_LOAD_AND_I16: 3891 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 3892 case ARM::ATOMIC_LOAD_AND_I32: 3893 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2ANDrr : ARM::ANDrr); 3894 3895 case ARM::ATOMIC_LOAD_OR_I8: 3896 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3897 case ARM::ATOMIC_LOAD_OR_I16: 3898 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3899 case ARM::ATOMIC_LOAD_OR_I32: 3900 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 3901 3902 case ARM::ATOMIC_LOAD_XOR_I8: 3903 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3904 case ARM::ATOMIC_LOAD_XOR_I16: 3905 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3906 case ARM::ATOMIC_LOAD_XOR_I32: 3907 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 3908 3909 case ARM::ATOMIC_LOAD_NAND_I8: 3910 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3911 case ARM::ATOMIC_LOAD_NAND_I16: 3912 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3913 case ARM::ATOMIC_LOAD_NAND_I32: 3914 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 3915 3916 case ARM::ATOMIC_LOAD_SUB_I8: 3917 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3918 case ARM::ATOMIC_LOAD_SUB_I16: 3919 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3920 case ARM::ATOMIC_LOAD_SUB_I32: 3921 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 3922 3923 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 3924 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 3925 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 3926 3927 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 3928 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 3929 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 3930 3931 case ARM::tMOVCCr_pseudo: { 3932 // To "insert" a SELECT_CC instruction, we actually have to insert the 3933 // diamond control-flow pattern. The incoming instruction knows the 3934 // destination vreg to set, the condition code register to branch on, the 3935 // true/false values to select between, and a branch opcode to use. 3936 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3937 MachineFunction::iterator It = BB; 3938 ++It; 3939 3940 // thisMBB: 3941 // ... 3942 // TrueVal = ... 3943 // cmpTY ccX, r1, r2 3944 // bCC copy1MBB 3945 // fallthrough --> copy0MBB 3946 MachineBasicBlock *thisMBB = BB; 3947 MachineFunction *F = BB->getParent(); 3948 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 3949 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 3950 F->insert(It, copy0MBB); 3951 F->insert(It, sinkMBB); 3952 3953 // Transfer the remainder of BB and its successor edges to sinkMBB. 3954 sinkMBB->splice(sinkMBB->begin(), BB, 3955 llvm::next(MachineBasicBlock::iterator(MI)), 3956 BB->end()); 3957 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 3958 3959 BB->addSuccessor(copy0MBB); 3960 BB->addSuccessor(sinkMBB); 3961 3962 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 3963 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 3964 3965 // copy0MBB: 3966 // %FalseValue = ... 3967 // # fallthrough to sinkMBB 3968 BB = copy0MBB; 3969 3970 // Update machine-CFG edges 3971 BB->addSuccessor(sinkMBB); 3972 3973 // sinkMBB: 3974 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 3975 // ... 
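    // All PHI nodes must be grouped at the top of a machine basic block,
    // which is why the BuildMI below inserts at BB->begin().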
3976 BB = sinkMBB; 3977 BuildMI(*BB, BB->begin(), dl, 3978 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 3979 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 3980 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 3981 3982 MI->eraseFromParent(); // The pseudo instruction is gone now. 3983 return BB; 3984 } 3985 3986 case ARM::BCCi64: 3987 case ARM::BCCZi64: { 3988 // Compare both parts that make up the double comparison separately for 3989 // equality. 3990 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 3991 3992 unsigned LHS1 = MI->getOperand(1).getReg(); 3993 unsigned LHS2 = MI->getOperand(2).getReg(); 3994 if (RHSisZero) { 3995 AddDefaultPred(BuildMI(BB, dl, 3996 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3997 .addReg(LHS1).addImm(0)); 3998 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3999 .addReg(LHS2).addImm(0) 4000 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4001 } else { 4002 unsigned RHS1 = MI->getOperand(3).getReg(); 4003 unsigned RHS2 = MI->getOperand(4).getReg(); 4004 AddDefaultPred(BuildMI(BB, dl, 4005 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4006 .addReg(LHS1).addReg(RHS1)); 4007 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4008 .addReg(LHS2).addReg(RHS2) 4009 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4010 } 4011 4012 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 4013 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 4014 if (MI->getOperand(0).getImm() == ARMCC::NE) 4015 std::swap(destMBB, exitMBB); 4016 4017 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4018 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 4019 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) 4020 .addMBB(exitMBB); 4021 4022 MI->eraseFromParent(); // The pseudo instruction is gone now. 4023 return BB; 4024 } 4025 4026 case ARM::tANDsp: 4027 case ARM::tADDspr_: 4028 case ARM::tSUBspi_: 4029 case ARM::t2SUBrSPi_: 4030 case ARM::t2SUBrSPi12_: 4031 case ARM::t2SUBrSPs_: { 4032 MachineFunction *MF = BB->getParent(); 4033 unsigned DstReg = MI->getOperand(0).getReg(); 4034 unsigned SrcReg = MI->getOperand(1).getReg(); 4035 bool DstIsDead = MI->getOperand(0).isDead(); 4036 bool SrcIsKill = MI->getOperand(1).isKill(); 4037 4038 if (SrcReg != ARM::SP) { 4039 // Copy the source to SP from virtual register. 4040 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); 4041 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 4042 ? 
ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; 4043 BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) 4044 .addReg(SrcReg, getKillRegState(SrcIsKill)); 4045 } 4046 4047 unsigned OpOpc = 0; 4048 bool NeedPred = false, NeedCC = false, NeedOp3 = false; 4049 switch (MI->getOpcode()) { 4050 default: 4051 llvm_unreachable("Unexpected pseudo instruction!"); 4052 case ARM::tANDsp: 4053 OpOpc = ARM::tAND; 4054 NeedPred = true; 4055 break; 4056 case ARM::tADDspr_: 4057 OpOpc = ARM::tADDspr; 4058 break; 4059 case ARM::tSUBspi_: 4060 OpOpc = ARM::tSUBspi; 4061 break; 4062 case ARM::t2SUBrSPi_: 4063 OpOpc = ARM::t2SUBrSPi; 4064 NeedPred = true; NeedCC = true; 4065 break; 4066 case ARM::t2SUBrSPi12_: 4067 OpOpc = ARM::t2SUBrSPi12; 4068 NeedPred = true; 4069 break; 4070 case ARM::t2SUBrSPs_: 4071 OpOpc = ARM::t2SUBrSPs; 4072 NeedPred = true; NeedCC = true; NeedOp3 = true; 4073 break; 4074 } 4075 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); 4076 if (OpOpc == ARM::tAND) 4077 AddDefaultT1CC(MIB); 4078 MIB.addReg(ARM::SP); 4079 MIB.addOperand(MI->getOperand(2)); 4080 if (NeedOp3) 4081 MIB.addOperand(MI->getOperand(3)); 4082 if (NeedPred) 4083 AddDefaultPred(MIB); 4084 if (NeedCC) 4085 AddDefaultCC(MIB); 4086 4087 // Copy the result from SP to virtual register. 4088 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); 4089 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) 4090 ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; 4091 BuildMI(*BB, MI, dl, TII->get(CopyOpc)) 4092 .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) 4093 .addReg(ARM::SP); 4094 MI->eraseFromParent(); // The pseudo instruction is gone now. 4095 return BB; 4096 } 4097 } 4098} 4099 4100//===----------------------------------------------------------------------===// 4101// ARM Optimization Hooks 4102//===----------------------------------------------------------------------===// 4103 4104static 4105SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 4106 TargetLowering::DAGCombinerInfo &DCI) { 4107 SelectionDAG &DAG = DCI.DAG; 4108 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4109 EVT VT = N->getValueType(0); 4110 unsigned Opc = N->getOpcode(); 4111 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 4112 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 4113 SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); 4114 ISD::CondCode CC = ISD::SETCC_INVALID; 4115 4116 if (isSlctCC) { 4117 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 4118 } else { 4119 SDValue CCOp = Slct.getOperand(0); 4120 if (CCOp.getOpcode() == ISD::SETCC) 4121 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 4122 } 4123 4124 bool DoXform = false; 4125 bool InvCC = false; 4126 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 4127 "Bad input!"); 4128 4129 if (LHS.getOpcode() == ISD::Constant && 4130 cast<ConstantSDNode>(LHS)->isNullValue()) { 4131 DoXform = true; 4132 } else if (CC != ISD::SETCC_INVALID && 4133 RHS.getOpcode() == ISD::Constant && 4134 cast<ConstantSDNode>(RHS)->isNullValue()) { 4135 std::swap(LHS, RHS); 4136 SDValue Op0 = Slct.getOperand(0); 4137 EVT OpVT = isSlctCC ? Op0.getValueType() : 4138 Op0.getOperand(0).getValueType(); 4139 bool isInt = OpVT.isInteger(); 4140 CC = ISD::getSetCCInverse(CC, isInt); 4141 4142 if (!TLI.isCondCodeLegal(CC, OpVT)) 4143 return SDValue(); // Inverse operator isn't legal. 
4144
4145     DoXform = true;
4146     InvCC = true;
4147   }
4148
4149   if (DoXform) {
4150     SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
4151     if (isSlctCC)
4152       return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
4153                              Slct.getOperand(0), Slct.getOperand(1), CC);
4154     SDValue CCOp = Slct.getOperand(0);
4155     if (InvCC)
4156       CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
4157                           CCOp.getOperand(0), CCOp.getOperand(1), CC);
4158     return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
4159                        CCOp, OtherOp, Result);
4160   }
4161   return SDValue();
4162}
4163
4164/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
4165static SDValue PerformADDCombine(SDNode *N,
4166                                 TargetLowering::DAGCombinerInfo &DCI) {
4167   // added by evan in r37685 with no testcase.
4168   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4169
4170   // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
4171   if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
4172     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
4173     if (Result.getNode()) return Result;
4174   }
4175   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4176     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4177     if (Result.getNode()) return Result;
4178   }
4179
4180   return SDValue();
4181}
4182
4183/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
4184static SDValue PerformSUBCombine(SDNode *N,
4185                                 TargetLowering::DAGCombinerInfo &DCI) {
4186   // added by evan in r37685 with no testcase.
4187   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4188
4189   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
4190   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4191     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4192     if (Result.getNode()) return Result;
4193   }
4194
4195   return SDValue();
4196}
4197
4198static SDValue PerformMULCombine(SDNode *N,
4199                                 TargetLowering::DAGCombinerInfo &DCI,
4200                                 const ARMSubtarget *Subtarget) {
4201   SelectionDAG &DAG = DCI.DAG;
4202
4203   if (Subtarget->isThumb1Only())
4204     return SDValue();
4205
4206   if (DAG.getMachineFunction().
4207       getFunction()->hasFnAttr(Attribute::OptimizeForSize))
4208     return SDValue();
4209
4210   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
4211     return SDValue();
4212
4213   EVT VT = N->getValueType(0);
4214   if (VT != MVT::i32)
4215     return SDValue();
4216
4217   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
4218   if (!C)
4219     return SDValue();
4220
4221   uint64_t MulAmt = C->getZExtValue();
4222   unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
4223   ShiftAmt = ShiftAmt & (32 - 1);
4224   SDValue V = N->getOperand(0);
4225   DebugLoc DL = N->getDebugLoc();
4226
4227   SDValue Res;
4228   MulAmt >>= ShiftAmt;
4229   if (isPowerOf2_32(MulAmt - 1)) {
4230     // (mul x, 2^N + 1) => (add (shl x, N), x)
4231     Res = DAG.getNode(ISD::ADD, DL, VT,
4232                       V, DAG.getNode(ISD::SHL, DL, VT,
4233                                      V, DAG.getConstant(Log2_32(MulAmt-1),
4234                                                         MVT::i32)));
4235   } else if (isPowerOf2_32(MulAmt + 1)) {
4236     // (mul x, 2^N - 1) => (sub (shl x, N), x)
4237     Res = DAG.getNode(ISD::SUB, DL, VT,
4238                       DAG.getNode(ISD::SHL, DL, VT,
4239                                   V, DAG.getConstant(Log2_32(MulAmt+1),
4240                                                      MVT::i32)),
4241                       V);
4242   } else
4243     return SDValue();
4244
4245   if (ShiftAmt != 0)
4246     Res = DAG.getNode(ISD::SHL, DL, VT, Res,
4247                       DAG.getConstant(ShiftAmt, MVT::i32));
4248
4249   // Do not add new nodes to DAG combiner worklist.
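  // For example, (mul x, 10): ShiftAmt = 1 and MulAmt becomes 5 = 2^2 + 1,
  // so Res = (add (shl x, 2), x), and the final shift above yields
  // (shl (add (shl x, 2), x), 1) == x*10.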
4250 DCI.CombineTo(N, Res, false); 4251 return SDValue(); 4252} 4253 4254/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 4255static SDValue PerformORCombine(SDNode *N, 4256 TargetLowering::DAGCombinerInfo &DCI, 4257 const ARMSubtarget *Subtarget) { 4258 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 4259 // reasonable. 4260 4261 // BFI is only available on V6T2+ 4262 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 4263 return SDValue(); 4264 4265 SelectionDAG &DAG = DCI.DAG; 4266 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 4267 DebugLoc DL = N->getDebugLoc(); 4268 // 1) or (and A, mask), val => ARMbfi A, val, mask 4269 // iff (val & mask) == val 4270 // 4271 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 4272 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 4273 // && CountPopulation_32(mask) == CountPopulation_32(~mask2) 4274 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 4275 // && CountPopulation_32(mask) == CountPopulation_32(~mask2) 4276 // (i.e., copy a bitfield value into another bitfield of the same width) 4277 if (N0.getOpcode() != ISD::AND) 4278 return SDValue(); 4279 4280 EVT VT = N->getValueType(0); 4281 if (VT != MVT::i32) 4282 return SDValue(); 4283 4284 4285 // The value and the mask need to be constants so we can verify this is 4286 // actually a bitfield set. If the mask is 0xffff, we can do better 4287 // via a movt instruction, so don't use BFI in that case. 4288 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4289 if (!C) 4290 return SDValue(); 4291 unsigned Mask = C->getZExtValue(); 4292 if (Mask == 0xffff) 4293 return SDValue(); 4294 SDValue Res; 4295 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 4296 if ((C = dyn_cast<ConstantSDNode>(N1))) { 4297 unsigned Val = C->getZExtValue(); 4298 if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val) 4299 return SDValue(); 4300 Val >>= CountTrailingZeros_32(~Mask); 4301 4302 Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), 4303 DAG.getConstant(Val, MVT::i32), 4304 DAG.getConstant(Mask, MVT::i32)); 4305 4306 // Do not add new nodes to DAG combiner worklist. 4307 DCI.CombineTo(N, Res, false); 4308 } else if (N1.getOpcode() == ISD::AND) { 4309 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 4310 C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 4311 if (!C) 4312 return SDValue(); 4313 unsigned Mask2 = C->getZExtValue(); 4314 4315 if (ARM::isBitFieldInvertedMask(Mask) && 4316 ARM::isBitFieldInvertedMask(~Mask2) && 4317 (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { 4318 // The pack halfword instruction works better for masks that fit it, 4319 // so use that when it's available. 4320 if (Subtarget->hasT2ExtractPack() && 4321 (Mask == 0xffff || Mask == 0xffff0000)) 4322 return SDValue(); 4323 // 2a 4324 unsigned lsb = CountTrailingZeros_32(Mask2); 4325 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 4326 DAG.getConstant(lsb, MVT::i32)); 4327 Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res, 4328 DAG.getConstant(Mask, MVT::i32)); 4329 // Do not add new nodes to DAG combiner worklist. 4330 DCI.CombineTo(N, Res, false); 4331 } else if (ARM::isBitFieldInvertedMask(~Mask) && 4332 ARM::isBitFieldInvertedMask(Mask2) && 4333 (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { 4334 // The pack halfword instruction works better for masks that fit it, 4335 // so use that when it's available. 
4336 if (Subtarget->hasT2ExtractPack() && 4337 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 4338 return SDValue(); 4339 // 2b 4340 unsigned lsb = CountTrailingZeros_32(Mask); 4341 Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4342 DAG.getConstant(lsb, MVT::i32)); 4343 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 4344 DAG.getConstant(Mask2, MVT::i32)); 4345 // Do not add new nodes to DAG combiner worklist. 4346 DCI.CombineTo(N, Res, false); 4347 } 4348 } 4349 4350 return SDValue(); 4351} 4352 4353/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 4354/// ARMISD::VMOVRRD. 4355static SDValue PerformVMOVRRDCombine(SDNode *N, 4356 TargetLowering::DAGCombinerInfo &DCI) { 4357 // fmrrd(fmdrr x, y) -> x,y 4358 SDValue InDouble = N->getOperand(0); 4359 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 4360 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 4361 return SDValue(); 4362} 4363 4364/// PerformVDUPLANECombine - Target-specific dag combine xforms for 4365/// ARMISD::VDUPLANE. 4366static SDValue PerformVDUPLANECombine(SDNode *N, 4367 TargetLowering::DAGCombinerInfo &DCI) { 4368 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 4369 // redundant. 4370 SDValue Op = N->getOperand(0); 4371 EVT VT = N->getValueType(0); 4372 4373 // Ignore bit_converts. 4374 while (Op.getOpcode() == ISD::BIT_CONVERT) 4375 Op = Op.getOperand(0); 4376 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 4377 return SDValue(); 4378 4379 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 4380 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 4381 // The canonical VMOV for a zero vector uses a 32-bit element size. 4382 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4383 unsigned EltBits; 4384 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 4385 EltSize = 8; 4386 if (EltSize > VT.getVectorElementType().getSizeInBits()) 4387 return SDValue(); 4388 4389 SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); 4390 return DCI.CombineTo(N, Res, false); 4391} 4392 4393/// getVShiftImm - Check if this is a valid build_vector for the immediate 4394/// operand of a vector shift operation, where all the elements of the 4395/// build_vector must have the same constant integer value. 4396static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 4397 // Ignore bit_converts. 4398 while (Op.getOpcode() == ISD::BIT_CONVERT) 4399 Op = Op.getOperand(0); 4400 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 4401 APInt SplatBits, SplatUndef; 4402 unsigned SplatBitSize; 4403 bool HasAnyUndefs; 4404 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 4405 HasAnyUndefs, ElementBits) || 4406 SplatBitSize > ElementBits) 4407 return false; 4408 Cnt = SplatBits.getSExtValue(); 4409 return true; 4410} 4411 4412/// isVShiftLImm - Check if this is a valid build_vector for the immediate 4413/// operand of a vector shift left operation. That value must be in the range: 4414/// 0 <= Value < ElementBits for a left shift; or 4415/// 0 <= Value <= ElementBits for a long left shift. 4416static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 4417 assert(VT.isVector() && "vector shift count is not a vector type"); 4418 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 4419 if (! getVShiftImm(Op, ElementBits, Cnt)) 4420 return false; 4421 return (Cnt >= 0 && (isLong ? 
Cnt-1 : Cnt) < ElementBits);
4422}
4423
4424/// isVShiftRImm - Check if this is a valid build_vector for the immediate
4425/// operand of a vector shift right operation.  For a shift opcode, the value
4426/// is positive, but for an intrinsic the shift count must be negative.  The
4427/// absolute value must be in the range:
4428///   1 <= |Value| <= ElementBits for a right shift; or
4429///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
4430static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
4431                         int64_t &Cnt) {
4432   assert(VT.isVector() && "vector shift count is not a vector type");
4433   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4434   if (! getVShiftImm(Op, ElementBits, Cnt))
4435     return false;
4436   if (isIntrinsic)
4437     Cnt = -Cnt;
4438   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
4439}
4440
4441/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
4442static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
4443   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4444   switch (IntNo) {
4445   default:
4446     // Don't do anything for most intrinsics.
4447     break;
4448
4449   // Vector shifts: check for immediate versions and lower them.
4450   // Note: This is done during DAG combining instead of DAG legalizing because
4451   // the build_vectors for 64-bit vector element shift counts are generally
4452   // not legal, and it is hard to see their values after they get legalized to
4453   // loads from a constant pool.
4454   case Intrinsic::arm_neon_vshifts:
4455   case Intrinsic::arm_neon_vshiftu:
4456   case Intrinsic::arm_neon_vshiftls:
4457   case Intrinsic::arm_neon_vshiftlu:
4458   case Intrinsic::arm_neon_vshiftn:
4459   case Intrinsic::arm_neon_vrshifts:
4460   case Intrinsic::arm_neon_vrshiftu:
4461   case Intrinsic::arm_neon_vrshiftn:
4462   case Intrinsic::arm_neon_vqshifts:
4463   case Intrinsic::arm_neon_vqshiftu:
4464   case Intrinsic::arm_neon_vqshiftsu:
4465   case Intrinsic::arm_neon_vqshiftns:
4466   case Intrinsic::arm_neon_vqshiftnu:
4467   case Intrinsic::arm_neon_vqshiftnsu:
4468   case Intrinsic::arm_neon_vqrshiftns:
4469   case Intrinsic::arm_neon_vqrshiftnu:
4470   case Intrinsic::arm_neon_vqrshiftnsu: {
4471     EVT VT = N->getOperand(1).getValueType();
4472     int64_t Cnt;
4473     unsigned VShiftOpc = 0;
4474
4475     switch (IntNo) {
4476     case Intrinsic::arm_neon_vshifts:
4477     case Intrinsic::arm_neon_vshiftu:
4478       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
4479         VShiftOpc = ARMISD::VSHL;
4480         break;
4481       }
4482       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
4483         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
4484 ARMISD::VSHRs : ARMISD::VSHRu); 4485 break; 4486 } 4487 return SDValue(); 4488 4489 case Intrinsic::arm_neon_vshiftls: 4490 case Intrinsic::arm_neon_vshiftlu: 4491 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 4492 break; 4493 llvm_unreachable("invalid shift count for vshll intrinsic"); 4494 4495 case Intrinsic::arm_neon_vrshifts: 4496 case Intrinsic::arm_neon_vrshiftu: 4497 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 4498 break; 4499 return SDValue(); 4500 4501 case Intrinsic::arm_neon_vqshifts: 4502 case Intrinsic::arm_neon_vqshiftu: 4503 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4504 break; 4505 return SDValue(); 4506 4507 case Intrinsic::arm_neon_vqshiftsu: 4508 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4509 break; 4510 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 4511 4512 case Intrinsic::arm_neon_vshiftn: 4513 case Intrinsic::arm_neon_vrshiftn: 4514 case Intrinsic::arm_neon_vqshiftns: 4515 case Intrinsic::arm_neon_vqshiftnu: 4516 case Intrinsic::arm_neon_vqshiftnsu: 4517 case Intrinsic::arm_neon_vqrshiftns: 4518 case Intrinsic::arm_neon_vqrshiftnu: 4519 case Intrinsic::arm_neon_vqrshiftnsu: 4520 // Narrowing shifts require an immediate right shift. 4521 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 4522 break; 4523 llvm_unreachable("invalid shift count for narrowing vector shift " 4524 "intrinsic"); 4525 4526 default: 4527 llvm_unreachable("unhandled vector shift"); 4528 } 4529 4530 switch (IntNo) { 4531 case Intrinsic::arm_neon_vshifts: 4532 case Intrinsic::arm_neon_vshiftu: 4533 // Opcode already set above. 4534 break; 4535 case Intrinsic::arm_neon_vshiftls: 4536 case Intrinsic::arm_neon_vshiftlu: 4537 if (Cnt == VT.getVectorElementType().getSizeInBits()) 4538 VShiftOpc = ARMISD::VSHLLi; 4539 else 4540 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
4541 ARMISD::VSHLLs : ARMISD::VSHLLu); 4542 break; 4543 case Intrinsic::arm_neon_vshiftn: 4544 VShiftOpc = ARMISD::VSHRN; break; 4545 case Intrinsic::arm_neon_vrshifts: 4546 VShiftOpc = ARMISD::VRSHRs; break; 4547 case Intrinsic::arm_neon_vrshiftu: 4548 VShiftOpc = ARMISD::VRSHRu; break; 4549 case Intrinsic::arm_neon_vrshiftn: 4550 VShiftOpc = ARMISD::VRSHRN; break; 4551 case Intrinsic::arm_neon_vqshifts: 4552 VShiftOpc = ARMISD::VQSHLs; break; 4553 case Intrinsic::arm_neon_vqshiftu: 4554 VShiftOpc = ARMISD::VQSHLu; break; 4555 case Intrinsic::arm_neon_vqshiftsu: 4556 VShiftOpc = ARMISD::VQSHLsu; break; 4557 case Intrinsic::arm_neon_vqshiftns: 4558 VShiftOpc = ARMISD::VQSHRNs; break; 4559 case Intrinsic::arm_neon_vqshiftnu: 4560 VShiftOpc = ARMISD::VQSHRNu; break; 4561 case Intrinsic::arm_neon_vqshiftnsu: 4562 VShiftOpc = ARMISD::VQSHRNsu; break; 4563 case Intrinsic::arm_neon_vqrshiftns: 4564 VShiftOpc = ARMISD::VQRSHRNs; break; 4565 case Intrinsic::arm_neon_vqrshiftnu: 4566 VShiftOpc = ARMISD::VQRSHRNu; break; 4567 case Intrinsic::arm_neon_vqrshiftnsu: 4568 VShiftOpc = ARMISD::VQRSHRNsu; break; 4569 } 4570 4571 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4572 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 4573 } 4574 4575 case Intrinsic::arm_neon_vshiftins: { 4576 EVT VT = N->getOperand(1).getValueType(); 4577 int64_t Cnt; 4578 unsigned VShiftOpc = 0; 4579 4580 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 4581 VShiftOpc = ARMISD::VSLI; 4582 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 4583 VShiftOpc = ARMISD::VSRI; 4584 else { 4585 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 4586 } 4587 4588 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4589 N->getOperand(1), N->getOperand(2), 4590 DAG.getConstant(Cnt, MVT::i32)); 4591 } 4592 4593 case Intrinsic::arm_neon_vqrshifts: 4594 case Intrinsic::arm_neon_vqrshiftu: 4595 // No immediate versions of these to check for. 4596 break; 4597 } 4598 4599 return SDValue(); 4600} 4601 4602/// PerformShiftCombine - Checks for immediate versions of vector shifts and 4603/// lowers them. As with the vector shift intrinsics, this is done during DAG 4604/// combining instead of DAG legalizing because the build_vectors for 64-bit 4605/// vector element shift counts are generally not legal, and it is hard to see 4606/// their values after they get legalized to loads from a constant pool. 4607static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 4608 const ARMSubtarget *ST) { 4609 EVT VT = N->getValueType(0); 4610 4611 // Nothing to be done for scalar shifts. 4612 if (! VT.isVector()) 4613 return SDValue(); 4614 4615 assert(ST->hasNEON() && "unexpected vector shift"); 4616 int64_t Cnt; 4617 4618 switch (N->getOpcode()) { 4619 default: llvm_unreachable("unexpected shift opcode"); 4620 4621 case ISD::SHL: 4622 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 4623 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 4624 DAG.getConstant(Cnt, MVT::i32)); 4625 break; 4626 4627 case ISD::SRA: 4628 case ISD::SRL: 4629 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 4630 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 

/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them.  As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);

  // Nothing to be done for scalar shifts.
  if (!VT.isVector())
    return SDValue();

  assert(ST->hasNEON() && "unexpected vector shift");
  int64_t Cnt;

  switch (N->getOpcode()) {
  default: llvm_unreachable("unexpected shift opcode");

  case ISD::SHL:
    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    break;

  case ISD::SRA:
  case ISD::SRL:
    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
                            ARMISD::VSHRs : ARMISD::VSHRu);
      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
                         DAG.getConstant(Cnt, MVT::i32));
    }
  }
  return SDValue();
}

/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
  SDValue N0 = N->getOperand(0);

  // Check for sign- and zero-extensions of vector extract operations of 8-
  // and 16-bit vector elements.  NEON supports these directly.  They are
  // handled during DAG combining because type legalization will promote them
  // to 32-bit types and it is messy to recognize the operations after that.
  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue Vec = N0.getOperand(0);
    SDValue Lane = N0.getOperand(1);
    EVT VT = N->getValueType(0);
    EVT EltVT = N0.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();

    if (VT == MVT::i32 &&
        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
        TLI.isTypeLegal(Vec.getValueType())) {

      unsigned Opc = 0;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode");
      case ISD::SIGN_EXTEND:
        Opc = ARMISD::VGETLANEs;
        break;
      case ISD::ZERO_EXTEND:
      case ISD::ANY_EXTEND:
        Opc = ARMISD::VGETLANEu;
        break;
      }
      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
    }
  }

  return SDValue();
}
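
// Example of the extend combine above:
//   %e = extractelement <8 x i8> %v, i32 3
//   %s = sext i8 %e to i32
// is folded into a single ARMISD::VGETLANEs node, which selects to
// "vmov.s8 r0, d0[3]" instead of an extract followed by a separate scalar
// sign extension.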

/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
                                       const ARMSubtarget *ST) {
  // If the target supports NEON, try to use vmax/vmin instructions for f32
  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
  // a NaN; only do the transformation when it matches that behavior.

  // For now only do this when using NEON for FP operations; if using VFP, it
  // is not obvious that the benefit outweighs the cost of switching to the
  // NEON pipeline.
  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
      N->getValueType(0) != MVT::f32)
    return SDValue();

  SDValue CondLHS = N->getOperand(0);
  SDValue CondRHS = N->getOperand(1);
  SDValue LHS = N->getOperand(2);
  SDValue RHS = N->getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

  unsigned Opcode = 0;
  bool IsReversed;
  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
    IsReversed = false; // x CC y ? x : y
  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
    IsReversed = true;  // x CC y ? y : x
  } else {
    return SDValue();
  }

  bool IsUnordered;
  switch (CC) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE:
    // If LHS is NaN, an ordered comparison will be false and the result will
    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
      break;
    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
    // will return -0, so vmin can only be used for unsafe math or if one of
    // the operands is known to be nonzero.
    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
        !UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
      break;
    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
    break;

  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // If LHS is NaN, an ordered comparison will be false and the result will
    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
      break;
    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
    // will return +0, so vmax can only be used for unsafe math or if one of
    // the operands is known to be nonzero.
    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
        !UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
      break;
    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
    break;
  }

  if (!Opcode)
    return SDValue();
  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
}

SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:        return PerformADDCombine(N, DCI);
  case ISD::SUB:        return PerformSUBCombine(N, DCI);
  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
  }
  return SDValue();
}
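
// Example of the SELECT_CC combine above: when both operands are known to be
// non-NaN (and, for the "or equal" cases, nonzero), IR such as
//   %c = fcmp olt float %x, %y
//   %m = select i1 %c, float %x, float %y
// becomes an ARMISD::FMIN node and is selected as a single vmin.f32 instead
// of a VFP compare followed by a conditional move.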

bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
  if (!Subtarget->hasV6Ops())
    // Pre-v6 does not support unaligned mem access.
    return false;

  // v6+ may or may not support unaligned mem access depending on the system
  // configuration.
  // FIXME: This is pretty conservative.  Should we provide a cmdline option
  // to control the behaviour?
  if (!Subtarget->isTargetDarwin())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  // FIXME: VLD1 etc with standard alignment is legal.
  }
}

static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
  if (V < 0)
    return false;

  unsigned Scale = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    // Scale == 1.
    break;
  case MVT::i16:
    Scale = 2;
    break;
  case MVT::i32:
    Scale = 4;
    break;
  }

  // The offset must be a multiple of the access size and, once scaled, must
  // fit in an unsigned 5-bit immediate.
  if ((V & (Scale - 1)) != 0)
    return false;
  V /= Scale;
  return V == (V & ((1LL << 5) - 1));
}

static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
                                      const ARMSubtarget *Subtarget) {
  bool isNeg = false;
  if (V < 0) {
    isNeg = true;
    V = -V;
  }

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // + imm12 or - imm8
    if (isNeg)
      return V == (V & ((1LL << 8) - 1));
    return V == (V & ((1LL << 12) - 1));
  case MVT::f32:
  case MVT::f64:
    // Same as ARM mode.  FIXME: NEON?
    if (!Subtarget->hasVFP2())
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
static bool isLegalAddressImmediate(int64_t V, EVT VT,
                                    const ARMSubtarget *Subtarget) {
  if (V == 0)
    return true;

  if (!VT.isSimple())
    return false;

  if (Subtarget->isThumb1Only())
    return isLegalT1AddressImmediate(V, VT);
  else if (Subtarget->isThumb2())
    return isLegalT2AddressImmediate(V, VT, Subtarget);

  // ARM mode.
  if (V < 0)
    V = -V;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i32:
    // +- imm12
    return V == (V & ((1LL << 12) - 1));
  case MVT::i16:
    // +- imm8
    return V == (V & ((1LL << 8) - 1));
  case MVT::f32:
  case MVT::f64:
    if (!Subtarget->hasVFP2()) // FIXME: NEON?
      return false;
    if ((V & 3) != 0)
      return false;
    V >>= 2;
    return V == (V & ((1LL << 8) - 1));
  }
}

bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
                                                      EVT VT) const {
  int Scale = AM.Scale;
  if (Scale < 0)
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    if (Scale == 1)
      return true;
    // r + r << imm
    Scale = Scale & ~1;
    return Scale == 2 || Scale == 4 || Scale == 8;
  case MVT::i64:
    // r + r
    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
      return true;
    return false;
  case MVT::isVoid:
    // Note, we allow "void" uses (basically, uses that aren't loads or
    // stores), because arm allows folding a scale into many arithmetic
    // operations.  This should be made more precise and revisited later.

    // Allow r << imm, but the imm has to be a multiple of two.
    if (Scale & 1) return false;
    return isPowerOf2_32(Scale);
  }
}
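
// Illustrative consequence of the checks above, for an i32 load / store:
// Thumb1 accepts offsets that are multiples of 4 in [0, 124] (imm5 scaled
// by the access size), Thumb2 accepts [-255, 4095] (-imm8 / +imm12), and
// ARM mode accepts [-4095, 4095] (+/- imm12).  So "ldr r0, [r1, #124]" is
// encodable everywhere, while "ldr r0, [r1, #-4]" is not encodable in
// Thumb1 mode.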

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  EVT VT = getValueType(Ty, true);
  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
    return false;

  // Can never fold addr of global into load/store.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0:  // no scale reg, must be "r+i" or "r", or "i".
    break;
  case 1:
    if (Subtarget->isThumb1Only())
      return false;
    // FALL THROUGH.
  default:
    // ARM doesn't support any R+R*scale+imm addr modes.
    if (AM.BaseOffs)
      return false;

    if (!VT.isSimple())
      return false;

    if (Subtarget->isThumb2())
      return isLegalT2ScaledAddressingMode(AM, VT);

    int Scale = AM.Scale;
    switch (VT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i32:
      if (Scale < 0) Scale = -Scale;
      if (Scale == 1)
        return true;
      // r + r << imm
      return isPowerOf2_32(Scale & ~1);
    case MVT::i16:
    case MVT::i64:
      // r + r
      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
        return true;
      return false;

    case MVT::isVoid:
      // Note, we allow "void" uses (basically, uses that aren't loads or
      // stores), because arm allows folding a scale into many arithmetic
      // operations.  This should be made more precise and revisited later.

      // Allow r << imm, but the imm has to be a multiple of two.
      if (Scale & 1) return false;
      return isPowerOf2_32(Scale);
    }
    break;
  }
  return true;
}
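
// Example of the rules above, in ARM mode with an i32 access: a mode of
// {base reg + 4 * scale reg} is legal and selects to something like
// "ldr r0, [r1, r2, lsl #2]", but {base reg + 4 * scale reg + 8} is
// rejected because ARM has no R+R*scale+imm addressing mode.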

/// isLegalICmpImmediate - Return true if the specified immediate is a legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  if (!Subtarget->isThumb())
    return ARM_AM::getSOImmVal(Imm) != -1;
  if (Subtarget->isThumb2())
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  return Imm >= 0 && Imm <= 255;
}

static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
    // AddressingMode 3
    Base = Ptr->getOperand(0);
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -256) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        return true;
      }
    }
    isInc = (Ptr->getOpcode() == ISD::ADD);
    Offset = Ptr->getOperand(1);
    return true;
  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
    // AddressingMode 2
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC < 0 && RHSC > -0x1000) {
        assert(Ptr->getOpcode() == ISD::ADD);
        isInc = false;
        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
        Base = Ptr->getOperand(0);
        return true;
      }
    }

    if (Ptr->getOpcode() == ISD::ADD) {
      isInc = true;
      ARM_AM::ShiftOpc ShOpcVal =
        ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
      if (ShOpcVal != ARM_AM::no_shift) {
        Base = Ptr->getOperand(1);
        Offset = Ptr->getOperand(0);
      } else {
        Base = Ptr->getOperand(0);
        Offset = Ptr->getOperand(1);
      }
      return true;
    }

    isInc = (Ptr->getOpcode() == ISD::ADD);
    Base = Ptr->getOperand(0);
    Offset = Ptr->getOperand(1);
    return true;
  }

  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
  return false;
}

static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
                                     bool isSEXTLoad, SDValue &Base,
                                     SDValue &Offset, bool &isInc,
                                     SelectionDAG &DAG) {
  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
    return false;

  Base = Ptr->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
      return true;
    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bits, no zero.
      isInc = Ptr->getOpcode() == ISD::ADD;
      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
      return true;
    }
  }

  return false;
}
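
// Example of what the helpers above recognize: for a pointer computed as
// (add r1, #4) feeding an i32 load, they return Base = r1, Offset = #4, and
// isInc = true, allowing the DAG combiner to form the pre-indexed
// "ldr r0, [r1, #4]!" or post-indexed "ldr r0, [r1], #4" forms via the
// hooks below.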

/// getPreIndexedAddressParts - Returns true by value and sets the base
/// pointer, offset pointer, and addressing mode by reference if the node's
/// address can be legally represented as a pre-indexed load / store address.
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                             SDValue &Offset,
                                             ISD::MemIndexedMode &AM,
                                             SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                       Offset, isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                        Offset, isInc, DAG);
  if (!isLegal)
    return false;

  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
  return true;
}

/// getPostIndexedAddressParts - Returns true by value and sets the base
/// pointer, offset pointer, and addressing mode by reference if this node
/// can be combined with a load / store to form a post-indexed load / store.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  if (Subtarget->isThumb1Only())
    return false;

  EVT VT;
  SDValue Ptr;
  bool isSEXTLoad = false;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else
    return false;

  bool isInc;
  bool isLegal = false;
  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                       isInc, DAG);
  else
    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                        isInc, DAG);
  if (!isLegal)
    return false;

  if (Ptr != Base) {
    // Swap base ptr and offset to catch more post-indexed loads / stores
    // when it's legal.  In Thumb2 mode, the offset must be an immediate.
    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
        !Subtarget->isThumb2())
      std::swap(Base, Offset);

    // A post-indexed load / store updates the base pointer.
    if (Ptr != Base)
      return false;
  }

  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
  return true;
}
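
// Note on the known-bits computation below: for ARMISD::CMOV the result can
// come from either operand, so a bit is only known if it is known the same
// way on both sides.  For example, if one arm is known to be exactly 0x1 and
// the other exactly 0x3, the combined result has bit 0 known one, bits 2 and
// up known zero, and bit 1 unknown.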

void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case ARMISD::CMOV: {
    // Bits are known zero/one if known on the LHS and RHS.
    DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
    if (KnownZero == 0 && KnownOne == 0) return;

    APInt KnownZeroRHS, KnownOneRHS;
    DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
                          KnownZeroRHS, KnownOneRHS, Depth+1);
    KnownZero &= KnownZeroRHS;
    KnownOne  &= KnownOneRHS;
    return;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           ARM Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'l': return C_RegisterClass;
    case 'w': return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass*>
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC ARM Constraint Letters
    switch (Constraint[0]) {
    case 'l':
      if (Subtarget->isThumb())
        return std::make_pair(0U, ARM::tGPRRegisterClass);
      else
        return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'r':
      return std::make_pair(0U, ARM::GPRRegisterClass);
    case 'w':
      if (VT == MVT::f32)
        return std::make_pair(0U, ARM::SPRRegisterClass);
      if (VT.getSizeInBits() == 64)
        return std::make_pair(0U, ARM::DPRRegisterClass);
      if (VT.getSizeInBits() == 128)
        return std::make_pair(0U, ARM::QPRRegisterClass);
      break;
    }
  }
  if (StringRef("{cc}").equals_lower(Constraint))
    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

std::vector<unsigned> ARMTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  EVT VT) const {
  if (Constraint.size() != 1)
    return std::vector<unsigned>();

  switch (Constraint[0]) {      // GCC ARM Constraint Letters
  default: break;
  case 'l':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 0);
  case 'r':
    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
                                 ARM::R12, ARM::LR, 0);
  case 'w':
    if (VT == MVT::f32)
      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
                                   ARM::S12, ARM::S13, ARM::S14, ARM::S15,
                                   ARM::S16, ARM::S17, ARM::S18, ARM::S19,
                                   ARM::S20, ARM::S21, ARM::S22, ARM::S23,
                                   ARM::S24, ARM::S25, ARM::S26, ARM::S27,
                                   ARM::S28, ARM::S29, ARM::S30, ARM::S31, 0);
    if (VT.getSizeInBits() == 64)
      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
                                   ARM::D8, ARM::D9, ARM::D10, ARM::D11,
                                   ARM::D12, ARM::D13, ARM::D14, ARM::D15, 0);
    if (VT.getSizeInBits() == 128)
      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
    break;
  }

  return std::vector<unsigned>();
}
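
// For reference, these letters follow GCC's ARM inline asm constraints, so
// code such as
//   asm("vadd.f32 %0, %1, %2" : "=w"(r) : "w"(a), "w"(b));
// draws its operands from the VFP / NEON registers ('w'), while 'l'
// restricts an operand to r0-r7 in Thumb mode and 'r' allows any core
// register.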
5296 5297/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5298/// vector. If it is invalid, don't add anything to Ops. 5299void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 5300 char Constraint, 5301 std::vector<SDValue>&Ops, 5302 SelectionDAG &DAG) const { 5303 SDValue Result(0, 0); 5304 5305 switch (Constraint) { 5306 default: break; 5307 case 'I': case 'J': case 'K': case 'L': 5308 case 'M': case 'N': case 'O': 5309 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 5310 if (!C) 5311 return; 5312 5313 int64_t CVal64 = C->getSExtValue(); 5314 int CVal = (int) CVal64; 5315 // None of these constraints allow values larger than 32 bits. Check 5316 // that the value fits in an int. 5317 if (CVal != CVal64) 5318 return; 5319 5320 switch (Constraint) { 5321 case 'I': 5322 if (Subtarget->isThumb1Only()) { 5323 // This must be a constant between 0 and 255, for ADD 5324 // immediates. 5325 if (CVal >= 0 && CVal <= 255) 5326 break; 5327 } else if (Subtarget->isThumb2()) { 5328 // A constant that can be used as an immediate value in a 5329 // data-processing instruction. 5330 if (ARM_AM::getT2SOImmVal(CVal) != -1) 5331 break; 5332 } else { 5333 // A constant that can be used as an immediate value in a 5334 // data-processing instruction. 5335 if (ARM_AM::getSOImmVal(CVal) != -1) 5336 break; 5337 } 5338 return; 5339 5340 case 'J': 5341 if (Subtarget->isThumb()) { // FIXME thumb2 5342 // This must be a constant between -255 and -1, for negated ADD 5343 // immediates. This can be used in GCC with an "n" modifier that 5344 // prints the negated value, for use with SUB instructions. It is 5345 // not useful otherwise but is implemented for compatibility. 5346 if (CVal >= -255 && CVal <= -1) 5347 break; 5348 } else { 5349 // This must be a constant between -4095 and 4095. It is not clear 5350 // what this constraint is intended for. Implemented for 5351 // compatibility with GCC. 5352 if (CVal >= -4095 && CVal <= 4095) 5353 break; 5354 } 5355 return; 5356 5357 case 'K': 5358 if (Subtarget->isThumb1Only()) { 5359 // A 32-bit value where only one byte has a nonzero value. Exclude 5360 // zero to match GCC. This constraint is used by GCC internally for 5361 // constants that can be loaded with a move/shift combination. 5362 // It is not useful otherwise but is implemented for compatibility. 5363 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 5364 break; 5365 } else if (Subtarget->isThumb2()) { 5366 // A constant whose bitwise inverse can be used as an immediate 5367 // value in a data-processing instruction. This can be used in GCC 5368 // with a "B" modifier that prints the inverted value, for use with 5369 // BIC and MVN instructions. It is not useful otherwise but is 5370 // implemented for compatibility. 5371 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 5372 break; 5373 } else { 5374 // A constant whose bitwise inverse can be used as an immediate 5375 // value in a data-processing instruction. This can be used in GCC 5376 // with a "B" modifier that prints the inverted value, for use with 5377 // BIC and MVN instructions. It is not useful otherwise but is 5378 // implemented for compatibility. 5379 if (ARM_AM::getSOImmVal(~CVal) != -1) 5380 break; 5381 } 5382 return; 5383 5384 case 'L': 5385 if (Subtarget->isThumb1Only()) { 5386 // This must be a constant between -7 and 7, 5387 // for 3-operand ADD/SUB immediate instructions. 
        if (CVal >= -7 && CVal <= 7)
          break;
      } else if (Subtarget->isThumb2()) {
        // A constant whose negation can be used as an immediate value in a
        // data-processing instruction.  This can be used in GCC with an "n"
        // modifier that prints the negated value, for use with SUB
        // instructions.  It is not useful otherwise but is implemented for
        // compatibility.
        if (ARM_AM::getT2SOImmVal(-CVal) != -1)
          break;
      } else {
        // A constant whose negation can be used as an immediate value in a
        // data-processing instruction.  This can be used in GCC with an "n"
        // modifier that prints the negated value, for use with SUB
        // instructions.  It is not useful otherwise but is implemented for
        // compatibility.
        if (ARM_AM::getSOImmVal(-CVal) != -1)
          break;
      }
      return;

    case 'M':
      if (Subtarget->isThumb()) {  // FIXME thumb2
        // This must be a multiple of 4 between 0 and 1020, for
        // ADD sp + immediate.
        if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
          break;
      } else {
        // A power of two or a constant between 0 and 32.  This is used in
        // GCC for the shift amount on shifted register operands, but it is
        // useful in general for any shift amounts.
        if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
          break;
      }
      return;

    case 'N':
      if (Subtarget->isThumb()) {  // FIXME thumb2
        // This must be a constant between 0 and 31, for shift amounts.
        if (CVal >= 0 && CVal <= 31)
          break;
      }
      return;

    case 'O':
      if (Subtarget->isThumb()) {  // FIXME thumb2
        // This must be a multiple of 4 between -508 and 508, for
        // ADD/SUB sp = sp + immediate.
        if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
          break;
      }
      return;
    }
    Result = DAG.getTargetConstant(CVal, Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The ARM target isn't yet aware of offsets.
  return false;
}

int ARM::getVFPf32Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0x7ffff)
    return -1;
  Mantissa >>= 19;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
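
// Worked example for getVFPf32Imm: 1.0f is 0x3f800000, so Sign = 0, Exp = 0,
// and Mantissa = 0.  The exponent encodes as ((0 + 3) & 0x7) ^ 4 = 7, and
// the result is (0 << 7) | (7 << 4) | 0 = 0x70, the 8-bit VFP immediate
// encoding of 1.0.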

int ARM::getVFPf64Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;  // -1022 to 1023
  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0xffffffffffffLL)
    return -1;
  Mantissa >>= 48;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;
  // There can be 1's on either or both "outsides"; all the "inside"
  // bits must be 0's.
  unsigned int lsb = 0, msb = 31;
  while (v & (1 << msb)) --msb;
  while (v & (1 << lsb)) ++lsb;
  for (unsigned int i = lsb; i <= msb; ++i) {
    if (v & (1 << i))
      return false;
  }
  return true;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively.  If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM::getVFPf32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM::getVFPf64Imm(Imm) != -1;
  return false;
}
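
// For example, with VFP3 a legal immediate such as 1.0f (encoding 0x70, see
// getVFPf32Imm above) is materialized directly with "vmov.f32 s0, #1.0",
// whereas 0.1f has no 8-bit encoding, so isFPImmLegal returns false and the
// constant is loaded from the constant pool instead.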