//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

// Hidden option "disable-shifter-op" (default: false). When it is set,
// SelectImmShifterOperand and SelectRegShifterOperand return false without
// attempting a match.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

// Hidden option "check-vmlx-hazard" (default: true). When it is cleared,
// hasNoVMLxHazardUse returns true without inspecting the users of the node.
static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  cl::desc("Check fp vmla / vmls hazard at isel time"),
  cl::init(true));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

/// AddrMode2Type - Value returned by SelectAddrMode2Worker to tell its
/// wrappers which addressing mode 2 form was produced.
enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};

class ARMDAGToDAGISel : public SelectionDAGISel {
  ARMBaseTargetMachine &TM;
  const ARMBaseInstrInfo *TII;

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  /// Caches the instruction info and the subtarget of \p tm for later use.
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
                           CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm),
      TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
      Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
  }

  virtual const char *getPassName() const {
    return "ARM Instruction Selection";
  }

  /// PreprocessISelDAG - Rewrites (add X1, (and (srl X2, c1), c2)) nodes so
  /// that a left shift is exposed on the AND operand; see the definition for
  /// the exact conditions.
  virtual void PreprocessISelDAG();

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);
  }

  SDNode *Select(SDNode *N);


  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  // The two "Shift..." variants below forward to the matchers above with
  // CheckProfitability set to false.
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // SelectAddrMode2Worker does the matching for addressing mode 2 and reports
  // which form it produced; the three wrappers below only differ in how they
  // turn that result into a bool.
  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
                                      SDValue &Offset, SDValue &Opc);
  // True only when the worker produced the base / immediate form.
  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
  }

  // True only when the worker produced the shifter-operand form.
  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
  }

  // Accepts whichever form the worker produced.
  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc) {
    SelectAddrMode2Worker(N, Base, Offset, Opc);
//    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    // This always matches one way or another.
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode5(SDValue N, SDValue &Base,
                       SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
                             unsigned Scale);
  bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2ShifterOperandReg(SDValue N,
                                 SDValue &BaseReg, SDValue &Opc);
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);

  // Immediate predicates. Each one returns true when the corresponding
  // ARM_AM encoder returns something other than -1 for Imm (or for ~Imm in
  // the "_not" variants).
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
  /// ARM.
  SDNode *SelectARMIndexedLoad(SDNode *N);
  SDNode *SelectT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
                          bool isUpdating, unsigned NumVecs,
                          const uint16_t *DOpcodes, const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.  (Q registers are not supported.)
  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                       const uint16_t *Opcodes);

  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
  /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
  /// generated to force the table registers to be consecutive.
  SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);

  /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
  SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  /// SelectCMOVOp - Select CMOV instructions for ARM.
  SDNode *SelectCMOVOp(SDNode *N);
  SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                              ARMCC::CondCodes CCVal, SDValue CCR,
                              SDValue InFlag);
  SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                               ARMCC::CondCodes CCVal, SDValue CCR,
                               SDValue InFlag);
  SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                            ARMCC::CondCodes CCVal, SDValue CCR,
                            SDValue InFlag);
  SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                             ARMCC::CondCodes CCVal, SDValue CCR,
                             SDValue InFlag);

  // Select special operations if node forms integer ABS pattern
  SDNode *SelectABSOp(SDNode *N);

  SDNode *SelectInlineAsm(SDNode *N);

  SDNode *SelectConcatVector(SDNode *N);

  SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                            char ConstraintCode,
                                            std::vector<SDValue> &OutOps);

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
};
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
/// Imm is left untouched when the test fails.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
// Convenience overload that forwards to the SDNode* version.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
// Operand 1 is only inspected after the opcode comparison has succeeded.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// \brief Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
///
/// Note that \p ScaledConstant is also written on the failure paths that come
/// after the constant check: it holds the unscaled value if the value is not a
/// multiple of \p Scale, and the out-of-range quotient otherwise.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  // The zero-extended value is narrowed to int, so it is compared against the
  // range as a signed quantity.
  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

/// PreprocessISelDAG - Walk every node of the current DAG and rewrite
/// qualifying ADD nodes in place (see the pattern description in the loop).
/// Does nothing unless the subtarget reports hasV6T2Ops().
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // Selects which shifter-operand matcher is used further down.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Advance the iterator first to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    // And_imm stays 0 unless one of the two operands is an AND with an i32
    // constant; after this N1 is that AND and N0 is the other operand.
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = CountTrailingZeros_32(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // With the trailing zeros removed, the mask must be a run of ones starting
    // at bit 0, i.e. mask & (mask + 1) == 0.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    // The shift amount must be an i32 constant greater than 2.
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    // The CPTmp values only receive the matcher outputs; they are not used
    // afterwards.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    // New SRL shifts by c1 + tz, new AND uses c2 >> tz, and the result is
    // shifted left by tz again; N's operands are then updated in place.
    Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
                         Srl, CurDAG->getConstant(And_imm, MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
                         N1, CurDAG->getConstant(TZ, MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // Nothing is checked when not optimizing.
  if (OptLevel == CodeGenOpt::None)
    return true;

  // The check can be switched off with the check-vmlx-hazard option.
  if (!CheckVMLxHazard)
    return true;

  // Only Cortex-A8, A9-like and Swift subtargets are examined.
  if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
      !Subtarget->isSwift())
    return true;

  // Anything other than exactly one use is rejected.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  // The single user is neither a CopyToReg nor an already selected machine
  // node.
  return false;
}

/// isShifterOpProfitable - Return true if the shift \p Shift may be folded
/// into its user. Always true on subtargets that are neither A9-like nor
/// Swift. On those two, it is true when the shift has a single use, or when
/// it is an LSL by 2 (or, on Swift only, an LSL by 1).
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

/// SelectImmShifterOperand - Match \p N as a register shifted by a constant.
/// On success \p BaseReg is the shifted value and \p Opc is the target
/// constant built from ARM_AM::getSORegOpc(shift kind, amount & 31).
/// \p BaseReg is already overwritten when the function fails because the
/// shift amount is not a constant.
/// NOTE(review): \p CheckProfitability is never read in this function, unlike
/// in SelectRegShifterOperand -- confirm that this is intentional.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  // Only the low five bits of the shift amount are kept.
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  MVT::i32);
  return true;
}

/// SelectRegShifterOperand - Match \p N as a register shifted by another
/// register (the shift amount must NOT be a constant). On success \p BaseReg
/// is the shifted value, \p ShReg the shift amount and \p Opc the target
/// constant built from ARM_AM::getSORegOpc(shift kind, 0).
/// \p BaseReg (and possibly \p ShReg) are already overwritten on some of the
/// failure paths.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  // Stays 0: the amount lives in a register, not in the encoding.
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  // Constant shift amounts are handled by SelectImmShifterOperand.
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  MVT::i32);
  return true;
}


/// SelectAddrModeImm12 - Split \p N into a base and an immediate offset in
/// the range [0, 0x1000). This function returns true on every path: when no
/// such split is found, \p Base is \p N itself (or its unwrapped / frame index
/// form) and \p OffImm is 0.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
      OffImm = CurDAG->getTargetConstant(0, MVT::i32);
      return true;
    }

    // A Wrapper node is looked through, except when the subtarget uses movt
    // and the wrapped operand is a TargetGlobalAddress.
    if (N.getOpcode() == ARMISD::Wrapper &&
        !(Subtarget->useMovt() &&
                     N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    // A subtraction of C is treated as an addition of -C.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
      }
      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}



/// SelectLdStSOReg - Match \p N as base register +/- (optionally shifted)
/// offset register. \p Opc receives the target constant built from
/// ARM_AM::getAM2Opc(add/sub, shift amount, shift kind).
/// Returns false when \p N is not an add / sub / or-as-add, and when it is an
/// ADD or OR whose right operand is a constant in (-0x1000, 0x1000).
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // A multiply by an odd constant is tried first. On A9-like and Swift
  // subtargets this is only done when the multiply has a single use.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Clear the low bit; what is left must be +/- a power of two.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // The same value serves as base and as shifted offset.
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Not profitable: keep the whole shift node as a plain offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // This is only attempted on subtargets that are neither A9-like nor Swift,
  // and only when hasOneUse() is false for operand 0.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted value becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  MVT::i32);
  return true;
}


//-----

/// SelectAddrMode2Worker - Match \p N for addressing mode 2 and report which
/// form was produced. AM2_BASE is returned for a plain base and for base +/-
/// a constant in (-0x1000, 0x1000); in those cases \p Offset is register 0.
/// AM2_SHOP is returned for base +/- (optionally shifted) register. \p Opc
/// always receives a target constant built from ARM_AM::getAM2Opc.
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  // A multiply by an odd constant is tried first. On A9-like and Swift
  // subtargets this is only done when the multiply has a single use.
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Clear the low bit; what is left must be +/- a power of two.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    // Base only: use N itself, its target frame index, or the operand of a
    // Wrapper (unless the subtarget uses movt for a TargetGlobalAddress).
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               !(Subtarget->useMovt() &&
                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      // Encode the magnitude and carry the sign in the add/sub flag.
      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      MVT::i32);
      return AM2_BASE;
    }
  }

  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Not profitable: keep the whole shift node as a plain offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // This is only attempted on subtargets that are neither A9-like nor Swift,
  // and only when hasOneUse() is false for operand 0.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted value becomes the offset.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  MVT::i32);
  return AM2_SHOP;
}

/// SelectAddrMode2OffsetReg - Match the offset \p N of the indexed load or
/// store \p Op as an (optionally shifted) register. Returns false when \p N
/// is a constant in [0, 0x1000). The add/sub direction is taken from the
/// addressing mode of \p Op: add for PRE_INC / POST_INC, sub otherwise.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // Anything that is not an ISD::LOAD is cast to a store node.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // Constants in this range are matched by the immediate selectors instead.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  MVT::i32);
  return true;
}

/// SelectAddrMode2OffsetImmPre - Match the offset \p N of the indexed load or
/// store \p Op as a constant in [0, 0x1000). On success \p Offset is register
/// 0 and \p Opc is the plain signed value (negated when the addressing mode is
/// a decrement), not an ARM_AM::getAM2Opc encoding.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, MVT::i32);
    return true;
  }

  return false;
}


/// SelectAddrMode2OffsetImm - Match the offset \p N of the indexed load or
/// store \p Op as a constant in [0, 0x1000). On success \p Offset is register
/// 0 and \p Opc is the ARM_AM::getAM2Opc encoding of direction and value.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    MVT::i32);
    return true;
  }

  return false;
}

/// SelectAddrOffsetNone - Accept any address as a bare base; always matches.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

/// SelectAddrMode3 - Match \p N for addressing mode 3: base only, base +/-
/// constant in (-256, 256), or base +/- register. \p Opc receives the target
/// constant built from ARM_AM::getAM3Opc. Every path returns true.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // Base only (a frame index is converted to its target form).
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Encode the magnitude and carry the sign in the add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
    return true;
  }

  // The constant does not fit: use both operands as base and offset register.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
  return true;
}

/// SelectAddrMode3Offset - Match the offset \p N of the indexed load or store
/// \p Op for addressing mode 3. A constant in [0, 256) is encoded in \p Opc
/// with \p Offset set to register 0; anything else is used as the offset
/// register. Every path returns true.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
  return true;
}

/// SelectAddrMode5 - Match \p N for addressing mode 5: base plus a constant
/// that is a multiple of 4 whose quotient lies in (-256, 256). \p Offset
/// receives the target constant built from ARM_AM::getAM5Opc. Every path
/// returns true; when the constant does not qualify, \p N itself is the base
/// and the encoded offset is 0.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               !(Subtarget->useMovt() &&
                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  // RHSC receives the constant already divided by 4.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       MVT::i32);
    return true;
  }

  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     MVT::i32);
  return true;
}

/// SelectAddrMode6 - Use \p N unchanged as the address and derive the
/// alignment operand from the memory node \p Parent. Always matches.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;
  if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned LSNAlign = LSN->getAlignment();
    unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
    // Single-byte accesses, and accesses aligned to less than their size,
    // keep an alignment operand of 0.
    if (LSNAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
  return true;
}

/// SelectAddrMode6Offset - Match the increment \p N of the load or store
/// \p Op. Only POST_INC addressing is accepted. A constant increment equal to
/// the size in bytes of the accessed memory type is replaced by register 0;
/// any other increment is used as given.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    // Compare in bits: the constant is a byte count.
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

/// SelectAddrModePC - Match an ARMISD::PIC_ADD node that has a single use.
/// \p Offset receives its first operand and \p Label its second operand
/// re-emitted as an i32 target constant.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                            SDValue &Base, SDValue &Offset){
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base =
N.getOperand(0); 1067 Offset = N.getOperand(1); 1068 return true; 1069} 1070 1071bool 1072ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base, 1073 SDValue &Offset, unsigned Scale) { 1074 if (Scale == 4) { 1075 SDValue TmpBase, TmpOffImm; 1076 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) 1077 return false; // We want to select tLDRspi / tSTRspi instead. 1078 1079 if (N.getOpcode() == ARMISD::Wrapper && 1080 N.getOperand(0).getOpcode() == ISD::TargetConstantPool) 1081 return false; // We want to select tLDRpci instead. 1082 } 1083 1084 if (!CurDAG->isBaseWithConstantOffset(N)) 1085 return false; 1086 1087 // Thumb does not have [sp, r] address mode. 1088 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); 1089 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); 1090 if ((LHSR && LHSR->getReg() == ARM::SP) || 1091 (RHSR && RHSR->getReg() == ARM::SP)) 1092 return false; 1093 1094 // FIXME: Why do we explicitly check for a match here and then return false? 1095 // Presumably to allow something else to match, but shouldn't this be 1096 // documented? 
1097 int RHSC; 1098 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) 1099 return false; 1100 1101 Base = N.getOperand(0); 1102 Offset = N.getOperand(1); 1103 return true; 1104} 1105 1106bool 1107ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N, 1108 SDValue &Base, 1109 SDValue &Offset) { 1110 return SelectThumbAddrModeRI(N, Base, Offset, 1); 1111} 1112 1113bool 1114ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N, 1115 SDValue &Base, 1116 SDValue &Offset) { 1117 return SelectThumbAddrModeRI(N, Base, Offset, 2); 1118} 1119 1120bool 1121ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N, 1122 SDValue &Base, 1123 SDValue &Offset) { 1124 return SelectThumbAddrModeRI(N, Base, Offset, 4); 1125} 1126 1127bool 1128ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1129 SDValue &Base, SDValue &OffImm) { 1130 if (Scale == 4) { 1131 SDValue TmpBase, TmpOffImm; 1132 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) 1133 return false; // We want to select tLDRspi / tSTRspi instead. 1134 1135 if (N.getOpcode() == ARMISD::Wrapper && 1136 N.getOperand(0).getOpcode() == ISD::TargetConstantPool) 1137 return false; // We want to select tLDRpci instead. 1138 } 1139 1140 if (!CurDAG->isBaseWithConstantOffset(N)) { 1141 if (N.getOpcode() == ARMISD::Wrapper && 1142 !(Subtarget->useMovt() && 1143 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { 1144 Base = N.getOperand(0); 1145 } else { 1146 Base = N; 1147 } 1148 1149 OffImm = CurDAG->getTargetConstant(0, MVT::i32); 1150 return true; 1151 } 1152 1153 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); 1154 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); 1155 if ((LHSR && LHSR->getReg() == ARM::SP) || 1156 (RHSR && RHSR->getReg() == ARM::SP)) { 1157 ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0)); 1158 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1159 unsigned LHSC = LHS ? 
LHS->getZExtValue() : 0; 1160 unsigned RHSC = RHS ? RHS->getZExtValue() : 0; 1161 1162 // Thumb does not have [sp, #imm5] address mode for non-zero imm5. 1163 if (LHSC != 0 || RHSC != 0) return false; 1164 1165 Base = N; 1166 OffImm = CurDAG->getTargetConstant(0, MVT::i32); 1167 return true; 1168 } 1169 1170 // If the RHS is + imm5 * scale, fold into addr mode. 1171 int RHSC; 1172 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1173 Base = N.getOperand(0); 1174 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); 1175 return true; 1176 } 1177 1178 Base = N.getOperand(0); 1179 OffImm = CurDAG->getTargetConstant(0, MVT::i32); 1180 return true; 1181} 1182 1183bool 1184ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1185 SDValue &OffImm) { 1186 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1187} 1188 1189bool 1190ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1191 SDValue &OffImm) { 1192 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1193} 1194 1195bool 1196ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1197 SDValue &OffImm) { 1198 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1199} 1200 1201bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1202 SDValue &Base, SDValue &OffImm) { 1203 if (N.getOpcode() == ISD::FrameIndex) { 1204 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1205 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); 1206 OffImm = CurDAG->getTargetConstant(0, MVT::i32); 1207 return true; 1208 } 1209 1210 if (!CurDAG->isBaseWithConstantOffset(N)) 1211 return false; 1212 1213 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); 1214 if (N.getOperand(0).getOpcode() == ISD::FrameIndex || 1215 (LHSR && LHSR->getReg() == ARM::SP)) { 1216 // If the RHS is + imm8 * scale, fold into addr mode. 
1217 int RHSC; 1218 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1219 Base = N.getOperand(0); 1220 if (Base.getOpcode() == ISD::FrameIndex) { 1221 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1222 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); 1223 } 1224 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); 1225 return true; 1226 } 1227 } 1228 1229 return false; 1230} 1231 1232 1233//===----------------------------------------------------------------------===// 1234// Thumb 2 Addressing Modes 1235//===----------------------------------------------------------------------===// 1236 1237 1238bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg, 1239 SDValue &Opc) { 1240 if (DisableShifterOp) 1241 return false; 1242 1243 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 1244 1245 // Don't match base register only case. That is matched to a separate 1246 // lower complexity pattern with explicit register operand. 1247 if (ShOpcVal == ARM_AM::no_shift) return false; 1248 1249 BaseReg = N.getOperand(0); 1250 unsigned ShImmVal = 0; 1251 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1252 ShImmVal = RHS->getZExtValue() & 31; 1253 Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal)); 1254 return true; 1255 } 1256 1257 return false; 1258} 1259 1260bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1261 SDValue &Base, SDValue &OffImm) { 1262 // Match simple R + imm12 operands. 1263 1264 // Base only. 1265 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1266 !CurDAG->isBaseWithConstantOffset(N)) { 1267 if (N.getOpcode() == ISD::FrameIndex) { 1268 // Match frame index. 
1269 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1270 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); 1271 OffImm = CurDAG->getTargetConstant(0, MVT::i32); 1272 return true; 1273 } 1274 1275 if (N.getOpcode() == ARMISD::Wrapper && 1276 !(Subtarget->useMovt() && 1277 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { 1278 Base = N.getOperand(0); 1279 if (Base.getOpcode() == ISD::TargetConstantPool) 1280 return false; // We want to select t2LDRpci instead. 1281 } else 1282 Base = N; 1283 OffImm = CurDAG->getTargetConstant(0, MVT::i32); 1284 return true; 1285 } 1286 1287 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1288 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1289 // Let t2LDRi8 handle (R - imm8). 1290 return false; 1291 1292 int RHSC = (int)RHS->getZExtValue(); 1293 if (N.getOpcode() == ISD::SUB) 1294 RHSC = -RHSC; 1295 1296 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1297 Base = N.getOperand(0); 1298 if (Base.getOpcode() == ISD::FrameIndex) { 1299 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1300 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); 1301 } 1302 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); 1303 return true; 1304 } 1305 } 1306 1307 // Base only. 1308 Base = N; 1309 OffImm = CurDAG->getTargetConstant(0, MVT::i32); 1310 return true; 1311} 1312 1313bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1314 SDValue &Base, SDValue &OffImm) { 1315 // Match simple R - imm8 operands. 
1316 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1317 !CurDAG->isBaseWithConstantOffset(N)) 1318 return false; 1319 1320 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1321 int RHSC = (int)RHS->getSExtValue(); 1322 if (N.getOpcode() == ISD::SUB) 1323 RHSC = -RHSC; 1324 1325 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1326 Base = N.getOperand(0); 1327 if (Base.getOpcode() == ISD::FrameIndex) { 1328 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1329 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); 1330 } 1331 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); 1332 return true; 1333 } 1334 } 1335 1336 return false; 1337} 1338 1339bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1340 SDValue &OffImm){ 1341 unsigned Opcode = Op->getOpcode(); 1342 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1343 ? cast<LoadSDNode>(Op)->getAddressingMode() 1344 : cast<StoreSDNode>(Op)->getAddressingMode(); 1345 int RHSC; 1346 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1347 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1348 ? CurDAG->getTargetConstant(RHSC, MVT::i32) 1349 : CurDAG->getTargetConstant(-RHSC, MVT::i32); 1350 return true; 1351 } 1352 1353 return false; 1354} 1355 1356bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1357 SDValue &Base, 1358 SDValue &OffReg, SDValue &ShImm) { 1359 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1360 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1361 return false; 1362 1363 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 
1364 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1365 int RHSC = (int)RHS->getZExtValue(); 1366 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1367 return false; 1368 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1369 return false; 1370 } 1371 1372 // Look for (R + R) or (R + (R << [1,2,3])). 1373 unsigned ShAmt = 0; 1374 Base = N.getOperand(0); 1375 OffReg = N.getOperand(1); 1376 1377 // Swap if it is ((R << c) + R). 1378 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1379 if (ShOpcVal != ARM_AM::lsl) { 1380 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1381 if (ShOpcVal == ARM_AM::lsl) 1382 std::swap(Base, OffReg); 1383 } 1384 1385 if (ShOpcVal == ARM_AM::lsl) { 1386 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1387 // it. 1388 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1389 ShAmt = Sh->getZExtValue(); 1390 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1391 OffReg = OffReg.getOperand(0); 1392 else { 1393 ShAmt = 0; 1394 ShOpcVal = ARM_AM::no_shift; 1395 } 1396 } else { 1397 ShOpcVal = ARM_AM::no_shift; 1398 } 1399 } 1400 1401 ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32); 1402 1403 return true; 1404} 1405 1406//===--------------------------------------------------------------------===// 1407 1408/// getAL - Returns a ARMCC::AL immediate node. 
1409static inline SDValue getAL(SelectionDAG *CurDAG) { 1410 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); 1411} 1412 1413SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { 1414 LoadSDNode *LD = cast<LoadSDNode>(N); 1415 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1416 if (AM == ISD::UNINDEXED) 1417 return NULL; 1418 1419 EVT LoadedVT = LD->getMemoryVT(); 1420 SDValue Offset, AMOpc; 1421 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1422 unsigned Opcode = 0; 1423 bool Match = false; 1424 if (LoadedVT == MVT::i32 && isPre && 1425 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1426 Opcode = ARM::LDR_PRE_IMM; 1427 Match = true; 1428 } else if (LoadedVT == MVT::i32 && !isPre && 1429 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1430 Opcode = ARM::LDR_POST_IMM; 1431 Match = true; 1432 } else if (LoadedVT == MVT::i32 && 1433 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1434 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1435 Match = true; 1436 1437 } else if (LoadedVT == MVT::i16 && 1438 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1439 Match = true; 1440 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1441 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1442 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1443 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1444 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1445 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1446 Match = true; 1447 Opcode = isPre ? 
ARM::LDRSB_PRE : ARM::LDRSB_POST; 1448 } 1449 } else { 1450 if (isPre && 1451 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1452 Match = true; 1453 Opcode = ARM::LDRB_PRE_IMM; 1454 } else if (!isPre && 1455 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1456 Match = true; 1457 Opcode = ARM::LDRB_POST_IMM; 1458 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1459 Match = true; 1460 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1461 } 1462 } 1463 } 1464 1465 if (Match) { 1466 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1467 SDValue Chain = LD->getChain(); 1468 SDValue Base = LD->getBasePtr(); 1469 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), 1470 CurDAG->getRegister(0, MVT::i32), Chain }; 1471 return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, 1472 MVT::i32, MVT::Other, Ops, 5); 1473 } else { 1474 SDValue Chain = LD->getChain(); 1475 SDValue Base = LD->getBasePtr(); 1476 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), 1477 CurDAG->getRegister(0, MVT::i32), Chain }; 1478 return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, 1479 MVT::i32, MVT::Other, Ops, 6); 1480 } 1481 } 1482 1483 return NULL; 1484} 1485 1486SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { 1487 LoadSDNode *LD = cast<LoadSDNode>(N); 1488 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1489 if (AM == ISD::UNINDEXED) 1490 return NULL; 1491 1492 EVT LoadedVT = LD->getMemoryVT(); 1493 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1494 SDValue Offset; 1495 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1496 unsigned Opcode = 0; 1497 bool Match = false; 1498 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1499 switch (LoadedVT.getSimpleVT().SimpleTy) { 1500 case MVT::i32: 1501 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1502 break; 1503 case MVT::i16: 1504 if (isSExtLd) 1505 Opcode = isPre ? 
ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1506 else 1507 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1508 break; 1509 case MVT::i8: 1510 case MVT::i1: 1511 if (isSExtLd) 1512 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1513 else 1514 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; 1515 break; 1516 default: 1517 return NULL; 1518 } 1519 Match = true; 1520 } 1521 1522 if (Match) { 1523 SDValue Chain = LD->getChain(); 1524 SDValue Base = LD->getBasePtr(); 1525 SDValue Ops[]= { Base, Offset, getAL(CurDAG), 1526 CurDAG->getRegister(0, MVT::i32), Chain }; 1527 return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, 1528 MVT::Other, Ops, 5); 1529 } 1530 1531 return NULL; 1532} 1533 1534/// \brief Form a GPRPair pseudo register from a pair of GPR regs. 1535SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { 1536 DebugLoc dl = V0.getNode()->getDebugLoc(); 1537 SDValue RegClass = 1538 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32); 1539 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); 1540 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); 1541 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1542 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); 1543} 1544 1545/// \brief Form a D register from a pair of S registers. 1546SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1547 DebugLoc dl = V0.getNode()->getDebugLoc(); 1548 SDValue RegClass = 1549 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32); 1550 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); 1551 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); 1552 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1553 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); 1554} 1555 1556/// \brief Form a quad register from a pair of D registers. 
1557SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1558 DebugLoc dl = V0.getNode()->getDebugLoc(); 1559 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32); 1560 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); 1561 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); 1562 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1563 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); 1564} 1565 1566/// \brief Form 4 consecutive D registers from a pair of Q registers. 1567SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1568 DebugLoc dl = V0.getNode()->getDebugLoc(); 1569 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); 1570 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); 1571 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); 1572 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1573 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); 1574} 1575 1576/// \brief Form 4 consecutive S registers. 1577SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1578 SDValue V2, SDValue V3) { 1579 DebugLoc dl = V0.getNode()->getDebugLoc(); 1580 SDValue RegClass = 1581 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32); 1582 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); 1583 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); 1584 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32); 1585 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); 1586 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1587 V2, SubReg2, V3, SubReg3 }; 1588 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); 1589} 1590 1591/// \brief Form 4 consecutive D registers. 
1592SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1593 SDValue V2, SDValue V3) { 1594 DebugLoc dl = V0.getNode()->getDebugLoc(); 1595 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); 1596 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); 1597 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); 1598 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32); 1599 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); 1600 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1601 V2, SubReg2, V3, SubReg3 }; 1602 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); 1603} 1604 1605/// \brief Form 4 consecutive Q registers. 1606SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1607 SDValue V2, SDValue V3) { 1608 DebugLoc dl = V0.getNode()->getDebugLoc(); 1609 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32); 1610 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); 1611 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); 1612 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32); 1613 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32); 1614 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1615 V2, SubReg2, V3, SubReg3 }; 1616 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); 1617} 1618 1619/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1620/// of a NEON VLD or VST instruction. The supported values depend on the 1621/// number of registers being loaded. 
1622SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs, 1623 bool is64BitVector) { 1624 unsigned NumRegs = NumVecs; 1625 if (!is64BitVector && NumVecs < 3) 1626 NumRegs *= 2; 1627 1628 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1629 if (Alignment >= 32 && NumRegs == 4) 1630 Alignment = 32; 1631 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1632 Alignment = 16; 1633 else if (Alignment >= 8) 1634 Alignment = 8; 1635 else 1636 Alignment = 0; 1637 1638 return CurDAG->getTargetConstant(Alignment, MVT::i32); 1639} 1640 1641// Get the register stride update opcode of a VLD/VST instruction that 1642// is otherwise equivalent to the given fixed stride updating instruction. 1643static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 1644 switch (Opc) { 1645 default: break; 1646 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 1647 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 1648 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 1649 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 1650 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 1651 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 1652 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 1653 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 1654 1655 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 1656 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 1657 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 1658 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 1659 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 1660 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 1661 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 1662 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 1663 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 1664 case ARM::VST1d64QPseudoWB_fixed: return 
ARM::VST1d64QPseudoWB_register; 1665 1666 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 1667 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 1668 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 1669 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 1670 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 1671 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 1672 1673 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 1674 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 1675 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 1676 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 1677 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 1678 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 1679 1680 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 1681 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 1682 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 1683 } 1684 return Opc; // If not one we handle, return it unchanged. 1685} 1686 1687SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 1688 const uint16_t *DOpcodes, 1689 const uint16_t *QOpcodes0, 1690 const uint16_t *QOpcodes1) { 1691 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 1692 DebugLoc dl = N->getDebugLoc(); 1693 1694 SDValue MemAddr, Align; 1695 unsigned AddrOpIdx = isUpdating ? 
1 : 2; 1696 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1697 return NULL; 1698 1699 SDValue Chain = N->getOperand(0); 1700 EVT VT = N->getValueType(0); 1701 bool is64BitVector = VT.is64BitVector(); 1702 Align = GetVLDSTAlign(Align, NumVecs, is64BitVector); 1703 1704 unsigned OpcodeIndex; 1705 switch (VT.getSimpleVT().SimpleTy) { 1706 default: llvm_unreachable("unhandled vld type"); 1707 // Double-register operations: 1708 case MVT::v8i8: OpcodeIndex = 0; break; 1709 case MVT::v4i16: OpcodeIndex = 1; break; 1710 case MVT::v2f32: 1711 case MVT::v2i32: OpcodeIndex = 2; break; 1712 case MVT::v1i64: OpcodeIndex = 3; break; 1713 // Quad-register operations: 1714 case MVT::v16i8: OpcodeIndex = 0; break; 1715 case MVT::v8i16: OpcodeIndex = 1; break; 1716 case MVT::v4f32: 1717 case MVT::v4i32: OpcodeIndex = 2; break; 1718 case MVT::v2i64: OpcodeIndex = 3; 1719 assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); 1720 break; 1721 } 1722 1723 EVT ResTy; 1724 if (NumVecs == 1) 1725 ResTy = VT; 1726 else { 1727 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 1728 if (!is64BitVector) 1729 ResTyElts *= 2; 1730 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 1731 } 1732 std::vector<EVT> ResTys; 1733 ResTys.push_back(ResTy); 1734 if (isUpdating) 1735 ResTys.push_back(MVT::i32); 1736 ResTys.push_back(MVT::Other); 1737 1738 SDValue Pred = getAL(CurDAG); 1739 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1740 SDNode *VLd; 1741 SmallVector<SDValue, 7> Ops; 1742 1743 // Double registers and VLD1/VLD2 quad registers are directly supported. 1744 if (is64BitVector || NumVecs <= 2) { 1745 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1746 QOpcodes0[OpcodeIndex]); 1747 Ops.push_back(MemAddr); 1748 Ops.push_back(Align); 1749 if (isUpdating) { 1750 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1751 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. 
Remove the reg0 1752 // case entirely when the rest are updated to that form, too. 1753 if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) 1754 Opc = getVLDSTRegisterUpdateOpcode(Opc); 1755 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 1756 // check for that explicitly too. Horribly hacky, but temporary. 1757 if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) || 1758 !isa<ConstantSDNode>(Inc.getNode())) 1759 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); 1760 } 1761 Ops.push_back(Pred); 1762 Ops.push_back(Reg0); 1763 Ops.push_back(Chain); 1764 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); 1765 1766 } else { 1767 // Otherwise, quad registers are loaded with two separate instructions, 1768 // where one loads the even registers and the other loads the odd registers. 1769 EVT AddrTy = MemAddr.getValueType(); 1770 1771 // Load the even subregs. This is always an updating load, so that it 1772 // provides the address to the second load for the odd subregs. 1773 SDValue ImplDef = 1774 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 1775 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 1776 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 1777 ResTy, AddrTy, MVT::Other, OpsA, 7); 1778 Chain = SDValue(VLdA, 2); 1779 1780 // Load the odd subregs. 1781 Ops.push_back(SDValue(VLdA, 1)); 1782 Ops.push_back(Align); 1783 if (isUpdating) { 1784 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1785 assert(isa<ConstantSDNode>(Inc.getNode()) && 1786 "only constant post-increment update allowed for VLD3/4"); 1787 (void)Inc; 1788 Ops.push_back(Reg0); 1789 } 1790 Ops.push_back(SDValue(VLdA, 0)); 1791 Ops.push_back(Pred); 1792 Ops.push_back(Reg0); 1793 Ops.push_back(Chain); 1794 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 1795 Ops.data(), Ops.size()); 1796 } 1797 1798 // Transfer memoperands. 
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  return NULL;
}

/// SelectVST - Select a NEON multi-vector store for node N.
///
///  - isUpdating: N carries an increment operand right after the address
///    operand, and the selected node gets an extra i32 result.
///  - NumVecs: number of vectors stored (asserted to be 1-4).
///  - DOpcodes: opcodes used for 64-bit vector types, indexed by element type.
///  - QOpcodes0 / QOpcodes1: opcodes used for 128-bit vector types.  QOpcodes0
///    is used on its own when NumVecs <= 2, and for the first (even-register)
///    store otherwise; QOpcodes1 is used for the second (odd-register) store.
///
/// Returns the machine node that performs the (last) store, or NULL when
/// SelectAddrMode6 rejects the address operand.
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  DebugLoc dl = N->getDebugLoc();

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return NULL;

  // The same memory operand is attached to every store node created below.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);

  // Map the vector type to an index into the opcode tables.  The D and Q
  // tables are indexed independently, so both groups start again at 0.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
    break;
  }

  // Result types: optional i32 (only when updating) followed by the chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      // An increment operand is pushed when the increment is not a constant
      // (the increment itself), or for NumVecs > 2 with an opcode other than
      // VST1q64wb_fixed (Reg0 as a placeholder for a constant increment).
      if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt =
      CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());

    // Transfer memoperands.
    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

    return VSt;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA, 7);
  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
  // Result 1 of the first store is its chain; the second store is chained
  // after it.
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops.data(), Ops.size());
  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
  return VStB;
}

/// SelectVLDSTLane - Select a NEON single-lane load (IsLoad) or store of
/// NumVecs vectors (asserted to be 2-4).  The lane number is the constant
/// operand that follows the vector operands.  DOpcodes and QOpcodes are the
/// opcode tables for 64-bit and 128-bit vector types respectively, indexed by
/// element type.  Returns NULL when SelectAddrMode6 rejects the address.
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
                                         bool isUpdating, unsigned NumVecs,
                                         const uint16_t *DOpcodes,
                                         const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  DebugLoc dl = N->getDebugLoc();

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return NULL;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Alignment stays 0 for NumVecs == 3; otherwise it is derived from the
  // addressing-mode alignment, capped at the number of bytes accessed.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
Alignment = (Alignment & -Alignment); // keep only the lowest set bit
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);

  // Map the vector type to an index into the opcode tables.  The D and Q
  // tables are indexed independently, so both groups start again at 0.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: for loads, one i64-element vector covering the whole
  // register tuple (4 elements for NumVecs == 3, doubled for 128-bit
  // vectors); then an i32 when updating; then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // A constant increment is represented by Reg0; any other increment is
    // passed through as the operand.
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
  }

  // Combine the input vectors into one super-register operand.  For
  // NumVecs == 3 the fourth slot is filled with an IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
                                         Ops.data(), Ops.size());
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  // A store is returned as-is; a load has its results rewired below and
  // NULL is returned instead.
  if (!IsLoad)
    return VLdLn;

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  return NULL;
}

/// SelectVLDDup - Select a NEON load-and-duplicate of NumVecs vectors
/// (asserted to be 2-4).  Only the 64-bit vector types v8i8, v4i16, v2f32 and
/// v2i32 are handled; Opcodes is indexed by that element type.  The uses of
/// N's results are replaced directly and NULL is returned.
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *Opcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  DebugLoc dl = N->getDebugLoc();

  SDValue MemAddr, Align;
  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
    return NULL;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Alignment stays 0 for NumVecs == 3; otherwise it is derived from the
  // addressing-mode alignment, capped at the number of bytes accessed.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment); // keep only the lowest set bit
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  }

  SDValue Pred = getAL(CurDAG);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDValue SuperReg;
  unsigned Opc = Opcodes[OpcodeIndex];
  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // fixed-stride update instructions don't have an explicit writeback
    // operand. It's implicit in the opcode itself.
    SDValue Inc = N->getOperand(2);
    if (!isa<ConstantSDNode>(Inc.getNode()))
      Ops.push_back(Inc);
    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
    else if (NumVecs > 2)
      Ops.push_back(Reg0);
  }
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  // Result types: one i64-element vector covering the whole register tuple
  // (4 elements for NumVecs == 3), an i32 when updating, then the chain.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  std::vector<EVT> ResTys;
  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);
  SDNode *VLdDup =
    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
  SuperReg = SDValue(VLdDup, 0);

  // Extract the subregisters.
2159 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2160 unsigned SubIdx = ARM::dsub_0; 2161 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2162 ReplaceUses(SDValue(N, Vec), 2163 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 2164 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 2165 if (isUpdating) 2166 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 2167 return NULL; 2168} 2169 2170SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, 2171 unsigned Opc) { 2172 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); 2173 DebugLoc dl = N->getDebugLoc(); 2174 EVT VT = N->getValueType(0); 2175 unsigned FirstTblReg = IsExt ? 2 : 1; 2176 2177 // Form a REG_SEQUENCE to force register allocation. 2178 SDValue RegSeq; 2179 SDValue V0 = N->getOperand(FirstTblReg + 0); 2180 SDValue V1 = N->getOperand(FirstTblReg + 1); 2181 if (NumVecs == 2) 2182 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 2183 else { 2184 SDValue V2 = N->getOperand(FirstTblReg + 2); 2185 // If it's a vtbl3, form a quad D-register and leave the last part as 2186 // an undef. 2187 SDValue V3 = (NumVecs == 3) 2188 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2189 : N->getOperand(FirstTblReg + 3); 2190 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2191 } 2192 2193 SmallVector<SDValue, 6> Ops; 2194 if (IsExt) 2195 Ops.push_back(N->getOperand(1)); 2196 Ops.push_back(RegSeq); 2197 Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); 2198 Ops.push_back(getAL(CurDAG)); // predicate 2199 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register 2200 return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); 2201} 2202 2203SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, 2204 bool isSigned) { 2205 if (!Subtarget->hasV6T2Ops()) 2206 return NULL; 2207 2208 unsigned Opc = isSigned 2209 ? (Subtarget->isThumb() ? 
ARM::t2SBFX : ARM::SBFX) 2210 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 2211 2212 // For unsigned extracts, check for a shift right and mask 2213 unsigned And_imm = 0; 2214 if (N->getOpcode() == ISD::AND) { 2215 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 2216 2217 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2218 if (And_imm & (And_imm + 1)) 2219 return NULL; 2220 2221 unsigned Srl_imm = 0; 2222 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 2223 Srl_imm)) { 2224 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2225 2226 // Note: The width operand is encoded as width-1. 2227 unsigned Width = CountTrailingOnes_32(And_imm) - 1; 2228 unsigned LSB = Srl_imm; 2229 2230 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2231 2232 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 2233 // It's cheaper to use a right shift to extract the top bits. 2234 if (Subtarget->isThumb()) { 2235 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 2236 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2237 CurDAG->getTargetConstant(LSB, MVT::i32), 2238 getAL(CurDAG), Reg0, Reg0 }; 2239 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); 2240 } 2241 2242 // ARM models shift instructions as MOVsi with shifter operand. 
2243 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 2244 SDValue ShOpc = 2245 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), 2246 MVT::i32); 2247 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 2248 getAL(CurDAG), Reg0, Reg0 }; 2249 return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); 2250 } 2251 2252 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2253 CurDAG->getTargetConstant(LSB, MVT::i32), 2254 CurDAG->getTargetConstant(Width, MVT::i32), 2255 getAL(CurDAG), Reg0 }; 2256 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); 2257 } 2258 } 2259 return NULL; 2260 } 2261 2262 // Otherwise, we're looking for a shift of a shift 2263 unsigned Shl_imm = 0; 2264 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 2265 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 2266 unsigned Srl_imm = 0; 2267 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 2268 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2269 // Note: The width operand is encoded as width-1. 
2270 unsigned Width = 32 - Srl_imm - 1; 2271 int LSB = Srl_imm - Shl_imm; 2272 if (LSB < 0) 2273 return NULL; 2274 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2275 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2276 CurDAG->getTargetConstant(LSB, MVT::i32), 2277 CurDAG->getTargetConstant(Width, MVT::i32), 2278 getAL(CurDAG), Reg0 }; 2279 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); 2280 } 2281 } 2282 return NULL; 2283} 2284 2285SDNode *ARMDAGToDAGISel:: 2286SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, 2287 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { 2288 SDValue CPTmp0; 2289 SDValue CPTmp1; 2290 if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { 2291 unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue(); 2292 unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); 2293 unsigned Opc = 0; 2294 switch (SOShOp) { 2295 case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; 2296 case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; 2297 case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; 2298 case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; 2299 default: 2300 llvm_unreachable("Unknown so_reg opcode!"); 2301 } 2302 SDValue SOShImm = 2303 CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); 2304 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); 2305 SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; 2306 return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6); 2307 } 2308 return 0; 2309} 2310 2311SDNode *ARMDAGToDAGISel:: 2312SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, 2313 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { 2314 SDValue CPTmp0; 2315 SDValue CPTmp1; 2316 SDValue CPTmp2; 2317 if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { 2318 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); 2319 SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag }; 2320 return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6); 2321 } 2322 2323 if 
(SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { 2324 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); 2325 SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; 2326 return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7); 2327 } 2328 return 0; 2329} 2330 2331SDNode *ARMDAGToDAGISel:: 2332SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, 2333 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { 2334 ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); 2335 if (!T) 2336 return 0; 2337 2338 unsigned Opc = 0; 2339 unsigned TrueImm = T->getZExtValue(); 2340 if (is_t2_so_imm(TrueImm)) { 2341 Opc = ARM::t2MOVCCi; 2342 } else if (TrueImm <= 0xffff) { 2343 Opc = ARM::t2MOVCCi16; 2344 } else if (is_t2_so_imm_not(TrueImm)) { 2345 TrueImm = ~TrueImm; 2346 Opc = ARM::t2MVNCCi; 2347 } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) { 2348 // Large immediate. 2349 Opc = ARM::t2MOVCCi32imm; 2350 } 2351 2352 if (Opc) { 2353 SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); 2354 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); 2355 SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; 2356 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); 2357 } 2358 2359 return 0; 2360} 2361 2362SDNode *ARMDAGToDAGISel:: 2363SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, 2364 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { 2365 ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); 2366 if (!T) 2367 return 0; 2368 2369 unsigned Opc = 0; 2370 unsigned TrueImm = T->getZExtValue(); 2371 bool isSoImm = is_so_imm(TrueImm); 2372 if (isSoImm) { 2373 Opc = ARM::MOVCCi; 2374 } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) { 2375 Opc = ARM::MOVCCi16; 2376 } else if (is_so_imm_not(TrueImm)) { 2377 TrueImm = ~TrueImm; 2378 Opc = ARM::MVNCCi; 2379 } else if (TrueVal.getNode()->hasOneUse() && 2380 (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) { 2381 // 
Large immediate. 2382 Opc = ARM::MOVCCi32imm; 2383 } 2384 2385 if (Opc) { 2386 SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); 2387 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); 2388 SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; 2389 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); 2390 } 2391 2392 return 0; 2393} 2394 2395SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { 2396 EVT VT = N->getValueType(0); 2397 SDValue FalseVal = N->getOperand(0); 2398 SDValue TrueVal = N->getOperand(1); 2399 SDValue CC = N->getOperand(2); 2400 SDValue CCR = N->getOperand(3); 2401 SDValue InFlag = N->getOperand(4); 2402 assert(CC.getOpcode() == ISD::Constant); 2403 assert(CCR.getOpcode() == ISD::Register); 2404 ARMCC::CondCodes CCVal = 2405 (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue(); 2406 2407 if (!Subtarget->isThumb1Only() && VT == MVT::i32) { 2408 // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) 2409 // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) 2410 // Pattern complexity = 18 cost = 1 size = 0 2411 if (Subtarget->isThumb()) { 2412 SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, 2413 CCVal, CCR, InFlag); 2414 if (!Res) 2415 Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal, 2416 ARMCC::getOppositeCondition(CCVal), CCR, InFlag); 2417 if (Res) 2418 return Res; 2419 } else { 2420 SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal, 2421 CCVal, CCR, InFlag); 2422 if (!Res) 2423 Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal, 2424 ARMCC::getOppositeCondition(CCVal), CCR, InFlag); 2425 if (Res) 2426 return Res; 2427 } 2428 2429 // Pattern: (ARMcmov:i32 GPR:i32:$false, 2430 // (imm:i32)<<P:Pred_so_imm>>:$true, 2431 // (imm:i32):$cc) 2432 // Emits: (MOVCCi:i32 GPR:i32:$false, 2433 // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) 2434 // Pattern complexity = 10 cost = 1 size = 0 2435 if (Subtarget->isThumb()) { 2436 SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal, 2437 
CCVal, CCR, InFlag); 2438 if (!Res) 2439 Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal, 2440 ARMCC::getOppositeCondition(CCVal), CCR, InFlag); 2441 if (Res) 2442 return Res; 2443 } else { 2444 SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal, 2445 CCVal, CCR, InFlag); 2446 if (!Res) 2447 Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal, 2448 ARMCC::getOppositeCondition(CCVal), CCR, InFlag); 2449 if (Res) 2450 return Res; 2451 } 2452 } 2453 2454 // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) 2455 // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) 2456 // Pattern complexity = 6 cost = 1 size = 0 2457 // 2458 // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) 2459 // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) 2460 // Pattern complexity = 6 cost = 11 size = 0 2461 // 2462 // Also VMOVScc and VMOVDcc. 2463 SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); 2464 SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; 2465 unsigned Opc = 0; 2466 switch (VT.getSimpleVT().SimpleTy) { 2467 default: llvm_unreachable("Illegal conditional move type!"); 2468 case MVT::i32: 2469 Opc = Subtarget->isThumb() 2470 ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) 2471 : ARM::MOVCCr; 2472 break; 2473 case MVT::f32: 2474 Opc = ARM::VMOVScc; 2475 break; 2476 case MVT::f64: 2477 Opc = ARM::VMOVDcc; 2478 break; 2479 } 2480 return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); 2481} 2482 2483/// Target-specific DAG combining for ISD::XOR. 2484/// Target-independent combining lowers SELECT_CC nodes of the form 2485/// select_cc setg[ge] X, 0, X, -X 2486/// select_cc setgt X, -1, X, -X 2487/// select_cc setl[te] X, 0, -X, X 2488/// select_cc setlt X, 1, -X, X 2489/// which represent Integer ABS into: 2490/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 2491/// ARM instruction selection detects the latter and matches it to 2492/// ARM::ABS or ARM::t2ABS machine node. 
2493SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ 2494 SDValue XORSrc0 = N->getOperand(0); 2495 SDValue XORSrc1 = N->getOperand(1); 2496 EVT VT = N->getValueType(0); 2497 2498 if (Subtarget->isThumb1Only()) 2499 return NULL; 2500 2501 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 2502 return NULL; 2503 2504 SDValue ADDSrc0 = XORSrc0.getOperand(0); 2505 SDValue ADDSrc1 = XORSrc0.getOperand(1); 2506 SDValue SRASrc0 = XORSrc1.getOperand(0); 2507 SDValue SRASrc1 = XORSrc1.getOperand(1); 2508 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 2509 EVT XType = SRASrc0.getValueType(); 2510 unsigned Size = XType.getSizeInBits() - 1; 2511 2512 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 2513 XType.isInteger() && SRAConstant != NULL && 2514 Size == SRAConstant->getZExtValue()) { 2515 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 2516 return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 2517 } 2518 2519 return NULL; 2520} 2521 2522SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { 2523 // The only time a CONCAT_VECTORS operation can have legal types is when 2524 // two 64-bit vectors are concatenated to a 128-bit vector. 
2525 EVT VT = N->getValueType(0); 2526 if (!VT.is128BitVector() || N->getNumOperands() != 2) 2527 llvm_unreachable("unexpected CONCAT_VECTORS"); 2528 return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); 2529} 2530 2531SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { 2532 SmallVector<SDValue, 6> Ops; 2533 Ops.push_back(Node->getOperand(1)); // Ptr 2534 Ops.push_back(Node->getOperand(2)); // Low part of Val1 2535 Ops.push_back(Node->getOperand(3)); // High part of Val1 2536 if (Opc == ARM::ATOMCMPXCHG6432) { 2537 Ops.push_back(Node->getOperand(4)); // Low part of Val2 2538 Ops.push_back(Node->getOperand(5)); // High part of Val2 2539 } 2540 Ops.push_back(Node->getOperand(0)); // Chain 2541 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2542 MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 2543 SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), 2544 MVT::i32, MVT::i32, MVT::Other, 2545 Ops.data() ,Ops.size()); 2546 cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); 2547 return ResNode; 2548} 2549 2550SDNode *ARMDAGToDAGISel::Select(SDNode *N) { 2551 DebugLoc dl = N->getDebugLoc(); 2552 2553 if (N->isMachineOpcode()) 2554 return NULL; // Already selected. 2555 2556 switch (N->getOpcode()) { 2557 default: break; 2558 case ISD::INLINEASM: { 2559 SDNode *ResNode = SelectInlineAsm(N); 2560 if (ResNode) 2561 return ResNode; 2562 break; 2563 } 2564 case ISD::XOR: { 2565 // Select special operations if XOR node forms integer ABS pattern 2566 SDNode *ResNode = SelectABSOp(N); 2567 if (ResNode) 2568 return ResNode; 2569 // Other cases are autogenerated. 2570 break; 2571 } 2572 case ISD::Constant: { 2573 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2574 bool UseCP = true; 2575 if (Subtarget->hasThumb2()) 2576 // Thumb2-aware targets have the MOVT instruction, so all immediates can 2577 // be done with MOV + MOVT, at worst. 
2578 UseCP = 0; 2579 else { 2580 if (Subtarget->isThumb()) { 2581 UseCP = (Val > 255 && // MOV 2582 ~Val > 255 && // MOV + MVN 2583 !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL 2584 } else 2585 UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV 2586 ARM_AM::getSOImmVal(~Val) == -1 && // MVN 2587 !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. 2588 } 2589 2590 if (UseCP) { 2591 SDValue CPIdx = 2592 CurDAG->getTargetConstantPool(ConstantInt::get( 2593 Type::getInt32Ty(*CurDAG->getContext()), Val), 2594 TLI.getPointerTy()); 2595 2596 SDNode *ResNode; 2597 if (Subtarget->isThumb1Only()) { 2598 SDValue Pred = getAL(CurDAG); 2599 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2600 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; 2601 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2602 Ops, 4); 2603 } else { 2604 SDValue Ops[] = { 2605 CPIdx, 2606 CurDAG->getTargetConstant(0, MVT::i32), 2607 getAL(CurDAG), 2608 CurDAG->getRegister(0, MVT::i32), 2609 CurDAG->getEntryNode() 2610 }; 2611 ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2612 Ops, 5); 2613 } 2614 ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); 2615 return NULL; 2616 } 2617 2618 // Other cases are autogenerated. 2619 break; 2620 } 2621 case ISD::FrameIndex: { 2622 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 2623 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2624 SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); 2625 if (Subtarget->isThumb1Only()) { 2626 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), 2627 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; 2628 return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4); 2629 } else { 2630 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 
2631 ARM::t2ADDri : ARM::ADDri); 2632 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), 2633 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), 2634 CurDAG->getRegister(0, MVT::i32) }; 2635 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); 2636 } 2637 } 2638 case ISD::SRL: 2639 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) 2640 return I; 2641 break; 2642 case ISD::SRA: 2643 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) 2644 return I; 2645 break; 2646 case ISD::MUL: 2647 if (Subtarget->isThumb1Only()) 2648 break; 2649 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2650 unsigned RHSV = C->getZExtValue(); 2651 if (!RHSV) break; 2652 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 2653 unsigned ShImm = Log2_32(RHSV-1); 2654 if (ShImm >= 32) 2655 break; 2656 SDValue V = N->getOperand(0); 2657 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2658 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); 2659 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2660 if (Subtarget->isThumb()) { 2661 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; 2662 return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); 2663 } else { 2664 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; 2665 return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7); 2666 } 2667 } 2668 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
2669 unsigned ShImm = Log2_32(RHSV+1); 2670 if (ShImm >= 32) 2671 break; 2672 SDValue V = N->getOperand(0); 2673 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2674 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); 2675 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2676 if (Subtarget->isThumb()) { 2677 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; 2678 return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); 2679 } else { 2680 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; 2681 return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7); 2682 } 2683 } 2684 } 2685 break; 2686 case ISD::AND: { 2687 // Check for unsigned bitfield extract 2688 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) 2689 return I; 2690 2691 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 2692 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 2693 // are entirely contributed by c2 and lower 16-bits are entirely contributed 2694 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 2695 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 2696 EVT VT = N->getValueType(0); 2697 if (VT != MVT::i32) 2698 break; 2699 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2700 ? ARM::t2MOVTi16 2701 : (Subtarget->hasV6T2Ops() ? 
ARM::MOVTi16 : 0); 2702 if (!Opc) 2703 break; 2704 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2705 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2706 if (!N1C) 2707 break; 2708 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2709 SDValue N2 = N0.getOperand(1); 2710 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2711 if (!N2C) 2712 break; 2713 unsigned N1CVal = N1C->getZExtValue(); 2714 unsigned N2CVal = N2C->getZExtValue(); 2715 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2716 (N1CVal & 0xffffU) == 0xffffU && 2717 (N2CVal & 0xffffU) == 0x0U) { 2718 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2719 MVT::i32); 2720 SDValue Ops[] = { N0.getOperand(0), Imm16, 2721 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; 2722 return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4); 2723 } 2724 } 2725 break; 2726 } 2727 case ARMISD::VMOVRRD: 2728 return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, 2729 N->getOperand(0), getAL(CurDAG), 2730 CurDAG->getRegister(0, MVT::i32)); 2731 case ISD::UMUL_LOHI: { 2732 if (Subtarget->isThumb1Only()) 2733 break; 2734 if (Subtarget->isThumb()) { 2735 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2736 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), 2737 CurDAG->getRegister(0, MVT::i32) }; 2738 return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4); 2739 } else { 2740 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2741 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), 2742 CurDAG->getRegister(0, MVT::i32) }; 2743 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
2744 ARM::UMULL : ARM::UMULLv5, 2745 dl, MVT::i32, MVT::i32, Ops, 5); 2746 } 2747 } 2748 case ISD::SMUL_LOHI: { 2749 if (Subtarget->isThumb1Only()) 2750 break; 2751 if (Subtarget->isThumb()) { 2752 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2753 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; 2754 return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); 2755 } else { 2756 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2757 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), 2758 CurDAG->getRegister(0, MVT::i32) }; 2759 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 2760 ARM::SMULL : ARM::SMULLv5, 2761 dl, MVT::i32, MVT::i32, Ops, 5); 2762 } 2763 } 2764 case ARMISD::UMLAL:{ 2765 if (Subtarget->isThumb()) { 2766 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2767 N->getOperand(3), getAL(CurDAG), 2768 CurDAG->getRegister(0, MVT::i32)}; 2769 return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6); 2770 }else{ 2771 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2772 N->getOperand(3), getAL(CurDAG), 2773 CurDAG->getRegister(0, MVT::i32), 2774 CurDAG->getRegister(0, MVT::i32) }; 2775 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 2776 ARM::UMLAL : ARM::UMLALv5, 2777 dl, MVT::i32, MVT::i32, Ops, 7); 2778 } 2779 } 2780 case ARMISD::SMLAL:{ 2781 if (Subtarget->isThumb()) { 2782 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2783 N->getOperand(3), getAL(CurDAG), 2784 CurDAG->getRegister(0, MVT::i32)}; 2785 return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6); 2786 }else{ 2787 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2788 N->getOperand(3), getAL(CurDAG), 2789 CurDAG->getRegister(0, MVT::i32), 2790 CurDAG->getRegister(0, MVT::i32) }; 2791 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
2792 ARM::SMLAL : ARM::SMLALv5, 2793 dl, MVT::i32, MVT::i32, Ops, 7); 2794 } 2795 } 2796 case ISD::LOAD: { 2797 SDNode *ResNode = 0; 2798 if (Subtarget->isThumb() && Subtarget->hasThumb2()) 2799 ResNode = SelectT2IndexedLoad(N); 2800 else 2801 ResNode = SelectARMIndexedLoad(N); 2802 if (ResNode) 2803 return ResNode; 2804 // Other cases are autogenerated. 2805 break; 2806 } 2807 case ARMISD::BRCOND: { 2808 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2809 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2810 // Pattern complexity = 6 cost = 1 size = 0 2811 2812 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2813 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 2814 // Pattern complexity = 6 cost = 1 size = 0 2815 2816 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 2817 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 2818 // Pattern complexity = 6 cost = 1 size = 0 2819 2820 unsigned Opc = Subtarget->isThumb() ? 2821 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 2822 SDValue Chain = N->getOperand(0); 2823 SDValue N1 = N->getOperand(1); 2824 SDValue N2 = N->getOperand(2); 2825 SDValue N3 = N->getOperand(3); 2826 SDValue InFlag = N->getOperand(4); 2827 assert(N1.getOpcode() == ISD::BasicBlock); 2828 assert(N2.getOpcode() == ISD::Constant); 2829 assert(N3.getOpcode() == ISD::Register); 2830 2831 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) 2832 cast<ConstantSDNode>(N2)->getZExtValue()), 2833 MVT::i32); 2834 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 2835 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 2836 MVT::Glue, Ops, 5); 2837 Chain = SDValue(ResNode, 0); 2838 if (N->getNumValues() == 2) { 2839 InFlag = SDValue(ResNode, 1); 2840 ReplaceUses(SDValue(N, 1), InFlag); 2841 } 2842 ReplaceUses(SDValue(N, 0), 2843 SDValue(Chain.getNode(), Chain.getResNo())); 2844 return NULL; 2845 } 2846 case ARMISD::CMOV: 2847 return SelectCMOVOp(N); 2848 case ARMISD::VZIP: { 2849 unsigned Opc = 0; 2850 EVT VT = N->getValueType(0); 2851 switch (VT.getSimpleVT().SimpleTy) { 2852 default: return NULL; 2853 case MVT::v8i8: Opc = ARM::VZIPd8; break; 2854 case MVT::v4i16: Opc = ARM::VZIPd16; break; 2855 case MVT::v2f32: 2856 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
2857 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2858 case MVT::v16i8: Opc = ARM::VZIPq8; break; 2859 case MVT::v8i16: Opc = ARM::VZIPq16; break; 2860 case MVT::v4f32: 2861 case MVT::v4i32: Opc = ARM::VZIPq32; break; 2862 } 2863 SDValue Pred = getAL(CurDAG); 2864 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2865 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2866 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); 2867 } 2868 case ARMISD::VUZP: { 2869 unsigned Opc = 0; 2870 EVT VT = N->getValueType(0); 2871 switch (VT.getSimpleVT().SimpleTy) { 2872 default: return NULL; 2873 case MVT::v8i8: Opc = ARM::VUZPd8; break; 2874 case MVT::v4i16: Opc = ARM::VUZPd16; break; 2875 case MVT::v2f32: 2876 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 2877 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2878 case MVT::v16i8: Opc = ARM::VUZPq8; break; 2879 case MVT::v8i16: Opc = ARM::VUZPq16; break; 2880 case MVT::v4f32: 2881 case MVT::v4i32: Opc = ARM::VUZPq32; break; 2882 } 2883 SDValue Pred = getAL(CurDAG); 2884 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2885 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2886 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); 2887 } 2888 case ARMISD::VTRN: { 2889 unsigned Opc = 0; 2890 EVT VT = N->getValueType(0); 2891 switch (VT.getSimpleVT().SimpleTy) { 2892 default: return NULL; 2893 case MVT::v8i8: Opc = ARM::VTRNd8; break; 2894 case MVT::v4i16: Opc = ARM::VTRNd16; break; 2895 case MVT::v2f32: 2896 case MVT::v2i32: Opc = ARM::VTRNd32; break; 2897 case MVT::v16i8: Opc = ARM::VTRNq8; break; 2898 case MVT::v8i16: Opc = ARM::VTRNq16; break; 2899 case MVT::v4f32: 2900 case MVT::v4i32: Opc = ARM::VTRNq32; break; 2901 } 2902 SDValue Pred = getAL(CurDAG); 2903 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2904 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 2905 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); 2906 } 2907 
case ARMISD::BUILD_VECTOR: { 2908 EVT VecVT = N->getValueType(0); 2909 EVT EltVT = VecVT.getVectorElementType(); 2910 unsigned NumElts = VecVT.getVectorNumElements(); 2911 if (EltVT == MVT::f64) { 2912 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 2913 return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); 2914 } 2915 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 2916 if (NumElts == 2) 2917 return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); 2918 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 2919 return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 2920 N->getOperand(2), N->getOperand(3)); 2921 } 2922 2923 case ARMISD::VLD2DUP: { 2924 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 2925 ARM::VLD2DUPd32 }; 2926 return SelectVLDDup(N, false, 2, Opcodes); 2927 } 2928 2929 case ARMISD::VLD3DUP: { 2930 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 2931 ARM::VLD3DUPd16Pseudo, 2932 ARM::VLD3DUPd32Pseudo }; 2933 return SelectVLDDup(N, false, 3, Opcodes); 2934 } 2935 2936 case ARMISD::VLD4DUP: { 2937 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 2938 ARM::VLD4DUPd16Pseudo, 2939 ARM::VLD4DUPd32Pseudo }; 2940 return SelectVLDDup(N, false, 4, Opcodes); 2941 } 2942 2943 case ARMISD::VLD2DUP_UPD: { 2944 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 2945 ARM::VLD2DUPd16wb_fixed, 2946 ARM::VLD2DUPd32wb_fixed }; 2947 return SelectVLDDup(N, true, 2, Opcodes); 2948 } 2949 2950 case ARMISD::VLD3DUP_UPD: { 2951 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 2952 ARM::VLD3DUPd16Pseudo_UPD, 2953 ARM::VLD3DUPd32Pseudo_UPD }; 2954 return SelectVLDDup(N, true, 3, Opcodes); 2955 } 2956 2957 case ARMISD::VLD4DUP_UPD: { 2958 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 2959 ARM::VLD4DUPd16Pseudo_UPD, 2960 ARM::VLD4DUPd32Pseudo_UPD }; 2961 return SelectVLDDup(N, true, 4, Opcodes); 2962 } 2963 2964 case 
ARMISD::VLD1_UPD: { 2965 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 2966 ARM::VLD1d16wb_fixed, 2967 ARM::VLD1d32wb_fixed, 2968 ARM::VLD1d64wb_fixed }; 2969 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 2970 ARM::VLD1q16wb_fixed, 2971 ARM::VLD1q32wb_fixed, 2972 ARM::VLD1q64wb_fixed }; 2973 return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); 2974 } 2975 2976 case ARMISD::VLD2_UPD: { 2977 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 2978 ARM::VLD2d16wb_fixed, 2979 ARM::VLD2d32wb_fixed, 2980 ARM::VLD1q64wb_fixed}; 2981 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 2982 ARM::VLD2q16PseudoWB_fixed, 2983 ARM::VLD2q32PseudoWB_fixed }; 2984 return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); 2985 } 2986 2987 case ARMISD::VLD3_UPD: { 2988 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 2989 ARM::VLD3d16Pseudo_UPD, 2990 ARM::VLD3d32Pseudo_UPD, 2991 ARM::VLD1q64wb_fixed}; 2992 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 2993 ARM::VLD3q16Pseudo_UPD, 2994 ARM::VLD3q32Pseudo_UPD }; 2995 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 2996 ARM::VLD3q16oddPseudo_UPD, 2997 ARM::VLD3q32oddPseudo_UPD }; 2998 return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 2999 } 3000 3001 case ARMISD::VLD4_UPD: { 3002 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3003 ARM::VLD4d16Pseudo_UPD, 3004 ARM::VLD4d32Pseudo_UPD, 3005 ARM::VLD1q64wb_fixed}; 3006 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3007 ARM::VLD4q16Pseudo_UPD, 3008 ARM::VLD4q32Pseudo_UPD }; 3009 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3010 ARM::VLD4q16oddPseudo_UPD, 3011 ARM::VLD4q32oddPseudo_UPD }; 3012 return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3013 } 3014 3015 case ARMISD::VLD2LN_UPD: { 3016 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3017 ARM::VLD2LNd16Pseudo_UPD, 3018 ARM::VLD2LNd32Pseudo_UPD }; 3019 static const uint16_t 
QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3020 ARM::VLD2LNq32Pseudo_UPD }; 3021 return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3022 } 3023 3024 case ARMISD::VLD3LN_UPD: { 3025 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3026 ARM::VLD3LNd16Pseudo_UPD, 3027 ARM::VLD3LNd32Pseudo_UPD }; 3028 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3029 ARM::VLD3LNq32Pseudo_UPD }; 3030 return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3031 } 3032 3033 case ARMISD::VLD4LN_UPD: { 3034 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3035 ARM::VLD4LNd16Pseudo_UPD, 3036 ARM::VLD4LNd32Pseudo_UPD }; 3037 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3038 ARM::VLD4LNq32Pseudo_UPD }; 3039 return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3040 } 3041 3042 case ARMISD::VST1_UPD: { 3043 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3044 ARM::VST1d16wb_fixed, 3045 ARM::VST1d32wb_fixed, 3046 ARM::VST1d64wb_fixed }; 3047 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 3048 ARM::VST1q16wb_fixed, 3049 ARM::VST1q32wb_fixed, 3050 ARM::VST1q64wb_fixed }; 3051 return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); 3052 } 3053 3054 case ARMISD::VST2_UPD: { 3055 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3056 ARM::VST2d16wb_fixed, 3057 ARM::VST2d32wb_fixed, 3058 ARM::VST1q64wb_fixed}; 3059 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3060 ARM::VST2q16PseudoWB_fixed, 3061 ARM::VST2q32PseudoWB_fixed }; 3062 return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); 3063 } 3064 3065 case ARMISD::VST3_UPD: { 3066 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 3067 ARM::VST3d16Pseudo_UPD, 3068 ARM::VST3d32Pseudo_UPD, 3069 ARM::VST1d64TPseudoWB_fixed}; 3070 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3071 ARM::VST3q16Pseudo_UPD, 3072 ARM::VST3q32Pseudo_UPD }; 3073 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 
3074 ARM::VST3q16oddPseudo_UPD, 3075 ARM::VST3q32oddPseudo_UPD }; 3076 return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3077 } 3078 3079 case ARMISD::VST4_UPD: { 3080 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3081 ARM::VST4d16Pseudo_UPD, 3082 ARM::VST4d32Pseudo_UPD, 3083 ARM::VST1d64QPseudoWB_fixed}; 3084 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3085 ARM::VST4q16Pseudo_UPD, 3086 ARM::VST4q32Pseudo_UPD }; 3087 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3088 ARM::VST4q16oddPseudo_UPD, 3089 ARM::VST4q32oddPseudo_UPD }; 3090 return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3091 } 3092 3093 case ARMISD::VST2LN_UPD: { 3094 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3095 ARM::VST2LNd16Pseudo_UPD, 3096 ARM::VST2LNd32Pseudo_UPD }; 3097 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3098 ARM::VST2LNq32Pseudo_UPD }; 3099 return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3100 } 3101 3102 case ARMISD::VST3LN_UPD: { 3103 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3104 ARM::VST3LNd16Pseudo_UPD, 3105 ARM::VST3LNd32Pseudo_UPD }; 3106 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3107 ARM::VST3LNq32Pseudo_UPD }; 3108 return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3109 } 3110 3111 case ARMISD::VST4LN_UPD: { 3112 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3113 ARM::VST4LNd16Pseudo_UPD, 3114 ARM::VST4LNd32Pseudo_UPD }; 3115 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3116 ARM::VST4LNq32Pseudo_UPD }; 3117 return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 3118 } 3119 3120 case ISD::INTRINSIC_VOID: 3121 case ISD::INTRINSIC_W_CHAIN: { 3122 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3123 switch (IntNo) { 3124 default: 3125 break; 3126 3127 case Intrinsic::arm_ldrexd: { 3128 SDValue MemAddr = N->getOperand(2); 3129 DebugLoc dl = 
N->getDebugLoc(); 3130 SDValue Chain = N->getOperand(0); 3131 3132 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3133 unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD; 3134 3135 // arm_ldrexd returns a i64 value in {i32, i32} 3136 std::vector<EVT> ResTys; 3137 if (isThumb) { 3138 ResTys.push_back(MVT::i32); 3139 ResTys.push_back(MVT::i32); 3140 } else 3141 ResTys.push_back(MVT::Untyped); 3142 ResTys.push_back(MVT::Other); 3143 3144 // Place arguments in the right order. 3145 SmallVector<SDValue, 7> Ops; 3146 Ops.push_back(MemAddr); 3147 Ops.push_back(getAL(CurDAG)); 3148 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3149 Ops.push_back(Chain); 3150 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), 3151 Ops.size()); 3152 // Transfer memoperands. 3153 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3154 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3155 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 3156 3157 // Remap uses. 3158 SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); 3159 if (!SDValue(N, 0).use_empty()) { 3160 SDValue Result; 3161 if (isThumb) 3162 Result = SDValue(Ld, 0); 3163 else { 3164 SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); 3165 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3166 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3167 Result = SDValue(ResNode,0); 3168 } 3169 ReplaceUses(SDValue(N, 0), Result); 3170 } 3171 if (!SDValue(N, 1).use_empty()) { 3172 SDValue Result; 3173 if (isThumb) 3174 Result = SDValue(Ld, 1); 3175 else { 3176 SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); 3177 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3178 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3179 Result = SDValue(ResNode,0); 3180 } 3181 ReplaceUses(SDValue(N, 1), Result); 3182 } 3183 ReplaceUses(SDValue(N, 2), OutChain); 3184 return NULL; 3185 } 3186 3187 case Intrinsic::arm_strexd: { 3188 DebugLoc dl = N->getDebugLoc(); 3189 SDValue Chain = N->getOperand(0); 3190 SDValue Val0 = N->getOperand(2); 3191 SDValue Val1 = N->getOperand(3); 3192 SDValue MemAddr = N->getOperand(4); 3193 3194 // Store exclusive double return a i32 value which is the return status 3195 // of the issued store. 3196 EVT ResTys[] = { MVT::i32, MVT::Other }; 3197 3198 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3199 // Place arguments in the right order. 3200 SmallVector<SDValue, 7> Ops; 3201 if (isThumb) { 3202 Ops.push_back(Val0); 3203 Ops.push_back(Val1); 3204 } else 3205 // arm_strexd uses GPRPair. 3206 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3207 Ops.push_back(MemAddr); 3208 Ops.push_back(getAL(CurDAG)); 3209 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3210 Ops.push_back(Chain); 3211 3212 unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; 3213 3214 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), 3215 Ops.size()); 3216 // Transfer memoperands. 
3217 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3218 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3219 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 3220 3221 return St; 3222 } 3223 3224 case Intrinsic::arm_neon_vld1: { 3225 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3226 ARM::VLD1d32, ARM::VLD1d64 }; 3227 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3228 ARM::VLD1q32, ARM::VLD1q64}; 3229 return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); 3230 } 3231 3232 case Intrinsic::arm_neon_vld2: { 3233 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3234 ARM::VLD2d32, ARM::VLD1q64 }; 3235 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3236 ARM::VLD2q32Pseudo }; 3237 return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); 3238 } 3239 3240 case Intrinsic::arm_neon_vld3: { 3241 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3242 ARM::VLD3d16Pseudo, 3243 ARM::VLD3d32Pseudo, 3244 ARM::VLD1d64TPseudo }; 3245 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3246 ARM::VLD3q16Pseudo_UPD, 3247 ARM::VLD3q32Pseudo_UPD }; 3248 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3249 ARM::VLD3q16oddPseudo, 3250 ARM::VLD3q32oddPseudo }; 3251 return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3252 } 3253 3254 case Intrinsic::arm_neon_vld4: { 3255 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3256 ARM::VLD4d16Pseudo, 3257 ARM::VLD4d32Pseudo, 3258 ARM::VLD1d64QPseudo }; 3259 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3260 ARM::VLD4q16Pseudo_UPD, 3261 ARM::VLD4q32Pseudo_UPD }; 3262 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3263 ARM::VLD4q16oddPseudo, 3264 ARM::VLD4q32oddPseudo }; 3265 return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3266 } 3267 3268 case Intrinsic::arm_neon_vld2lane: { 3269 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3270 
ARM::VLD2LNd16Pseudo, 3271 ARM::VLD2LNd32Pseudo }; 3272 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3273 ARM::VLD2LNq32Pseudo }; 3274 return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3275 } 3276 3277 case Intrinsic::arm_neon_vld3lane: { 3278 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3279 ARM::VLD3LNd16Pseudo, 3280 ARM::VLD3LNd32Pseudo }; 3281 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3282 ARM::VLD3LNq32Pseudo }; 3283 return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3284 } 3285 3286 case Intrinsic::arm_neon_vld4lane: { 3287 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3288 ARM::VLD4LNd16Pseudo, 3289 ARM::VLD4LNd32Pseudo }; 3290 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3291 ARM::VLD4LNq32Pseudo }; 3292 return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3293 } 3294 3295 case Intrinsic::arm_neon_vst1: { 3296 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 3297 ARM::VST1d32, ARM::VST1d64 }; 3298 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3299 ARM::VST1q32, ARM::VST1q64 }; 3300 return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); 3301 } 3302 3303 case Intrinsic::arm_neon_vst2: { 3304 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3305 ARM::VST2d32, ARM::VST1q64 }; 3306 static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3307 ARM::VST2q32Pseudo }; 3308 return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); 3309 } 3310 3311 case Intrinsic::arm_neon_vst3: { 3312 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3313 ARM::VST3d16Pseudo, 3314 ARM::VST3d32Pseudo, 3315 ARM::VST1d64TPseudo }; 3316 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3317 ARM::VST3q16Pseudo_UPD, 3318 ARM::VST3q32Pseudo_UPD }; 3319 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 3320 ARM::VST3q16oddPseudo, 3321 ARM::VST3q32oddPseudo }; 3322 return SelectVST(N, false, 3, DOpcodes, 
QOpcodes0, QOpcodes1); 3323 } 3324 3325 case Intrinsic::arm_neon_vst4: { 3326 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3327 ARM::VST4d16Pseudo, 3328 ARM::VST4d32Pseudo, 3329 ARM::VST1d64QPseudo }; 3330 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3331 ARM::VST4q16Pseudo_UPD, 3332 ARM::VST4q32Pseudo_UPD }; 3333 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3334 ARM::VST4q16oddPseudo, 3335 ARM::VST4q32oddPseudo }; 3336 return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3337 } 3338 3339 case Intrinsic::arm_neon_vst2lane: { 3340 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 3341 ARM::VST2LNd16Pseudo, 3342 ARM::VST2LNd32Pseudo }; 3343 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 3344 ARM::VST2LNq32Pseudo }; 3345 return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 3346 } 3347 3348 case Intrinsic::arm_neon_vst3lane: { 3349 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 3350 ARM::VST3LNd16Pseudo, 3351 ARM::VST3LNd32Pseudo }; 3352 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 3353 ARM::VST3LNq32Pseudo }; 3354 return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 3355 } 3356 3357 case Intrinsic::arm_neon_vst4lane: { 3358 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 3359 ARM::VST4LNd16Pseudo, 3360 ARM::VST4LNd32Pseudo }; 3361 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 3362 ARM::VST4LNq32Pseudo }; 3363 return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 3364 } 3365 } 3366 break; 3367 } 3368 3369 case ISD::INTRINSIC_WO_CHAIN: { 3370 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3371 switch (IntNo) { 3372 default: 3373 break; 3374 3375 case Intrinsic::arm_neon_vtbl2: 3376 return SelectVTBL(N, false, 2, ARM::VTBL2); 3377 case Intrinsic::arm_neon_vtbl3: 3378 return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); 3379 case Intrinsic::arm_neon_vtbl4: 3380 return SelectVTBL(N, false, 4, 
ARM::VTBL4Pseudo); 3381 3382 case Intrinsic::arm_neon_vtbx2: 3383 return SelectVTBL(N, true, 2, ARM::VTBX2); 3384 case Intrinsic::arm_neon_vtbx3: 3385 return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); 3386 case Intrinsic::arm_neon_vtbx4: 3387 return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); 3388 } 3389 break; 3390 } 3391 3392 case ARMISD::VTBL1: { 3393 DebugLoc dl = N->getDebugLoc(); 3394 EVT VT = N->getValueType(0); 3395 SmallVector<SDValue, 6> Ops; 3396 3397 Ops.push_back(N->getOperand(0)); 3398 Ops.push_back(N->getOperand(1)); 3399 Ops.push_back(getAL(CurDAG)); // Predicate 3400 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register 3401 return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size()); 3402 } 3403 case ARMISD::VTBL2: { 3404 DebugLoc dl = N->getDebugLoc(); 3405 EVT VT = N->getValueType(0); 3406 3407 // Form a REG_SEQUENCE to force register allocation. 3408 SDValue V0 = N->getOperand(0); 3409 SDValue V1 = N->getOperand(1); 3410 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 3411 3412 SmallVector<SDValue, 6> Ops; 3413 Ops.push_back(RegSeq); 3414 Ops.push_back(N->getOperand(2)); 3415 Ops.push_back(getAL(CurDAG)); // Predicate 3416 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register 3417 return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, 3418 Ops.data(), Ops.size()); 3419 } 3420 3421 case ISD::CONCAT_VECTORS: 3422 return SelectConcatVector(N); 3423 3424 case ARMISD::ATOMOR64_DAG: 3425 return SelectAtomic64(N, ARM::ATOMOR6432); 3426 case ARMISD::ATOMXOR64_DAG: 3427 return SelectAtomic64(N, ARM::ATOMXOR6432); 3428 case ARMISD::ATOMADD64_DAG: 3429 return SelectAtomic64(N, ARM::ATOMADD6432); 3430 case ARMISD::ATOMSUB64_DAG: 3431 return SelectAtomic64(N, ARM::ATOMSUB6432); 3432 case ARMISD::ATOMNAND64_DAG: 3433 return SelectAtomic64(N, ARM::ATOMNAND6432); 3434 case ARMISD::ATOMAND64_DAG: 3435 return SelectAtomic64(N, ARM::ATOMAND6432); 3436 case ARMISD::ATOMSWAP64_DAG: 3437 return 
SelectAtomic64(N, ARM::ATOMSWAP6432); 3438 case ARMISD::ATOMCMPXCHG64_DAG: 3439 return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); 3440 3441 case ARMISD::ATOMMIN64_DAG: 3442 return SelectAtomic64(N, ARM::ATOMMIN6432); 3443 case ARMISD::ATOMUMIN64_DAG: 3444 return SelectAtomic64(N, ARM::ATOMUMIN6432); 3445 case ARMISD::ATOMMAX64_DAG: 3446 return SelectAtomic64(N, ARM::ATOMMAX6432); 3447 case ARMISD::ATOMUMAX64_DAG: 3448 return SelectAtomic64(N, ARM::ATOMUMAX6432); 3449 } 3450 3451 return SelectCode(N); 3452} 3453 3454SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ 3455 std::vector<SDValue> AsmNodeOperands; 3456 unsigned Flag, Kind; 3457 bool Changed = false; 3458 unsigned NumOps = N->getNumOperands(); 3459 3460 ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>( 3461 N->getOperand(InlineAsm::Op_AsmString)); 3462 StringRef AsmString = StringRef(S->getSymbol()); 3463 3464 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 3465 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 3466 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 3467 // respectively. Since there is no constraint to explicitly specify a 3468 // reg pair, we search %H operand inside the asm string. If it is found, the 3469 // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. 3470 if (AsmString.find(":H}") == StringRef::npos) 3471 return NULL; 3472 3473 DebugLoc dl = N->getDebugLoc(); 3474 SDValue Glue = N->getOperand(NumOps-1); 3475 3476 // Glue node will be appended late. 
3477 for(unsigned i = 0; i < NumOps -1; ++i) { 3478 SDValue op = N->getOperand(i); 3479 AsmNodeOperands.push_back(op); 3480 3481 if (i < InlineAsm::Op_FirstOperand) 3482 continue; 3483 3484 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 3485 Flag = C->getZExtValue(); 3486 Kind = InlineAsm::getKind(Flag); 3487 } 3488 else 3489 continue; 3490 3491 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 3492 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 3493 continue; 3494 3495 unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); 3496 unsigned RC; 3497 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 3498 if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) 3499 continue; 3500 3501 assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); 3502 SDValue V0 = N->getOperand(i+1); 3503 SDValue V1 = N->getOperand(i+2); 3504 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 3505 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 3506 SDValue PairedReg; 3507 MachineRegisterInfo &MRI = MF->getRegInfo(); 3508 3509 if (Kind == InlineAsm::Kind_RegDef || 3510 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 3511 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 3512 // the original GPRs. 3513 3514 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 3515 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 3516 SDValue Chain = SDValue(N,0); 3517 3518 SDNode *GU = N->getGluedUser(); 3519 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 3520 Chain.getValue(1)); 3521 3522 // Extract values from a GPRPair reg and copy to the original GPR reg. 
3523 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 3524 RegCopy); 3525 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 3526 RegCopy); 3527 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 3528 RegCopy.getValue(1)); 3529 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 3530 3531 // Update the original glue user. 3532 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 3533 Ops.push_back(T1.getValue(1)); 3534 CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); 3535 GU = T1.getNode(); 3536 } 3537 else { 3538 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 3539 // GPRPair and then pass the GPRPair to the inline asm. 3540 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 3541 3542 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 3543 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 3544 Chain.getValue(1)); 3545 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 3546 T0.getValue(1)); 3547 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 3548 3549 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 3550 // i32 VRs of inline asm with it. 3551 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 3552 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 3553 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 3554 3555 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 3556 Glue = Chain.getValue(1); 3557 } 3558 3559 Changed = true; 3560 3561 if(PairedReg.getNode()) { 3562 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 3563 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 3564 // Replace the current flag. 3565 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 3566 Flag, MVT::i32); 3567 // Add the new register node and skip the original two GPRs. 
3568 AsmNodeOperands.push_back(PairedReg); 3569 // Skip the next two GPRs. 3570 i += 2; 3571 } 3572 } 3573 3574 AsmNodeOperands.push_back(Glue); 3575 if (!Changed) 3576 return NULL; 3577 3578 SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), 3579 CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], 3580 AsmNodeOperands.size()); 3581 New->setNodeId(-1); 3582 return New.getNode(); 3583} 3584 3585 3586bool ARMDAGToDAGISel:: 3587SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, 3588 std::vector<SDValue> &OutOps) { 3589 assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); 3590 // Require the address to be in a register. That is safe for all ARM 3591 // variants and it is hard to do anything much smarter without knowing 3592 // how the operand is used. 3593 OutOps.push_back(Op); 3594 return false; 3595} 3596 3597/// createARMISelDag - This pass converts a legalized DAG into a 3598/// ARM-specific DAG, ready for instruction scheduling. 3599/// 3600FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 3601 CodeGenOpt::Level OptLevel) { 3602 return new ARMDAGToDAGISel(TM, OptLevel); 3603} 3604