1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines an instruction selector for the ARM target. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ARM.h" 15#include "ARMBaseInstrInfo.h" 16#include "ARMTargetMachine.h" 17#include "MCTargetDesc/ARMAddressingModes.h" 18#include "llvm/ADT/StringSwitch.h" 19#include "llvm/CodeGen/MachineFrameInfo.h" 20#include "llvm/CodeGen/MachineFunction.h" 21#include "llvm/CodeGen/MachineInstrBuilder.h" 22#include "llvm/CodeGen/MachineRegisterInfo.h" 23#include "llvm/CodeGen/SelectionDAG.h" 24#include "llvm/CodeGen/SelectionDAGISel.h" 25#include "llvm/IR/CallingConv.h" 26#include "llvm/IR/Constants.h" 27#include "llvm/IR/DerivedTypes.h" 28#include "llvm/IR/Function.h" 29#include "llvm/IR/Intrinsics.h" 30#include "llvm/IR/LLVMContext.h" 31#include "llvm/Support/CommandLine.h" 32#include "llvm/Support/Debug.h" 33#include "llvm/Support/ErrorHandling.h" 34#include "llvm/Target/TargetLowering.h" 35#include "llvm/Target/TargetOptions.h" 36 37using namespace llvm; 38 39#define DEBUG_TYPE "arm-isel" 40 41static cl::opt<bool> 42DisableShifterOp("disable-shifter-op", cl::Hidden, 43 cl::desc("Disable isel of shifter-op"), 44 cl::init(false)); 45 46//===--------------------------------------------------------------------===// 47/// ARMDAGToDAGISel - ARM specific code to select ARM machine 48/// instructions for SelectionDAG operations. 
49/// 50namespace { 51 52enum AddrMode2Type { 53 AM2_BASE, // Simple AM2 (+-imm12) 54 AM2_SHOP // Shifter-op AM2 55}; 56 57class ARMDAGToDAGISel : public SelectionDAGISel { 58 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 59 /// make the right decision when generating code for different targets. 60 const ARMSubtarget *Subtarget; 61 62public: 63 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) 64 : SelectionDAGISel(tm, OptLevel) {} 65 66 bool runOnMachineFunction(MachineFunction &MF) override { 67 // Reset the subtarget each time through. 68 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 69 SelectionDAGISel::runOnMachineFunction(MF); 70 return true; 71 } 72 73 const char *getPassName() const override { 74 return "ARM Instruction Selection"; 75 } 76 77 void PreprocessISelDAG() override; 78 79 /// getI32Imm - Return a target constant of type i32 with the specified 80 /// value. 81 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 82 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 83 } 84 85 void Select(SDNode *N) override; 86 87 bool hasNoVMLxHazardUse(SDNode *N) const; 88 bool isShifterOpProfitable(const SDValue &Shift, 89 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 90 bool SelectRegShifterOperand(SDValue N, SDValue &A, 91 SDValue &B, SDValue &C, 92 bool CheckProfitability = true); 93 bool SelectImmShifterOperand(SDValue N, SDValue &A, 94 SDValue &B, bool CheckProfitability = true); 95 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, 96 SDValue &B, SDValue &C) { 97 // Don't apply the profitability check 98 return SelectRegShifterOperand(N, A, B, C, false); 99 } 100 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, 101 SDValue &B) { 102 // Don't apply the profitability check 103 return SelectImmShifterOperand(N, A, B, false); 104 } 105 106 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 107 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue 
&Opc); 108 109 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, 110 SDValue &Offset, SDValue &Opc); 111 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, 112 SDValue &Opc) { 113 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; 114 } 115 116 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, 117 SDValue &Opc) { 118 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP; 119 } 120 121 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, 122 SDValue &Opc) { 123 SelectAddrMode2Worker(N, Base, Offset, Opc); 124// return SelectAddrMode2ShOp(N, Base, Offset, Opc); 125 // This always matches one way or another. 126 return true; 127 } 128 129 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 130 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 131 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 132 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 133 return true; 134 } 135 136 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 137 SDValue &Offset, SDValue &Opc); 138 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 139 SDValue &Offset, SDValue &Opc); 140 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 141 SDValue &Offset, SDValue &Opc); 142 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 143 bool SelectAddrMode3(SDValue N, SDValue &Base, 144 SDValue &Offset, SDValue &Opc); 145 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 146 SDValue &Offset, SDValue &Opc); 147 bool SelectAddrMode5(SDValue N, SDValue &Base, 148 SDValue &Offset); 149 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 150 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 151 152 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 153 154 // Thumb Addressing Modes: 155 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 156 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue 
&Base, 157 SDValue &OffImm); 158 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 159 SDValue &OffImm); 160 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 161 SDValue &OffImm); 162 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 163 SDValue &OffImm); 164 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); 165 166 // Thumb 2 Addressing Modes: 167 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 168 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 169 SDValue &OffImm); 170 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 171 SDValue &OffImm); 172 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 173 SDValue &OffReg, SDValue &ShImm); 174 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 175 176 inline bool is_so_imm(unsigned Imm) const { 177 return ARM_AM::getSOImmVal(Imm) != -1; 178 } 179 180 inline bool is_so_imm_not(unsigned Imm) const { 181 return ARM_AM::getSOImmVal(~Imm) != -1; 182 } 183 184 inline bool is_t2_so_imm(unsigned Imm) const { 185 return ARM_AM::getT2SOImmVal(Imm) != -1; 186 } 187 188 inline bool is_t2_so_imm_not(unsigned Imm) const { 189 return ARM_AM::getT2SOImmVal(~Imm) != -1; 190 } 191 192 // Include the pieces autogenerated from the target description. 193#include "ARMGenDAGISel.inc" 194 195private: 196 /// Indexed (pre/post inc/dec) load matching code for ARM. 197 bool tryARMIndexedLoad(SDNode *N); 198 bool tryT2IndexedLoad(SDNode *N); 199 200 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 201 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for 202 /// loads of D registers and even subregs and odd subregs of Q registers. 203 /// For NumVecs <= 2, QOpcodes1 is not used. 204 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 205 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 206 const uint16_t *QOpcodes1); 207 208 /// SelectVST - Select NEON store intrinsics. 
NumVecs should 209 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 210 /// stores of D registers and even subregs and odd subregs of Q registers. 211 /// For NumVecs <= 2, QOpcodes1 is not used. 212 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 213 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 214 const uint16_t *QOpcodes1); 215 216 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 217 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 218 /// load/store of D registers and Q registers. 219 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 220 unsigned NumVecs, const uint16_t *DOpcodes, 221 const uint16_t *QOpcodes); 222 223 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 224 /// should be 2, 3 or 4. The opcode array specifies the instructions used 225 /// for loading D registers. (Q registers are not supported.) 226 void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, 227 const uint16_t *Opcodes); 228 229 /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, 230 /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be 231 /// generated to force the table registers to be consecutive. 232 void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); 233 234 /// Try to select SBFX/UBFX instructions for ARM. 235 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 236 237 // Select special operations if node forms integer ABS pattern 238 bool tryABSOp(SDNode *N); 239 240 bool tryReadRegister(SDNode *N); 241 bool tryWriteRegister(SDNode *N); 242 243 bool tryInlineAsm(SDNode *N); 244 245 void SelectConcatVector(SDNode *N); 246 247 bool trySMLAWSMULW(SDNode *N); 248 249 void SelectCMP_SWAP(SDNode *N); 250 251 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 252 /// inline asm expressions. 
253 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 254 std::vector<SDValue> &OutOps) override; 255 256 // Form pairs of consecutive R, S, D, or Q registers. 257 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 258 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 259 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 260 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 261 262 // Form sequences of 4 consecutive S, D, or Q registers. 263 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 264 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 265 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 266 267 // Get the alignment operand for a NEON VLD or VST instruction. 268 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 269 bool is64BitVector); 270 271 /// Returns the number of instructions required to materialize the given 272 /// constant in a register, or 3 if a literal pool load is needed. 273 unsigned ConstantMaterializationCost(unsigned Val) const; 274 275 /// Checks if N is a multiplication by a constant where we can extract out a 276 /// power of two from the constant so that it can be used in a shift, but only 277 /// if it simplifies the materialization of the constant. Returns true if it 278 /// is, and assigns to PowerOfTwo the power of two that should be extracted 279 /// out and to NewMulConst the new constant to be multiplied by. 280 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 281 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 282 283 /// Replace N with M in CurDAG, in a way that also ensures that M gets 284 /// selected when N would have been selected. 285 void replaceDAGValue(const SDValue &N, SDValue M); 286}; 287} 288 289/// isInt32Immediate - This method tests to see if the node is a 32-bit constant 290/// operand. 
If so Imm will receive the 32-bit value. 291static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 292 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 293 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 294 return true; 295 } 296 return false; 297} 298 299// isInt32Immediate - This method tests to see if a constant operand. 300// If so Imm will receive the 32 bit value. 301static bool isInt32Immediate(SDValue N, unsigned &Imm) { 302 return isInt32Immediate(N.getNode(), Imm); 303} 304 305// isOpcWithIntImmediate - This method tests to see if the node is a specific 306// opcode and that it has a immediate integer right operand. 307// If so Imm will receive the 32 bit value. 308static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 309 return N->getOpcode() == Opc && 310 isInt32Immediate(N->getOperand(1).getNode(), Imm); 311} 312 313/// \brief Check whether a particular node is a constant value representable as 314/// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 315/// 316/// \param ScaledConstant [out] - On success, the pre-scaled constant value. 317static bool isScaledConstantInRange(SDValue Node, int Scale, 318 int RangeMin, int RangeMax, 319 int &ScaledConstant) { 320 assert(Scale > 0 && "Invalid scale!"); 321 322 // Check that this is a constant. 323 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 324 if (!C) 325 return false; 326 327 ScaledConstant = (int) C->getZExtValue(); 328 if ((ScaledConstant % Scale) != 0) 329 return false; 330 331 ScaledConstant /= Scale; 332 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 333} 334 335void ARMDAGToDAGISel::PreprocessISelDAG() { 336 if (!Subtarget->hasV6T2Ops()) 337 return; 338 339 bool isThumb2 = Subtarget->isThumb(); 340 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 341 E = CurDAG->allnodes_end(); I != E; ) { 342 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 
343 344 if (N->getOpcode() != ISD::ADD) 345 continue; 346 347 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 348 // leading zeros, followed by consecutive set bits, followed by 1 or 2 349 // trailing zeros, e.g. 1020. 350 // Transform the expression to 351 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 352 // of trailing zeros of c2. The left shift would be folded as an shifter 353 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 354 // node (UBFX). 355 356 SDValue N0 = N->getOperand(0); 357 SDValue N1 = N->getOperand(1); 358 unsigned And_imm = 0; 359 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 360 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 361 std::swap(N0, N1); 362 } 363 if (!And_imm) 364 continue; 365 366 // Check if the AND mask is an immediate of the form: 000.....1111111100 367 unsigned TZ = countTrailingZeros(And_imm); 368 if (TZ != 1 && TZ != 2) 369 // Be conservative here. Shifter operands aren't always free. e.g. On 370 // Swift, left shifter operand of 1 / 2 for free but others are not. 371 // e.g. 372 // ubfx r3, r1, #16, #8 373 // ldr.w r3, [r0, r3, lsl #2] 374 // vs. 375 // mov.w r9, #1020 376 // and.w r2, r9, r1, lsr #14 377 // ldr r2, [r0, r2] 378 continue; 379 And_imm >>= TZ; 380 if (And_imm & (And_imm + 1)) 381 continue; 382 383 // Look for (and (srl X, c1), c2). 384 SDValue Srl = N1.getOperand(0); 385 unsigned Srl_imm = 0; 386 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 387 (Srl_imm <= 2)) 388 continue; 389 390 // Make sure first operand is not a shifter operand which would prevent 391 // folding of the left shift. 
392 SDValue CPTmp0; 393 SDValue CPTmp1; 394 SDValue CPTmp2; 395 if (isThumb2) { 396 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 397 continue; 398 } else { 399 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 400 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 401 continue; 402 } 403 404 // Now make the transformation. 405 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 406 Srl.getOperand(0), 407 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 408 MVT::i32)); 409 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 410 Srl, 411 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 412 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 413 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 414 CurDAG->UpdateNodeOperands(N, N0, N1); 415 } 416} 417 418/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 419/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 420/// least on current ARM implementations) which should be avoidded. 421bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 422 if (OptLevel == CodeGenOpt::None) 423 return true; 424 425 if (!Subtarget->hasVMLxHazards()) 426 return true; 427 428 if (!N->hasOneUse()) 429 return false; 430 431 SDNode *Use = *N->use_begin(); 432 if (Use->getOpcode() == ISD::CopyToReg) 433 return true; 434 if (Use->isMachineOpcode()) { 435 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 436 CurDAG->getSubtarget().getInstrInfo()); 437 438 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 439 if (MCID.mayStore()) 440 return true; 441 unsigned Opcode = MCID.getOpcode(); 442 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 443 return true; 444 // vmlx feeding into another vmlx. We actually want to unfold 445 // the use later in the MLxExpansion pass. e.g. 446 // vmla 447 // vmla (stall 8 cycles) 448 // 449 // vmul (5 cycles) 450 // vadd (5 cycles) 451 // vmla 452 // This adds up to about 18 - 19 cycles. 
453 // 454 // vmla 455 // vmul (stall 4 cycles) 456 // vadd adds up to about 14 cycles. 457 return TII->isFpMLxInstruction(Opcode); 458 } 459 460 return false; 461} 462 463bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 464 ARM_AM::ShiftOpc ShOpcVal, 465 unsigned ShAmt) { 466 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 467 return true; 468 if (Shift.hasOneUse()) 469 return true; 470 // R << 2 is free. 471 return ShOpcVal == ARM_AM::lsl && 472 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 473} 474 475unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { 476 if (Subtarget->isThumb()) { 477 if (Val <= 255) return 1; // MOV 478 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW 479 if (Val <= 510) return 2; // MOV + ADDi8 480 if (~Val <= 255) return 2; // MOV + MVN 481 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL 482 } else { 483 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV 484 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN 485 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW 486 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs 487 } 488 if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT 489 return 3; // Literal pool load 490} 491 492bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 493 unsigned MaxShift, 494 unsigned &PowerOfTwo, 495 SDValue &NewMulConst) const { 496 assert(N.getOpcode() == ISD::MUL); 497 assert(MaxShift > 0); 498 499 // If the multiply is used in more than one place then changing the constant 500 // will make other uses incorrect, so don't. 501 if (!N.hasOneUse()) return false; 502 // Check if the multiply is by a constant 503 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 504 if (!MulConst) return false; 505 // If the constant is used in more than one place then modifying it will mean 506 // we need to materialize two constants instead of one, which is a bad idea. 
507 if (!MulConst->hasOneUse()) return false; 508 unsigned MulConstVal = MulConst->getZExtValue(); 509 if (MulConstVal == 0) return false; 510 511 // Find the largest power of 2 that MulConstVal is a multiple of 512 PowerOfTwo = MaxShift; 513 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 514 --PowerOfTwo; 515 if (PowerOfTwo == 0) return false; 516 } 517 518 // Only optimise if the new cost is better 519 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 520 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 521 unsigned OldCost = ConstantMaterializationCost(MulConstVal); 522 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal); 523 return NewCost < OldCost; 524} 525 526void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 527 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 528 CurDAG->ReplaceAllUsesWith(N, M); 529} 530 531bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 532 SDValue &BaseReg, 533 SDValue &Opc, 534 bool CheckProfitability) { 535 if (DisableShifterOp) 536 return false; 537 538 // If N is a multiply-by-constant and it's profitable to extract a shift and 539 // use it in a shifted operand do so. 540 if (N.getOpcode() == ISD::MUL) { 541 unsigned PowerOfTwo = 0; 542 SDValue NewMulConst; 543 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 544 HandleSDNode Handle(N); 545 replaceDAGValue(N.getOperand(1), NewMulConst); 546 BaseReg = Handle.getValue(); 547 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, 548 PowerOfTwo), 549 SDLoc(N), MVT::i32); 550 return true; 551 } 552 } 553 554 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 555 556 // Don't match base register only case. That is matched to a separate 557 // lower complexity pattern with explicit register operand. 
558 if (ShOpcVal == ARM_AM::no_shift) return false; 559 560 BaseReg = N.getOperand(0); 561 unsigned ShImmVal = 0; 562 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 563 if (!RHS) return false; 564 ShImmVal = RHS->getZExtValue() & 31; 565 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 566 SDLoc(N), MVT::i32); 567 return true; 568} 569 570bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 571 SDValue &BaseReg, 572 SDValue &ShReg, 573 SDValue &Opc, 574 bool CheckProfitability) { 575 if (DisableShifterOp) 576 return false; 577 578 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 579 580 // Don't match base register only case. That is matched to a separate 581 // lower complexity pattern with explicit register operand. 582 if (ShOpcVal == ARM_AM::no_shift) return false; 583 584 BaseReg = N.getOperand(0); 585 unsigned ShImmVal = 0; 586 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 587 if (RHS) return false; 588 589 ShReg = N.getOperand(1); 590 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 591 return false; 592 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 593 SDLoc(N), MVT::i32); 594 return true; 595} 596 597 598bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 599 SDValue &Base, 600 SDValue &OffImm) { 601 // Match simple R + imm12 operands. 602 603 // Base only. 604 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 605 !CurDAG->isBaseWithConstantOffset(N)) { 606 if (N.getOpcode() == ISD::FrameIndex) { 607 // Match frame index. 
608 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 609 Base = CurDAG->getTargetFrameIndex( 610 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 611 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 612 return true; 613 } 614 615 if (N.getOpcode() == ARMISD::Wrapper && 616 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 617 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 618 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 619 Base = N.getOperand(0); 620 } else 621 Base = N; 622 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 623 return true; 624 } 625 626 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 627 int RHSC = (int)RHS->getSExtValue(); 628 if (N.getOpcode() == ISD::SUB) 629 RHSC = -RHSC; 630 631 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 632 Base = N.getOperand(0); 633 if (Base.getOpcode() == ISD::FrameIndex) { 634 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 635 Base = CurDAG->getTargetFrameIndex( 636 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 637 } 638 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 639 return true; 640 } 641 } 642 643 // Base only. 644 Base = N; 645 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 646 return true; 647} 648 649 650 651bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 652 SDValue &Opc) { 653 if (N.getOpcode() == ISD::MUL && 654 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 655 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 656 // X * [3,5,9] -> X + X * [2,4,8] etc. 
657 int RHSC = (int)RHS->getZExtValue(); 658 if (RHSC & 1) { 659 RHSC = RHSC & ~1; 660 ARM_AM::AddrOpc AddSub = ARM_AM::add; 661 if (RHSC < 0) { 662 AddSub = ARM_AM::sub; 663 RHSC = - RHSC; 664 } 665 if (isPowerOf2_32(RHSC)) { 666 unsigned ShAmt = Log2_32(RHSC); 667 Base = Offset = N.getOperand(0); 668 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 669 ARM_AM::lsl), 670 SDLoc(N), MVT::i32); 671 return true; 672 } 673 } 674 } 675 } 676 677 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 678 // ISD::OR that is equivalent to an ISD::ADD. 679 !CurDAG->isBaseWithConstantOffset(N)) 680 return false; 681 682 // Leave simple R +/- imm12 operands for LDRi12 683 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 684 int RHSC; 685 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 686 -0x1000+1, 0x1000, RHSC)) // 12 bits. 687 return false; 688 } 689 690 // Otherwise this is R +/- [possibly shifted] R. 691 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 692 ARM_AM::ShiftOpc ShOpcVal = 693 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 694 unsigned ShAmt = 0; 695 696 Base = N.getOperand(0); 697 Offset = N.getOperand(1); 698 699 if (ShOpcVal != ARM_AM::no_shift) { 700 // Check to see if the RHS of the shift is a constant, if not, we can't fold 701 // it. 702 if (ConstantSDNode *Sh = 703 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 704 ShAmt = Sh->getZExtValue(); 705 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 706 Offset = N.getOperand(1).getOperand(0); 707 else { 708 ShAmt = 0; 709 ShOpcVal = ARM_AM::no_shift; 710 } 711 } else { 712 ShOpcVal = ARM_AM::no_shift; 713 } 714 } 715 716 // Try matching (R shl C) + (R). 
717 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 718 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 719 N.getOperand(0).hasOneUse())) { 720 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 721 if (ShOpcVal != ARM_AM::no_shift) { 722 // Check to see if the RHS of the shift is a constant, if not, we can't 723 // fold it. 724 if (ConstantSDNode *Sh = 725 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 726 ShAmt = Sh->getZExtValue(); 727 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 728 Offset = N.getOperand(0).getOperand(0); 729 Base = N.getOperand(1); 730 } else { 731 ShAmt = 0; 732 ShOpcVal = ARM_AM::no_shift; 733 } 734 } else { 735 ShOpcVal = ARM_AM::no_shift; 736 } 737 } 738 } 739 740 // If Offset is a multiply-by-constant and it's profitable to extract a shift 741 // and use it in a shifted operand do so. 742 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 743 unsigned PowerOfTwo = 0; 744 SDValue NewMulConst; 745 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 746 replaceDAGValue(Offset.getOperand(1), NewMulConst); 747 ShAmt = PowerOfTwo; 748 ShOpcVal = ARM_AM::lsl; 749 } 750 } 751 752 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 753 SDLoc(N), MVT::i32); 754 return true; 755} 756 757 758//----- 759 760AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, 761 SDValue &Base, 762 SDValue &Offset, 763 SDValue &Opc) { 764 if (N.getOpcode() == ISD::MUL && 765 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { 766 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 767 // X * [3,5,9] -> X + X * [2,4,8] etc. 
768 int RHSC = (int)RHS->getZExtValue(); 769 if (RHSC & 1) { 770 RHSC = RHSC & ~1; 771 ARM_AM::AddrOpc AddSub = ARM_AM::add; 772 if (RHSC < 0) { 773 AddSub = ARM_AM::sub; 774 RHSC = - RHSC; 775 } 776 if (isPowerOf2_32(RHSC)) { 777 unsigned ShAmt = Log2_32(RHSC); 778 Base = Offset = N.getOperand(0); 779 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 780 ARM_AM::lsl), 781 SDLoc(N), MVT::i32); 782 return AM2_SHOP; 783 } 784 } 785 } 786 } 787 788 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 789 // ISD::OR that is equivalent to an ADD. 790 !CurDAG->isBaseWithConstantOffset(N)) { 791 Base = N; 792 if (N.getOpcode() == ISD::FrameIndex) { 793 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 794 Base = CurDAG->getTargetFrameIndex( 795 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 796 } else if (N.getOpcode() == ARMISD::Wrapper && 797 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 798 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 799 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 800 Base = N.getOperand(0); 801 } 802 Offset = CurDAG->getRegister(0, MVT::i32); 803 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, 804 ARM_AM::no_shift), 805 SDLoc(N), MVT::i32); 806 return AM2_BASE; 807 } 808 809 // Match simple R +/- imm12 operands. 810 if (N.getOpcode() != ISD::SUB) { 811 int RHSC; 812 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 813 -0x1000+1, 0x1000, RHSC)) { // 12 bits. 
814 Base = N.getOperand(0); 815 if (Base.getOpcode() == ISD::FrameIndex) { 816 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 817 Base = CurDAG->getTargetFrameIndex( 818 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 819 } 820 Offset = CurDAG->getRegister(0, MVT::i32); 821 822 ARM_AM::AddrOpc AddSub = ARM_AM::add; 823 if (RHSC < 0) { 824 AddSub = ARM_AM::sub; 825 RHSC = - RHSC; 826 } 827 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, 828 ARM_AM::no_shift), 829 SDLoc(N), MVT::i32); 830 return AM2_BASE; 831 } 832 } 833 834 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { 835 // Compute R +/- (R << N) and reuse it. 836 Base = N; 837 Offset = CurDAG->getRegister(0, MVT::i32); 838 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, 839 ARM_AM::no_shift), 840 SDLoc(N), MVT::i32); 841 return AM2_BASE; 842 } 843 844 // Otherwise this is R +/- [possibly shifted] R. 845 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; 846 ARM_AM::ShiftOpc ShOpcVal = 847 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 848 unsigned ShAmt = 0; 849 850 Base = N.getOperand(0); 851 Offset = N.getOperand(1); 852 853 if (ShOpcVal != ARM_AM::no_shift) { 854 // Check to see if the RHS of the shift is a constant, if not, we can't fold 855 // it. 856 if (ConstantSDNode *Sh = 857 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 858 ShAmt = Sh->getZExtValue(); 859 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 860 Offset = N.getOperand(1).getOperand(0); 861 else { 862 ShAmt = 0; 863 ShOpcVal = ARM_AM::no_shift; 864 } 865 } else { 866 ShOpcVal = ARM_AM::no_shift; 867 } 868 } 869 870 // Try matching (R shl C) + (R). 
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return AM2_SHOP;
}

/// Match the offset operand of an indexed load/store as an addrmode2 register
/// offset, folding a constant shift into the operand when profitable.
///
/// \param Op      indexed LOAD or STORE node; only its addressing mode is read.
/// \param N       the offset value of that node.
/// \param Offset  [out] N itself, or the shifted register when a shift is
///                folded.
/// \param Opc     [out] i32 target constant with the AM2 opcode built from the
///                add/sub direction, shift amount and shift kind.
/// \returns false when isScaledConstantInRange accepts N (the immediate forms
///          are expected to match instead); true otherwise.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Incrementing modes add the offset; every other mode subtracts it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Not profitable: use the shift result as a plain register offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

/// Match the offset operand of a pre-indexed load/store as an addrmode2
/// immediate.
///
/// On success Offset is set to register 0 and Opc to the plain signed
/// immediate (negated for decrementing modes); unlike
/// SelectAddrMode2OffsetImm, the value is not packed with getAM2Opc.
/// \returns true only when isScaledConstantInRange accepts N.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


/// Match the offset operand of an indexed load/store as an addrmode2
/// immediate.
///
/// On success Offset is set to register 0 and Opc to the AM2 opcode built
/// from the add/sub direction and the immediate, with no shift.
/// \returns true only when isScaledConstantInRange accepts N.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

/// Trivial address matcher: uses N as the base unchanged and always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

/// Match an addrmode3 address (base plus/minus register or 8-bit immediate).
///
/// Always succeeds. The cases are tried in this order:
///  - SUB node: base - register.
///  - not base+constant: N alone (frame indices become target frame indices).
///  - base + constant accepted by isScaledConstantInRange: immediate form,
///    with Offset set to register 0.
///  - otherwise: base + register.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // The opcode carries the magnitude; the sign goes into the add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

/// Match the offset operand of an indexed load/store for addrmode3.
///
/// Always succeeds: a constant accepted by isScaledConstantInRange becomes an
/// immediate (Offset = register 0); anything else is used as a register
/// offset. The add/sub direction comes from Op's addressing mode.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

/// Match an addrmode5 address (base plus/minus a scaled 8-bit immediate).
///
/// Always succeeds. When the constant offset is not accepted by
/// isScaledConstantInRange (called with Scale = 4), the whole of N is used as
/// the base with a zero offset.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Look through the wrapper unless it wraps a global, external symbol or
      // TLS address.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     SDLoc(N), MVT::i32);
  return true;
}

/// Match an addrmode6 address: the address itself plus an alignment operand.
///
/// \param Parent  the memory node using the address; must be a MemSDNode
///                (enforced by cast<>).
/// \param N       the address value, returned unchanged in Addr.
/// \param Align   [out] i32 target constant with the chosen alignment, 0 when
///                none is recorded.
/// \returns always true.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

/// Match the increment operand of a post-incremented addrmode6 load/store.
///
/// \returns false unless Op's addressing mode is POST_INC. On success Offset
///          is N, except that a constant equal to the accessed size in bytes
///          is replaced by register 0.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

/// Match a single-use ARMISD::PIC_ADD node, returning its first operand in
/// Offset and its constant second operand as an i32 target constant in Label.
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

/// Match a Thumb register + register address.
///
/// For an ADD or base+constant node the two operands become Base and Offset.
/// Otherwise only a null constant matches, and it is used for both outputs.
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                            SDValue &Base, SDValue &Offset){
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

/// Match a Thumb base + scaled 5-bit immediate address.
///
/// \param Scale   scale passed to isScaledConstantInRange for the constant.
/// \param OffImm  [out] i32 target constant with the value produced by
///                isScaledConstantInRange, or 0 for a bare base.
/// \returns false for an ADD that is not base+constant, and for constants that
///          isScaledConstantInRange rejects, so that the register-offset form
///          is selected instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

/// SelectThumbAddrModeImm5S with Scale = 4.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

/// SelectThumbAddrModeImm5S with Scale = 2.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

/// SelectThumbAddrModeImm5S with Scale = 1.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

/// Match a Thumb SP-relative address: a frame index, or a frame index / SP
/// register plus a constant accepted by isScaledConstantInRange with
/// Scale = 4.
///
/// Side effect: when a frame index is matched, its frame object's alignment
/// is raised to at least 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo *MFI = MF->getFrameInfo();
    if (MFI->getObjectAlignment(FI) < 4)
      MFI->setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo *MFI = MF->getFrameInfo();
        if (MFI->getObjectAlignment(FI) < 4)
          MFI->setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


/// Match a Thumb2 base + unsigned 12-bit immediate address.
///
/// \returns false when the wrapped base is a TargetConstantPool, or when
///          SelectT2AddrModeImm8 matches N; true otherwise, falling back to N
///          as a bare base with a zero offset.
/// Note that the SelectT2AddrModeImm8 probe writes Base and OffImm when it
/// matches, even though this function then returns false.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

/// Match a Thumb2 base + negative 8-bit immediate address.
///
/// Succeeds only when the effective constant offset (negated for SUB nodes)
/// lies in [-255, -1]; OffImm receives that signed value.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// Match the offset operand of a Thumb2 indexed load/store as an 8-bit
/// immediate.
///
/// OffImm receives the value produced by isScaledConstantInRange for
/// incrementing modes and its negation for every other mode.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

/// Match a Thumb2 register + (register << [0..3]) address.
///
/// \param Base    [out] the unshifted register.
/// \param OffReg  [out] the offset register (the shift's first operand when a
///                left shift is folded).
/// \param ShImm   [out] i32 target constant with the shift amount.
/// \returns false when N is neither an ADD nor base+constant, or when the
///          constant operand belongs to the immediate forms.
/// Side effect: may rewrite a multiply constant in the DAG through
/// replaceDAGValue when canExtractShiftFromMul succeeds.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

/// Match the address of a Thumb2 exclusive load/store.
///
/// Always succeeds. A constant offset is folded only when it is a multiple of
/// 4 no larger than 1020, in which case OffImm holds offset / 4; otherwise N
/// is the base and OffImm is 0.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

/// Try to select an indexed (pre/post) load as an ARM-mode LDR/LDRB/LDRH/
/// LDRSH/LDRSB writeback instruction.
///
/// \returns false for unindexed loads and when no addressing form matches the
///          loaded type; otherwise replaces N with the machine node and
///          returns true.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // The immediate forms are tried before the register form.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms take no separate offset register
      // operand, only the immediate in AMOpc.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                            MVT::i32, MVT::Other, Ops));
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                            MVT::i32, MVT::Other, Ops));
      return true;
    }
  }

  return false;
}

/// Try to select an indexed (pre/post) load as a Thumb2 t2LDR*_PRE/_POST
/// instruction.
///
/// \returns false for unindexed loads, when the offset is not matched by
///          SelectT2AddrModeImm8Offset, or for memory types other than
///          i32/i16/i8/i1; otherwise replaces N and returns true.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                          MVT::Other, Ops));
    return true;
  }

  return false;
}

/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
///
/// Builds a REG_SEQUENCE of type VT in the GPRPair register class with V0 as
/// subregister gsub_0 and V1 as gsub_1.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// \brief Form a D register from a pair of S registers.
///
/// Builds a REG_SEQUENCE of type VT in the DPR_VFP2 register class with V0 as
/// subregister ssub_0 and V1 as ssub_1.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// \brief Form a quad register from a pair of D registers.
1618SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1619 SDLoc dl(V0.getNode()); 1620 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1621 MVT::i32); 1622 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1623 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1624 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1625 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1626} 1627 1628/// \brief Form 4 consecutive D registers from a pair of Q registers. 1629SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1630 SDLoc dl(V0.getNode()); 1631 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1632 MVT::i32); 1633 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1634 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1635 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1636 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1637} 1638 1639/// \brief Form 4 consecutive S registers. 1640SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1641 SDValue V2, SDValue V3) { 1642 SDLoc dl(V0.getNode()); 1643 SDValue RegClass = 1644 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1645 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1646 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1647 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1648 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1649 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1650 V2, SubReg2, V3, SubReg3 }; 1651 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1652} 1653 1654/// \brief Form 4 consecutive D registers. 
1655SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1656 SDValue V2, SDValue V3) { 1657 SDLoc dl(V0.getNode()); 1658 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1659 MVT::i32); 1660 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1661 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1662 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1663 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1664 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1665 V2, SubReg2, V3, SubReg3 }; 1666 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1667} 1668 1669/// \brief Form 4 consecutive Q registers. 1670SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1671 SDValue V2, SDValue V3) { 1672 SDLoc dl(V0.getNode()); 1673 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1674 MVT::i32); 1675 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1676 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1677 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1678 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1679 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1680 V2, SubReg2, V3, SubReg3 }; 1681 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1682} 1683 1684/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1685/// of a NEON VLD or VST instruction. The supported values depend on the 1686/// number of registers being loaded. 
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  // Align must be a ConstantSDNode (enforced by cast<> below). The result is
  // an i32 target constant holding 32, 16, 8 or 0.
  unsigned NumRegs = NumVecs;
  // For non-64-bit vectors with fewer than three vectors, the register count
  // is twice the vector count.
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  // Clamp the requested alignment down to the largest value accepted for this
  // register count; anything below 8 becomes 0.
  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}

// Return true if Opc is one of the VLD "_fixed" writeback opcodes listed
// below.
// NOTE(review): ARM::VLD1d64Twb_fixed has a mapping in
// getVLDSTRegisterUpdateOpcode but is not listed here, so the assert in that
// function would reject it -- confirm whether the omission is intentional.
static bool isVLDfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed : return true;
  case ARM::VLD1d16wb_fixed : return true;
  case ARM::VLD1d64Qwb_fixed : return true;
  case ARM::VLD1d32wb_fixed : return true;
  case ARM::VLD1d64wb_fixed : return true;
  case ARM::VLD1d64TPseudoWB_fixed : return true;
  case ARM::VLD1d64QPseudoWB_fixed : return true;
  case ARM::VLD1q8wb_fixed : return true;
  case ARM::VLD1q16wb_fixed : return true;
  case ARM::VLD1q32wb_fixed : return true;
  case ARM::VLD1q64wb_fixed : return true;
  case ARM::VLD2d8wb_fixed : return true;
  case ARM::VLD2d16wb_fixed : return true;
  case ARM::VLD2d32wb_fixed : return true;
  case ARM::VLD2q8PseudoWB_fixed : return true;
  case ARM::VLD2q16PseudoWB_fixed : return true;
  case ARM::VLD2q32PseudoWB_fixed : return true;
  case ARM::VLD2DUPd8wb_fixed : return true;
  case ARM::VLD2DUPd16wb_fixed : return true;
  case ARM::VLD2DUPd32wb_fixed : return true;
  }
}

// Return true if Opc is one of the VST "_fixed" writeback opcodes listed
// below.
static bool isVSTfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed : return true;
  case ARM::VST1d16wb_fixed : return true;
  case ARM::VST1d32wb_fixed : return true;
  case ARM::VST1d64wb_fixed : return true;
  case ARM::VST1q8wb_fixed : return true;
  case ARM::VST1q16wb_fixed : return true;
  case ARM::VST1q32wb_fixed : return true;
  case ARM::VST1q64wb_fixed : return true;
  case ARM::VST1d64TPseudoWB_fixed : return true;
  case ARM::VST1d64QPseudoWB_fixed : return true;
  case ARM::VST2d8wb_fixed : return true;
  case ARM::VST2d16wb_fixed : return true;
  case ARM::VST2d32wb_fixed : return true;
  case ARM::VST2q8PseudoWB_fixed : return true;
  case ARM::VST2q16PseudoWB_fixed : return true;
  case ARM::VST2q32PseudoWB_fixed : return true;
  }
}

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// Opc must satisfy isVLDfixed or isVSTfixed (asserted); an opcode with no
// entry in the table below is returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
    && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ?
1 : 2; 1817 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1818 return; 1819 1820 SDValue Chain = N->getOperand(0); 1821 EVT VT = N->getValueType(0); 1822 bool is64BitVector = VT.is64BitVector(); 1823 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1824 1825 unsigned OpcodeIndex; 1826 switch (VT.getSimpleVT().SimpleTy) { 1827 default: llvm_unreachable("unhandled vld type"); 1828 // Double-register operations: 1829 case MVT::v8i8: OpcodeIndex = 0; break; 1830 case MVT::v4i16: OpcodeIndex = 1; break; 1831 case MVT::v2f32: 1832 case MVT::v2i32: OpcodeIndex = 2; break; 1833 case MVT::v1i64: OpcodeIndex = 3; break; 1834 // Quad-register operations: 1835 case MVT::v16i8: OpcodeIndex = 0; break; 1836 case MVT::v8i16: OpcodeIndex = 1; break; 1837 case MVT::v4f32: 1838 case MVT::v4i32: OpcodeIndex = 2; break; 1839 case MVT::v2f64: 1840 case MVT::v2i64: OpcodeIndex = 3; 1841 assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); 1842 break; 1843 } 1844 1845 EVT ResTy; 1846 if (NumVecs == 1) 1847 ResTy = VT; 1848 else { 1849 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 1850 if (!is64BitVector) 1851 ResTyElts *= 2; 1852 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 1853 } 1854 std::vector<EVT> ResTys; 1855 ResTys.push_back(ResTy); 1856 if (isUpdating) 1857 ResTys.push_back(MVT::i32); 1858 ResTys.push_back(MVT::Other); 1859 1860 SDValue Pred = getAL(CurDAG, dl); 1861 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1862 SDNode *VLd; 1863 SmallVector<SDValue, 7> Ops; 1864 1865 // Double registers and VLD1/VLD2 quad registers are directly supported. 1866 if (is64BitVector || NumVecs <= 2) { 1867 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 1868 QOpcodes0[OpcodeIndex]); 1869 Ops.push_back(MemAddr); 1870 Ops.push_back(Align); 1871 if (isUpdating) { 1872 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1873 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. 
Remove the reg0 1874 // case entirely when the rest are updated to that form, too. 1875 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode())) 1876 Opc = getVLDSTRegisterUpdateOpcode(Opc); 1877 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 1878 // check for that explicitly too. Horribly hacky, but temporary. 1879 if ((NumVecs > 2 && !isVLDfixed(Opc)) || 1880 !isa<ConstantSDNode>(Inc.getNode())) 1881 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); 1882 } 1883 Ops.push_back(Pred); 1884 Ops.push_back(Reg0); 1885 Ops.push_back(Chain); 1886 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1887 1888 } else { 1889 // Otherwise, quad registers are loaded with two separate instructions, 1890 // where one loads the even registers and the other loads the odd registers. 1891 EVT AddrTy = MemAddr.getValueType(); 1892 1893 // Load the even subregs. This is always an updating load, so that it 1894 // provides the address to the second load for the odd subregs. 1895 SDValue ImplDef = 1896 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 1897 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 1898 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 1899 ResTy, AddrTy, MVT::Other, OpsA); 1900 Chain = SDValue(VLdA, 2); 1901 1902 // Load the odd subregs. 1903 Ops.push_back(SDValue(VLdA, 1)); 1904 Ops.push_back(Align); 1905 if (isUpdating) { 1906 SDValue Inc = N->getOperand(AddrOpIdx + 1); 1907 assert(isa<ConstantSDNode>(Inc.getNode()) && 1908 "only constant post-increment update allowed for VLD3/4"); 1909 (void)Inc; 1910 Ops.push_back(Reg0); 1911 } 1912 Ops.push_back(SDValue(VLdA, 0)); 1913 Ops.push_back(Pred); 1914 Ops.push_back(Reg0); 1915 Ops.push_back(Chain); 1916 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 1917 } 1918 1919 // Transfer memoperands. 
1920 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1921 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1922 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); 1923 1924 if (NumVecs == 1) { 1925 ReplaceNode(N, VLd); 1926 return; 1927 } 1928 1929 // Extract out the subregisters. 1930 SDValue SuperReg = SDValue(VLd, 0); 1931 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 1932 ARM::qsub_3 == ARM::qsub_0 + 3, 1933 "Unexpected subreg numbering"); 1934 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); 1935 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 1936 ReplaceUses(SDValue(N, Vec), 1937 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 1938 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 1939 if (isUpdating) 1940 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 1941 CurDAG->RemoveDeadNode(N); 1942} 1943 1944void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 1945 const uint16_t *DOpcodes, 1946 const uint16_t *QOpcodes0, 1947 const uint16_t *QOpcodes1) { 1948 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 1949 SDLoc dl(N); 1950 1951 SDValue MemAddr, Align; 1952 unsigned AddrOpIdx = isUpdating ? 1 : 2; 1953 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) 1954 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 1955 return; 1956 1957 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1958 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1959 1960 SDValue Chain = N->getOperand(0); 1961 EVT VT = N->getOperand(Vec0Idx).getValueType(); 1962 bool is64BitVector = VT.is64BitVector(); 1963 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 1964 1965 unsigned OpcodeIndex; 1966 switch (VT.getSimpleVT().SimpleTy) { 1967 default: llvm_unreachable("unhandled vst type"); 1968 // Double-register operations: 1969 case MVT::v8i8: OpcodeIndex = 0; break; 1970 case MVT::v4i16: OpcodeIndex = 1; break; 1971 case MVT::v2f32: 1972 case MVT::v2i32: OpcodeIndex = 2; break; 1973 case MVT::v1i64: OpcodeIndex = 3; break; 1974 // Quad-register operations: 1975 case MVT::v16i8: OpcodeIndex = 0; break; 1976 case MVT::v8i16: OpcodeIndex = 1; break; 1977 case MVT::v4f32: 1978 case MVT::v4i32: OpcodeIndex = 2; break; 1979 case MVT::v2f64: 1980 case MVT::v2i64: OpcodeIndex = 3; 1981 assert(NumVecs == 1 && "v2i64 type only supported for VST1"); 1982 break; 1983 } 1984 1985 std::vector<EVT> ResTys; 1986 if (isUpdating) 1987 ResTys.push_back(MVT::i32); 1988 ResTys.push_back(MVT::Other); 1989 1990 SDValue Pred = getAL(CurDAG, dl); 1991 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 1992 SmallVector<SDValue, 7> Ops; 1993 1994 // Double registers and VST1/VST2 quad registers are directly supported. 1995 if (is64BitVector || NumVecs <= 2) { 1996 SDValue SrcReg; 1997 if (NumVecs == 1) { 1998 SrcReg = N->getOperand(Vec0Idx); 1999 } else if (is64BitVector) { 2000 // Form a REG_SEQUENCE to force register allocation. 
2001 SDValue V0 = N->getOperand(Vec0Idx + 0); 2002 SDValue V1 = N->getOperand(Vec0Idx + 1); 2003 if (NumVecs == 2) 2004 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2005 else { 2006 SDValue V2 = N->getOperand(Vec0Idx + 2); 2007 // If it's a vst3, form a quad D-register and leave the last part as 2008 // an undef. 2009 SDValue V3 = (NumVecs == 3) 2010 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2011 : N->getOperand(Vec0Idx + 3); 2012 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2013 } 2014 } else { 2015 // Form a QQ register. 2016 SDValue Q0 = N->getOperand(Vec0Idx); 2017 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2018 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2019 } 2020 2021 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2022 QOpcodes0[OpcodeIndex]); 2023 Ops.push_back(MemAddr); 2024 Ops.push_back(Align); 2025 if (isUpdating) { 2026 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2027 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 2028 // case entirely when the rest are updated to that form, too. 2029 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) 2030 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2031 // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so 2032 // check for that explicitly too. Horribly hacky, but temporary. 2033 if (!isa<ConstantSDNode>(Inc.getNode())) 2034 Ops.push_back(Inc); 2035 else if (NumVecs > 2 && !isVSTfixed(Opc)) 2036 Ops.push_back(Reg0); 2037 } 2038 Ops.push_back(SrcReg); 2039 Ops.push_back(Pred); 2040 Ops.push_back(Reg0); 2041 Ops.push_back(Chain); 2042 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2043 2044 // Transfer memoperands. 
2045 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); 2046 2047 ReplaceNode(N, VSt); 2048 return; 2049 } 2050 2051 // Otherwise, quad registers are stored with two separate instructions, 2052 // where one stores the even registers and the other stores the odd registers. 2053 2054 // Form the QQQQ REG_SEQUENCE. 2055 SDValue V0 = N->getOperand(Vec0Idx + 0); 2056 SDValue V1 = N->getOperand(Vec0Idx + 1); 2057 SDValue V2 = N->getOperand(Vec0Idx + 2); 2058 SDValue V3 = (NumVecs == 3) 2059 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2060 : N->getOperand(Vec0Idx + 3); 2061 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2062 2063 // Store the even D registers. This is always an updating store, so that it 2064 // provides the address to the second store for the odd subregs. 2065 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2066 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2067 MemAddr.getValueType(), 2068 MVT::Other, OpsA); 2069 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); 2070 Chain = SDValue(VStA, 1); 2071 2072 // Store the odd D registers. 
2073 Ops.push_back(SDValue(VStA, 0)); 2074 Ops.push_back(Align); 2075 if (isUpdating) { 2076 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2077 assert(isa<ConstantSDNode>(Inc.getNode()) && 2078 "only constant post-increment update allowed for VST3/4"); 2079 (void)Inc; 2080 Ops.push_back(Reg0); 2081 } 2082 Ops.push_back(RegSeq); 2083 Ops.push_back(Pred); 2084 Ops.push_back(Reg0); 2085 Ops.push_back(Chain); 2086 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2087 Ops); 2088 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); 2089 ReplaceNode(N, VStB); 2090} 2091 2092void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2093 unsigned NumVecs, 2094 const uint16_t *DOpcodes, 2095 const uint16_t *QOpcodes) { 2096 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2097 SDLoc dl(N); 2098 2099 SDValue MemAddr, Align; 2100 unsigned AddrOpIdx = isUpdating ? 1 : 2; 2101 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2102 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2103 return; 2104 2105 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2106 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2107 2108 SDValue Chain = N->getOperand(0); 2109 unsigned Lane = 2110 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2111 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2112 bool is64BitVector = VT.is64BitVector(); 2113 2114 unsigned Alignment = 0; 2115 if (NumVecs != 3) { 2116 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2117 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; 2118 if (Alignment > NumBytes) 2119 Alignment = NumBytes; 2120 if (Alignment < 8 && Alignment < NumBytes) 2121 Alignment = 0; 2122 // Alignment must be a power of two; make sure of that. 
2123 Alignment = (Alignment & -Alignment); 2124 if (Alignment == 1) 2125 Alignment = 0; 2126 } 2127 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2128 2129 unsigned OpcodeIndex; 2130 switch (VT.getSimpleVT().SimpleTy) { 2131 default: llvm_unreachable("unhandled vld/vst lane type"); 2132 // Double-register operations: 2133 case MVT::v8i8: OpcodeIndex = 0; break; 2134 case MVT::v4i16: OpcodeIndex = 1; break; 2135 case MVT::v2f32: 2136 case MVT::v2i32: OpcodeIndex = 2; break; 2137 // Quad-register operations: 2138 case MVT::v8i16: OpcodeIndex = 0; break; 2139 case MVT::v4f32: 2140 case MVT::v4i32: OpcodeIndex = 1; break; 2141 } 2142 2143 std::vector<EVT> ResTys; 2144 if (IsLoad) { 2145 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2146 if (!is64BitVector) 2147 ResTyElts *= 2; 2148 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2149 MVT::i64, ResTyElts)); 2150 } 2151 if (isUpdating) 2152 ResTys.push_back(MVT::i32); 2153 ResTys.push_back(MVT::Other); 2154 2155 SDValue Pred = getAL(CurDAG, dl); 2156 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2157 2158 SmallVector<SDValue, 8> Ops; 2159 Ops.push_back(MemAddr); 2160 Ops.push_back(Align); 2161 if (isUpdating) { 2162 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2163 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); 2164 } 2165 2166 SDValue SuperReg; 2167 SDValue V0 = N->getOperand(Vec0Idx + 0); 2168 SDValue V1 = N->getOperand(Vec0Idx + 1); 2169 if (NumVecs == 2) { 2170 if (is64BitVector) 2171 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2172 else 2173 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2174 } else { 2175 SDValue V2 = N->getOperand(Vec0Idx + 2); 2176 SDValue V3 = (NumVecs == 3) 2177 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2178 : N->getOperand(Vec0Idx + 3); 2179 if (is64BitVector) 2180 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2181 else 2182 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2183 } 2184 Ops.push_back(SuperReg); 2185 Ops.push_back(getI32Imm(Lane, dl)); 2186 Ops.push_back(Pred); 2187 Ops.push_back(Reg0); 2188 Ops.push_back(Chain); 2189 2190 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2191 QOpcodes[OpcodeIndex]); 2192 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2193 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); 2194 if (!IsLoad) { 2195 ReplaceNode(N, VLdLn); 2196 return; 2197 } 2198 2199 // Extract the subregisters. 2200 SuperReg = SDValue(VLdLn, 0); 2201 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2202 ARM::qsub_3 == ARM::qsub_0 + 3, 2203 "Unexpected subreg numbering"); 2204 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2205 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2206 ReplaceUses(SDValue(N, Vec), 2207 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2208 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2209 if (isUpdating) 2210 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2211 CurDAG->RemoveDeadNode(N); 2212} 2213 2214void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, 2215 const uint16_t *Opcodes) { 2216 assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2217 SDLoc dl(N); 2218 2219 SDValue MemAddr, Align; 2220 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) 2221 return; 2222 2223 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2224 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2225 2226 SDValue Chain = N->getOperand(0); 2227 EVT VT = N->getValueType(0); 2228 2229 unsigned Alignment = 0; 2230 if (NumVecs != 3) { 2231 Alignment = 
cast<ConstantSDNode>(Align)->getZExtValue(); 2232 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; 2233 if (Alignment > NumBytes) 2234 Alignment = NumBytes; 2235 if (Alignment < 8 && Alignment < NumBytes) 2236 Alignment = 0; 2237 // Alignment must be a power of two; make sure of that. 2238 Alignment = (Alignment & -Alignment); 2239 if (Alignment == 1) 2240 Alignment = 0; 2241 } 2242 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2243 2244 unsigned OpcodeIndex; 2245 switch (VT.getSimpleVT().SimpleTy) { 2246 default: llvm_unreachable("unhandled vld-dup type"); 2247 case MVT::v8i8: OpcodeIndex = 0; break; 2248 case MVT::v4i16: OpcodeIndex = 1; break; 2249 case MVT::v2f32: 2250 case MVT::v2i32: OpcodeIndex = 2; break; 2251 } 2252 2253 SDValue Pred = getAL(CurDAG, dl); 2254 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2255 SDValue SuperReg; 2256 unsigned Opc = Opcodes[OpcodeIndex]; 2257 SmallVector<SDValue, 6> Ops; 2258 Ops.push_back(MemAddr); 2259 Ops.push_back(Align); 2260 if (isUpdating) { 2261 // fixed-stride update instructions don't have an explicit writeback 2262 // operand. It's implicit in the opcode itself. 2263 SDValue Inc = N->getOperand(2); 2264 if (!isa<ConstantSDNode>(Inc.getNode())) 2265 Ops.push_back(Inc); 2266 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 2267 else if (NumVecs > 2) 2268 Ops.push_back(Reg0); 2269 } 2270 Ops.push_back(Pred); 2271 Ops.push_back(Reg0); 2272 Ops.push_back(Chain); 2273 2274 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2275 std::vector<EVT> ResTys; 2276 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); 2277 if (isUpdating) 2278 ResTys.push_back(MVT::i32); 2279 ResTys.push_back(MVT::Other); 2280 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2281 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); 2282 SuperReg = SDValue(VLdDup, 0); 2283 2284 // Extract the subregisters. 
2285 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 2286 unsigned SubIdx = ARM::dsub_0; 2287 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2288 ReplaceUses(SDValue(N, Vec), 2289 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 2290 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 2291 if (isUpdating) 2292 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 2293 CurDAG->RemoveDeadNode(N); 2294} 2295 2296void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, 2297 unsigned Opc) { 2298 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); 2299 SDLoc dl(N); 2300 EVT VT = N->getValueType(0); 2301 unsigned FirstTblReg = IsExt ? 2 : 1; 2302 2303 // Form a REG_SEQUENCE to force register allocation. 2304 SDValue RegSeq; 2305 SDValue V0 = N->getOperand(FirstTblReg + 0); 2306 SDValue V1 = N->getOperand(FirstTblReg + 1); 2307 if (NumVecs == 2) 2308 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 2309 else { 2310 SDValue V2 = N->getOperand(FirstTblReg + 2); 2311 // If it's a vtbl3, form a quad D-register and leave the last part as 2312 // an undef. 2313 SDValue V3 = (NumVecs == 3) 2314 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2315 : N->getOperand(FirstTblReg + 3); 2316 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2317 } 2318 2319 SmallVector<SDValue, 6> Ops; 2320 if (IsExt) 2321 Ops.push_back(N->getOperand(1)); 2322 Ops.push_back(RegSeq); 2323 Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); 2324 Ops.push_back(getAL(CurDAG, dl)); // predicate 2325 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register 2326 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 2327} 2328 2329bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 2330 if (!Subtarget->hasV6T2Ops()) 2331 return false; 2332 2333 unsigned Opc = isSigned 2334 ? (Subtarget->isThumb() ? 
ARM::t2SBFX : ARM::SBFX) 2335 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); 2336 SDLoc dl(N); 2337 2338 // For unsigned extracts, check for a shift right and mask 2339 unsigned And_imm = 0; 2340 if (N->getOpcode() == ISD::AND) { 2341 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 2342 2343 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 2344 if (And_imm & (And_imm + 1)) 2345 return false; 2346 2347 unsigned Srl_imm = 0; 2348 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 2349 Srl_imm)) { 2350 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2351 2352 // Note: The width operand is encoded as width-1. 2353 unsigned Width = countTrailingOnes(And_imm) - 1; 2354 unsigned LSB = Srl_imm; 2355 2356 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2357 2358 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 2359 // It's cheaper to use a right shift to extract the top bits. 2360 if (Subtarget->isThumb()) { 2361 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 2362 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2363 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2364 getAL(CurDAG, dl), Reg0, Reg0 }; 2365 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2366 return true; 2367 } 2368 2369 // ARM models shift instructions as MOVsi with shifter operand. 
2370 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 2371 SDValue ShOpc = 2372 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 2373 MVT::i32); 2374 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 2375 getAL(CurDAG, dl), Reg0, Reg0 }; 2376 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 2377 return true; 2378 } 2379 2380 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2381 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2382 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2383 getAL(CurDAG, dl), Reg0 }; 2384 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2385 return true; 2386 } 2387 } 2388 return false; 2389 } 2390 2391 // Otherwise, we're looking for a shift of a shift 2392 unsigned Shl_imm = 0; 2393 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 2394 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 2395 unsigned Srl_imm = 0; 2396 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 2397 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2398 // Note: The width operand is encoded as width-1. 
2399 unsigned Width = 32 - Srl_imm - 1; 2400 int LSB = Srl_imm - Shl_imm; 2401 if (LSB < 0) 2402 return false; 2403 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2404 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2405 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2406 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2407 getAL(CurDAG, dl), Reg0 }; 2408 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2409 return true; 2410 } 2411 } 2412 2413 // Or we are looking for a shift of an and, with a mask operand 2414 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 2415 isShiftedMask_32(And_imm)) { 2416 unsigned Srl_imm = 0; 2417 unsigned LSB = countTrailingZeros(And_imm); 2418 // Shift must be the same as the ands lsb 2419 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 2420 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 2421 unsigned MSB = 31 - countLeadingZeros(And_imm); 2422 // Note: The width operand is encoded as width-1. 
2423 unsigned Width = MSB - LSB; 2424 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2425 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2426 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 2427 CurDAG->getTargetConstant(Width, dl, MVT::i32), 2428 getAL(CurDAG, dl), Reg0 }; 2429 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2430 return true; 2431 } 2432 } 2433 2434 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 2435 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 2436 unsigned LSB = 0; 2437 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 2438 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 2439 return false; 2440 2441 if (LSB + Width > 32) 2442 return false; 2443 2444 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2445 SDValue Ops[] = { N->getOperand(0).getOperand(0), 2446 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 2447 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 2448 getAL(CurDAG, dl), Reg0 }; 2449 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2450 return true; 2451 } 2452 2453 return false; 2454} 2455 2456/// Target-specific DAG combining for ISD::XOR. 2457/// Target-independent combining lowers SELECT_CC nodes of the form 2458/// select_cc setg[ge] X, 0, X, -X 2459/// select_cc setgt X, -1, X, -X 2460/// select_cc setl[te] X, 0, -X, X 2461/// select_cc setlt X, 1, -X, X 2462/// which represent Integer ABS into: 2463/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 2464/// ARM instruction selection detects the latter and matches it to 2465/// ARM::ABS or ARM::t2ABS machine node. 
2466bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 2467 SDValue XORSrc0 = N->getOperand(0); 2468 SDValue XORSrc1 = N->getOperand(1); 2469 EVT VT = N->getValueType(0); 2470 2471 if (Subtarget->isThumb1Only()) 2472 return false; 2473 2474 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 2475 return false; 2476 2477 SDValue ADDSrc0 = XORSrc0.getOperand(0); 2478 SDValue ADDSrc1 = XORSrc0.getOperand(1); 2479 SDValue SRASrc0 = XORSrc1.getOperand(0); 2480 SDValue SRASrc1 = XORSrc1.getOperand(1); 2481 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 2482 EVT XType = SRASrc0.getValueType(); 2483 unsigned Size = XType.getSizeInBits() - 1; 2484 2485 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 2486 XType.isInteger() && SRAConstant != nullptr && 2487 Size == SRAConstant->getZExtValue()) { 2488 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 2489 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 2490 return true; 2491 } 2492 2493 return false; 2494} 2495 2496static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1, 2497 bool Accumulate) { 2498 // For SM*WB, we need to some form of sext. 2499 // For SM*WT, we need to search for (sra X, 16) 2500 // Src1 then gets set to X. 2501 if ((SignExt.getOpcode() == ISD::SIGN_EXTEND || 2502 SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG || 2503 SignExt.getOpcode() == ISD::AssertSext) && 2504 SignExt.getValueType() == MVT::i32) { 2505 2506 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; 2507 Src1 = SignExt.getOperand(0); 2508 return true; 2509 } 2510 2511 if (SignExt.getOpcode() != ISD::SRA) 2512 return false; 2513 2514 ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1)); 2515 if (!SRASrc1 || SRASrc1->getZExtValue() != 16) 2516 return false; 2517 2518 SDValue Op0 = SignExt.getOperand(0); 2519 2520 // The sign extend operand for SM*WB could be generated by a shl and ashr. 
2521 if (Op0.getOpcode() == ISD::SHL) { 2522 SDValue SHL = Op0; 2523 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); 2524 if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16) 2525 return false; 2526 2527 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; 2528 Src1 = Op0.getOperand(0); 2529 return true; 2530 } 2531 *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT; 2532 Src1 = SignExt.getOperand(0); 2533 return true; 2534} 2535 2536static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0, 2537 SDValue &Src1, bool Accumulate) { 2538 // First we look for: 2539 // (add (or (srl ?, 16), (shl ?, 16))) 2540 if (OR.getOpcode() != ISD::OR) 2541 return false; 2542 2543 SDValue SRL = OR.getOperand(0); 2544 SDValue SHL = OR.getOperand(1); 2545 2546 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { 2547 SRL = OR.getOperand(1); 2548 SHL = OR.getOperand(0); 2549 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) 2550 return false; 2551 } 2552 2553 ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1)); 2554 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); 2555 if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 || 2556 SHLSrc1->getZExtValue() != 16) 2557 return false; 2558 2559 // The first operands to the shifts need to be the two results from the 2560 // same smul_lohi node. 2561 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) || 2562 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI) 2563 return false; 2564 2565 SDNode *SMULLOHI = SRL.getOperand(0).getNode(); 2566 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) || 2567 SHL.getOperand(0) != SDValue(SMULLOHI, 1)) 2568 return false; 2569 2570 // Now we have: 2571 // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))) 2572 // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments. 2573 // For SMLAWB the 16-bit value will signed extended somehow. 2574 // For SMLAWT only the SRA is required. 
2575 2576 // Check both sides of SMUL_LOHI 2577 if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) { 2578 Src0 = SMULLOHI->getOperand(1); 2579 } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1, 2580 Accumulate)) { 2581 Src0 = SMULLOHI->getOperand(0); 2582 } else { 2583 return false; 2584 } 2585 return true; 2586} 2587 2588bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) { 2589 SDLoc dl(N); 2590 SDValue Src0 = N->getOperand(0); 2591 SDValue Src1 = N->getOperand(1); 2592 SDValue A, B; 2593 unsigned Opc = 0; 2594 2595 if (N->getOpcode() == ISD::ADD) { 2596 if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR) 2597 return false; 2598 2599 SDValue Acc; 2600 if (SearchSignedMulLong(Src0, &Opc, A, B, true)) { 2601 Acc = Src1; 2602 } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) { 2603 Acc = Src0; 2604 } else { 2605 return false; 2606 } 2607 if (Opc == 0) 2608 return false; 2609 2610 SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl), 2611 CurDAG->getRegister(0, MVT::i32) }; 2612 CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops); 2613 return true; 2614 } else if (N->getOpcode() == ISD::OR && 2615 SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) { 2616 if (Opc == 0) 2617 return false; 2618 2619 SDValue Ops[] = { A, B, getAL(CurDAG, dl), 2620 CurDAG->getRegister(0, MVT::i32)}; 2621 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2622 return true; 2623 } 2624 return false; 2625} 2626 2627/// We've got special pseudo-instructions for these 2628void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 2629 unsigned Opcode; 2630 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 2631 if (MemTy == MVT::i8) 2632 Opcode = ARM::CMP_SWAP_8; 2633 else if (MemTy == MVT::i16) 2634 Opcode = ARM::CMP_SWAP_16; 2635 else if (MemTy == MVT::i32) 2636 Opcode = ARM::CMP_SWAP_32; 2637 else 2638 llvm_unreachable("Unknown AtomicCmpSwap type"); 2639 2640 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 2641 N->getOperand(0)}; 
2642 SDNode *CmpSwap = CurDAG->getMachineNode( 2643 Opcode, SDLoc(N), 2644 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 2645 2646 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2647 MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); 2648 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1); 2649 2650 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 2651 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 2652 CurDAG->RemoveDeadNode(N); 2653} 2654 2655void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { 2656 // The only time a CONCAT_VECTORS operation can have legal types is when 2657 // two 64-bit vectors are concatenated to a 128-bit vector. 2658 EVT VT = N->getValueType(0); 2659 if (!VT.is128BitVector() || N->getNumOperands() != 2) 2660 llvm_unreachable("unexpected CONCAT_VECTORS"); 2661 ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1))); 2662} 2663 2664void ARMDAGToDAGISel::Select(SDNode *N) { 2665 SDLoc dl(N); 2666 2667 if (N->isMachineOpcode()) { 2668 N->setNodeId(-1); 2669 return; // Already selected. 2670 } 2671 2672 switch (N->getOpcode()) { 2673 default: break; 2674 case ISD::ADD: 2675 case ISD::OR: 2676 if (trySMLAWSMULW(N)) 2677 return; 2678 break; 2679 case ISD::WRITE_REGISTER: 2680 if (tryWriteRegister(N)) 2681 return; 2682 break; 2683 case ISD::READ_REGISTER: 2684 if (tryReadRegister(N)) 2685 return; 2686 break; 2687 case ISD::INLINEASM: 2688 if (tryInlineAsm(N)) 2689 return; 2690 break; 2691 case ISD::XOR: 2692 // Select special operations if XOR node forms integer ABS pattern 2693 if (tryABSOp(N)) 2694 return; 2695 // Other cases are autogenerated. 
2696 break; 2697 case ISD::Constant: { 2698 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 2699 // If we can't materialize the constant we need to use a literal pool 2700 if (ConstantMaterializationCost(Val) > 2) { 2701 SDValue CPIdx = CurDAG->getTargetConstantPool( 2702 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 2703 TLI->getPointerTy(CurDAG->getDataLayout())); 2704 2705 SDNode *ResNode; 2706 if (Subtarget->isThumb()) { 2707 SDValue Pred = getAL(CurDAG, dl); 2708 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2709 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; 2710 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 2711 Ops); 2712 } else { 2713 SDValue Ops[] = { 2714 CPIdx, 2715 CurDAG->getTargetConstant(0, dl, MVT::i32), 2716 getAL(CurDAG, dl), 2717 CurDAG->getRegister(0, MVT::i32), 2718 CurDAG->getEntryNode() 2719 }; 2720 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 2721 Ops); 2722 } 2723 ReplaceNode(N, ResNode); 2724 return; 2725 } 2726 2727 // Other cases are autogenerated. 2728 break; 2729 } 2730 case ISD::FrameIndex: { 2731 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 2732 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 2733 SDValue TFI = CurDAG->getTargetFrameIndex( 2734 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 2735 if (Subtarget->isThumb1Only()) { 2736 // Set the alignment of the frame object to 4, to avoid having to generate 2737 // more than one ADD 2738 MachineFrameInfo *MFI = MF->getFrameInfo(); 2739 if (MFI->getObjectAlignment(FI) < 4) 2740 MFI->setObjectAlignment(FI, 4); 2741 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 2742 CurDAG->getTargetConstant(0, dl, MVT::i32)); 2743 return; 2744 } else { 2745 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 
2746 ARM::t2ADDri : ARM::ADDri); 2747 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 2748 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2749 CurDAG->getRegister(0, MVT::i32) }; 2750 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 2751 return; 2752 } 2753 } 2754 case ISD::SRL: 2755 if (tryV6T2BitfieldExtractOp(N, false)) 2756 return; 2757 break; 2758 case ISD::SIGN_EXTEND_INREG: 2759 case ISD::SRA: 2760 if (tryV6T2BitfieldExtractOp(N, true)) 2761 return; 2762 break; 2763 case ISD::MUL: 2764 if (Subtarget->isThumb1Only()) 2765 break; 2766 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 2767 unsigned RHSV = C->getZExtValue(); 2768 if (!RHSV) break; 2769 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 2770 unsigned ShImm = Log2_32(RHSV-1); 2771 if (ShImm >= 32) 2772 break; 2773 SDValue V = N->getOperand(0); 2774 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2775 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2776 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2777 if (Subtarget->isThumb()) { 2778 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2779 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 2780 return; 2781 } else { 2782 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2783 Reg0 }; 2784 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 2785 return; 2786 } 2787 } 2788 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
2789 unsigned ShImm = Log2_32(RHSV+1); 2790 if (ShImm >= 32) 2791 break; 2792 SDValue V = N->getOperand(0); 2793 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 2794 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 2795 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2796 if (Subtarget->isThumb()) { 2797 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 2798 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 2799 return; 2800 } else { 2801 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 2802 Reg0 }; 2803 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 2804 return; 2805 } 2806 } 2807 } 2808 break; 2809 case ISD::AND: { 2810 // Check for unsigned bitfield extract 2811 if (tryV6T2BitfieldExtractOp(N, false)) 2812 return; 2813 2814 // If an immediate is used in an AND node, it is possible that the immediate 2815 // can be more optimally materialized when negated. If this is the case we 2816 // can negate the immediate and use a BIC instead. 2817 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2818 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 2819 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 2820 2821 // In Thumb2 mode, an AND can take a 12-bit immediate. If this 2822 // immediate can be negated and fit in the immediate operand of 2823 // a t2BIC, don't do any manual transform here as this can be 2824 // handled by the generic ISel machinery. 2825 bool PreferImmediateEncoding = 2826 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 2827 if (!PreferImmediateEncoding && 2828 ConstantMaterializationCost(Imm) > 2829 ConstantMaterializationCost(~Imm)) { 2830 // The current immediate costs more to materialize than a negated 2831 // immediate, so negate the immediate and use a BIC. 
2832 SDValue NewImm = 2833 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 2834 // If the new constant didn't exist before, reposition it in the topological 2835 // ordering so it is just before N. Otherwise, don't touch its location. 2836 if (NewImm->getNodeId() == -1) 2837 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 2838 2839 if (!Subtarget->hasThumb2()) { 2840 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 2841 N->getOperand(0), NewImm, getAL(CurDAG, dl), 2842 CurDAG->getRegister(0, MVT::i32)}; 2843 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 2844 return; 2845 } else { 2846 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 2847 CurDAG->getRegister(0, MVT::i32), 2848 CurDAG->getRegister(0, MVT::i32)}; 2849 ReplaceNode(N, 2850 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 2851 return; 2852 } 2853 } 2854 } 2855 2856 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 2857 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 2858 // are entirely contributed by c2 and lower 16-bits are entirely contributed 2859 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 2860 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 2861 EVT VT = N->getValueType(0); 2862 if (VT != MVT::i32) 2863 break; 2864 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 2865 ? ARM::t2MOVTi16 2866 : (Subtarget->hasV6T2Ops() ? 
ARM::MOVTi16 : 0); 2867 if (!Opc) 2868 break; 2869 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2870 N1C = dyn_cast<ConstantSDNode>(N1); 2871 if (!N1C) 2872 break; 2873 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 2874 SDValue N2 = N0.getOperand(1); 2875 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 2876 if (!N2C) 2877 break; 2878 unsigned N1CVal = N1C->getZExtValue(); 2879 unsigned N2CVal = N2C->getZExtValue(); 2880 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 2881 (N1CVal & 0xffffU) == 0xffffU && 2882 (N2CVal & 0xffffU) == 0x0U) { 2883 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 2884 dl, MVT::i32); 2885 SDValue Ops[] = { N0.getOperand(0), Imm16, 2886 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2887 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 2888 return; 2889 } 2890 } 2891 break; 2892 } 2893 case ARMISD::VMOVRRD: 2894 ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, 2895 N->getOperand(0), getAL(CurDAG, dl), 2896 CurDAG->getRegister(0, MVT::i32))); 2897 return; 2898 case ISD::UMUL_LOHI: { 2899 if (Subtarget->isThumb1Only()) 2900 break; 2901 if (Subtarget->isThumb()) { 2902 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2903 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2904 ReplaceNode( 2905 N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops)); 2906 return; 2907 } else { 2908 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2909 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2910 CurDAG->getRegister(0, MVT::i32) }; 2911 ReplaceNode(N, CurDAG->getMachineNode( 2912 Subtarget->hasV6Ops() ? 
ARM::UMULL : ARM::UMULLv5, dl, 2913 MVT::i32, MVT::i32, Ops)); 2914 return; 2915 } 2916 } 2917 case ISD::SMUL_LOHI: { 2918 if (Subtarget->isThumb1Only()) 2919 break; 2920 if (Subtarget->isThumb()) { 2921 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2922 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 2923 ReplaceNode( 2924 N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops)); 2925 return; 2926 } else { 2927 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2928 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 2929 CurDAG->getRegister(0, MVT::i32) }; 2930 ReplaceNode(N, CurDAG->getMachineNode( 2931 Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl, 2932 MVT::i32, MVT::i32, Ops)); 2933 return; 2934 } 2935 } 2936 case ARMISD::UMAAL: { 2937 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 2938 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2939 N->getOperand(2), N->getOperand(3), 2940 getAL(CurDAG, dl), 2941 CurDAG->getRegister(0, MVT::i32) }; 2942 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 2943 return; 2944 } 2945 case ARMISD::UMLAL:{ 2946 // UMAAL is similar to UMLAL but it adds two 32-bit values to the 2947 // 64-bit multiplication result. 
2948 if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC && 2949 N->getOperand(3).getOpcode() == ARMISD::ADDE) { 2950 2951 SDValue Addc = N->getOperand(2); 2952 SDValue Adde = N->getOperand(3); 2953 2954 if (Adde.getOperand(2).getNode() == Addc.getNode()) { 2955 2956 ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0)); 2957 ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1)); 2958 2959 if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) 2960 { 2961 // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm 2962 // RdLo = one operand to be added, lower 32-bits of res 2963 // RdHi = other operand to be added, upper 32-bits of res 2964 // Rn = first multiply operand 2965 // Rm = second multiply operand 2966 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 2967 Addc.getOperand(0), Addc.getOperand(1), 2968 getAL(CurDAG, dl), 2969 CurDAG->getRegister(0, MVT::i32) }; 2970 unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 2971 CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops); 2972 return; 2973 } 2974 } 2975 } 2976 2977 if (Subtarget->isThumb()) { 2978 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2979 N->getOperand(3), getAL(CurDAG, dl), 2980 CurDAG->getRegister(0, MVT::i32)}; 2981 ReplaceNode( 2982 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 2983 return; 2984 }else{ 2985 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2986 N->getOperand(3), getAL(CurDAG, dl), 2987 CurDAG->getRegister(0, MVT::i32), 2988 CurDAG->getRegister(0, MVT::i32) }; 2989 ReplaceNode(N, CurDAG->getMachineNode( 2990 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 2991 MVT::i32, MVT::i32, Ops)); 2992 return; 2993 } 2994 } 2995 case ARMISD::SMLAL:{ 2996 if (Subtarget->isThumb()) { 2997 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 2998 N->getOperand(3), getAL(CurDAG, dl), 2999 CurDAG->getRegister(0, MVT::i32)}; 3000 ReplaceNode( 3001 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3002 return; 3003 }else{ 3004 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3005 N->getOperand(3), getAL(CurDAG, dl), 3006 CurDAG->getRegister(0, MVT::i32), 3007 CurDAG->getRegister(0, MVT::i32) }; 3008 ReplaceNode(N, CurDAG->getMachineNode( 3009 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3010 MVT::i32, MVT::i32, Ops)); 3011 return; 3012 } 3013 } 3014 case ISD::LOAD: { 3015 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3016 if (tryT2IndexedLoad(N)) 3017 return; 3018 } else if (tryARMIndexedLoad(N)) 3019 return; 3020 // Other cases are autogenerated. 3021 break; 3022 } 3023 case ARMISD::BRCOND: { 3024 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3025 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3026 // Pattern complexity = 6 cost = 1 size = 0 3027 3028 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3029 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3030 // Pattern complexity = 6 cost = 1 size = 0 3031 3032 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3033 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3034 // Pattern complexity = 6 cost = 1 size = 0 3035 3036 unsigned Opc = Subtarget->isThumb() ? 3037 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3038 SDValue Chain = N->getOperand(0); 3039 SDValue N1 = N->getOperand(1); 3040 SDValue N2 = N->getOperand(2); 3041 SDValue N3 = N->getOperand(3); 3042 SDValue InFlag = N->getOperand(4); 3043 assert(N1.getOpcode() == ISD::BasicBlock); 3044 assert(N2.getOpcode() == ISD::Constant); 3045 assert(N3.getOpcode() == ISD::Register); 3046 3047 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) 3048 cast<ConstantSDNode>(N2)->getZExtValue()), dl, 3049 MVT::i32); 3050 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3051 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3052 MVT::Glue, Ops); 3053 Chain = SDValue(ResNode, 0); 3054 if (N->getNumValues() == 2) { 3055 InFlag = SDValue(ResNode, 1); 3056 ReplaceUses(SDValue(N, 1), InFlag); 3057 } 3058 ReplaceUses(SDValue(N, 0), 3059 SDValue(Chain.getNode(), Chain.getResNo())); 3060 CurDAG->RemoveDeadNode(N); 3061 return; 3062 } 3063 case ARMISD::VZIP: { 3064 unsigned Opc = 0; 3065 EVT VT = N->getValueType(0); 3066 switch (VT.getSimpleVT().SimpleTy) { 3067 default: return; 3068 case MVT::v8i8: Opc = ARM::VZIPd8; break; 3069 case MVT::v4i16: Opc = ARM::VZIPd16; break; 3070 case MVT::v2f32: 3071 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
3072 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3073 case MVT::v16i8: Opc = ARM::VZIPq8; break; 3074 case MVT::v8i16: Opc = ARM::VZIPq16; break; 3075 case MVT::v4f32: 3076 case MVT::v4i32: Opc = ARM::VZIPq32; break; 3077 } 3078 SDValue Pred = getAL(CurDAG, dl); 3079 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3080 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3081 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3082 return; 3083 } 3084 case ARMISD::VUZP: { 3085 unsigned Opc = 0; 3086 EVT VT = N->getValueType(0); 3087 switch (VT.getSimpleVT().SimpleTy) { 3088 default: return; 3089 case MVT::v8i8: Opc = ARM::VUZPd8; break; 3090 case MVT::v4i16: Opc = ARM::VUZPd16; break; 3091 case MVT::v2f32: 3092 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 3093 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3094 case MVT::v16i8: Opc = ARM::VUZPq8; break; 3095 case MVT::v8i16: Opc = ARM::VUZPq16; break; 3096 case MVT::v4f32: 3097 case MVT::v4i32: Opc = ARM::VUZPq32; break; 3098 } 3099 SDValue Pred = getAL(CurDAG, dl); 3100 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3101 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3102 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3103 return; 3104 } 3105 case ARMISD::VTRN: { 3106 unsigned Opc = 0; 3107 EVT VT = N->getValueType(0); 3108 switch (VT.getSimpleVT().SimpleTy) { 3109 default: return; 3110 case MVT::v8i8: Opc = ARM::VTRNd8; break; 3111 case MVT::v4i16: Opc = ARM::VTRNd16; break; 3112 case MVT::v2f32: 3113 case MVT::v2i32: Opc = ARM::VTRNd32; break; 3114 case MVT::v16i8: Opc = ARM::VTRNq8; break; 3115 case MVT::v8i16: Opc = ARM::VTRNq16; break; 3116 case MVT::v4f32: 3117 case MVT::v4i32: Opc = ARM::VTRNq32; break; 3118 } 3119 SDValue Pred = getAL(CurDAG, dl); 3120 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 3121 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 3122 ReplaceNode(N, 
CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 3123 return; 3124 } 3125 case ARMISD::BUILD_VECTOR: { 3126 EVT VecVT = N->getValueType(0); 3127 EVT EltVT = VecVT.getVectorElementType(); 3128 unsigned NumElts = VecVT.getVectorNumElements(); 3129 if (EltVT == MVT::f64) { 3130 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 3131 ReplaceNode( 3132 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3133 return; 3134 } 3135 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 3136 if (NumElts == 2) { 3137 ReplaceNode( 3138 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 3139 return; 3140 } 3141 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 3142 ReplaceNode(N, 3143 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 3144 N->getOperand(2), N->getOperand(3))); 3145 return; 3146 } 3147 3148 case ARMISD::VLD2DUP: { 3149 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 3150 ARM::VLD2DUPd32 }; 3151 SelectVLDDup(N, false, 2, Opcodes); 3152 return; 3153 } 3154 3155 case ARMISD::VLD3DUP: { 3156 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 3157 ARM::VLD3DUPd16Pseudo, 3158 ARM::VLD3DUPd32Pseudo }; 3159 SelectVLDDup(N, false, 3, Opcodes); 3160 return; 3161 } 3162 3163 case ARMISD::VLD4DUP: { 3164 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 3165 ARM::VLD4DUPd16Pseudo, 3166 ARM::VLD4DUPd32Pseudo }; 3167 SelectVLDDup(N, false, 4, Opcodes); 3168 return; 3169 } 3170 3171 case ARMISD::VLD2DUP_UPD: { 3172 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 3173 ARM::VLD2DUPd16wb_fixed, 3174 ARM::VLD2DUPd32wb_fixed }; 3175 SelectVLDDup(N, true, 2, Opcodes); 3176 return; 3177 } 3178 3179 case ARMISD::VLD3DUP_UPD: { 3180 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 3181 ARM::VLD3DUPd16Pseudo_UPD, 3182 ARM::VLD3DUPd32Pseudo_UPD }; 3183 SelectVLDDup(N, true, 3, Opcodes); 3184 return; 3185 } 3186 3187 case ARMISD::VLD4DUP_UPD: { 3188 static 
const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 3189 ARM::VLD4DUPd16Pseudo_UPD, 3190 ARM::VLD4DUPd32Pseudo_UPD }; 3191 SelectVLDDup(N, true, 4, Opcodes); 3192 return; 3193 } 3194 3195 case ARMISD::VLD1_UPD: { 3196 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 3197 ARM::VLD1d16wb_fixed, 3198 ARM::VLD1d32wb_fixed, 3199 ARM::VLD1d64wb_fixed }; 3200 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 3201 ARM::VLD1q16wb_fixed, 3202 ARM::VLD1q32wb_fixed, 3203 ARM::VLD1q64wb_fixed }; 3204 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 3205 return; 3206 } 3207 3208 case ARMISD::VLD2_UPD: { 3209 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, 3210 ARM::VLD2d16wb_fixed, 3211 ARM::VLD2d32wb_fixed, 3212 ARM::VLD1q64wb_fixed}; 3213 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, 3214 ARM::VLD2q16PseudoWB_fixed, 3215 ARM::VLD2q32PseudoWB_fixed }; 3216 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 3217 return; 3218 } 3219 3220 case ARMISD::VLD3_UPD: { 3221 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 3222 ARM::VLD3d16Pseudo_UPD, 3223 ARM::VLD3d32Pseudo_UPD, 3224 ARM::VLD1d64TPseudoWB_fixed}; 3225 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3226 ARM::VLD3q16Pseudo_UPD, 3227 ARM::VLD3q32Pseudo_UPD }; 3228 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 3229 ARM::VLD3q16oddPseudo_UPD, 3230 ARM::VLD3q32oddPseudo_UPD }; 3231 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3232 return; 3233 } 3234 3235 case ARMISD::VLD4_UPD: { 3236 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, 3237 ARM::VLD4d16Pseudo_UPD, 3238 ARM::VLD4d32Pseudo_UPD, 3239 ARM::VLD1d64QPseudoWB_fixed}; 3240 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3241 ARM::VLD4q16Pseudo_UPD, 3242 ARM::VLD4q32Pseudo_UPD }; 3243 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, 3244 ARM::VLD4q16oddPseudo_UPD, 3245 ARM::VLD4q32oddPseudo_UPD }; 3246 SelectVLD(N, true, 4, 
DOpcodes, QOpcodes0, QOpcodes1); 3247 return; 3248 } 3249 3250 case ARMISD::VLD2LN_UPD: { 3251 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 3252 ARM::VLD2LNd16Pseudo_UPD, 3253 ARM::VLD2LNd32Pseudo_UPD }; 3254 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 3255 ARM::VLD2LNq32Pseudo_UPD }; 3256 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 3257 return; 3258 } 3259 3260 case ARMISD::VLD3LN_UPD: { 3261 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 3262 ARM::VLD3LNd16Pseudo_UPD, 3263 ARM::VLD3LNd32Pseudo_UPD }; 3264 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 3265 ARM::VLD3LNq32Pseudo_UPD }; 3266 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 3267 return; 3268 } 3269 3270 case ARMISD::VLD4LN_UPD: { 3271 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 3272 ARM::VLD4LNd16Pseudo_UPD, 3273 ARM::VLD4LNd32Pseudo_UPD }; 3274 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 3275 ARM::VLD4LNq32Pseudo_UPD }; 3276 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 3277 return; 3278 } 3279 3280 case ARMISD::VST1_UPD: { 3281 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 3282 ARM::VST1d16wb_fixed, 3283 ARM::VST1d32wb_fixed, 3284 ARM::VST1d64wb_fixed }; 3285 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 3286 ARM::VST1q16wb_fixed, 3287 ARM::VST1q32wb_fixed, 3288 ARM::VST1q64wb_fixed }; 3289 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 3290 return; 3291 } 3292 3293 case ARMISD::VST2_UPD: { 3294 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, 3295 ARM::VST2d16wb_fixed, 3296 ARM::VST2d32wb_fixed, 3297 ARM::VST1q64wb_fixed}; 3298 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, 3299 ARM::VST2q16PseudoWB_fixed, 3300 ARM::VST2q32PseudoWB_fixed }; 3301 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 3302 return; 3303 } 3304 3305 case ARMISD::VST3_UPD: { 3306 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, 
3307 ARM::VST3d16Pseudo_UPD, 3308 ARM::VST3d32Pseudo_UPD, 3309 ARM::VST1d64TPseudoWB_fixed}; 3310 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3311 ARM::VST3q16Pseudo_UPD, 3312 ARM::VST3q32Pseudo_UPD }; 3313 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 3314 ARM::VST3q16oddPseudo_UPD, 3315 ARM::VST3q32oddPseudo_UPD }; 3316 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 3317 return; 3318 } 3319 3320 case ARMISD::VST4_UPD: { 3321 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, 3322 ARM::VST4d16Pseudo_UPD, 3323 ARM::VST4d32Pseudo_UPD, 3324 ARM::VST1d64QPseudoWB_fixed}; 3325 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3326 ARM::VST4q16Pseudo_UPD, 3327 ARM::VST4q32Pseudo_UPD }; 3328 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, 3329 ARM::VST4q16oddPseudo_UPD, 3330 ARM::VST4q32oddPseudo_UPD }; 3331 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 3332 return; 3333 } 3334 3335 case ARMISD::VST2LN_UPD: { 3336 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 3337 ARM::VST2LNd16Pseudo_UPD, 3338 ARM::VST2LNd32Pseudo_UPD }; 3339 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 3340 ARM::VST2LNq32Pseudo_UPD }; 3341 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 3342 return; 3343 } 3344 3345 case ARMISD::VST3LN_UPD: { 3346 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 3347 ARM::VST3LNd16Pseudo_UPD, 3348 ARM::VST3LNd32Pseudo_UPD }; 3349 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 3350 ARM::VST3LNq32Pseudo_UPD }; 3351 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 3352 return; 3353 } 3354 3355 case ARMISD::VST4LN_UPD: { 3356 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 3357 ARM::VST4LNd16Pseudo_UPD, 3358 ARM::VST4LNd32Pseudo_UPD }; 3359 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 3360 ARM::VST4LNq32Pseudo_UPD }; 3361 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 
3362 return; 3363 } 3364 3365 case ISD::INTRINSIC_VOID: 3366 case ISD::INTRINSIC_W_CHAIN: { 3367 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3368 switch (IntNo) { 3369 default: 3370 break; 3371 3372 case Intrinsic::arm_mrrc: 3373 case Intrinsic::arm_mrrc2: { 3374 SDLoc dl(N); 3375 SDValue Chain = N->getOperand(0); 3376 unsigned Opc; 3377 3378 if (Subtarget->isThumb()) 3379 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 3380 else 3381 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 3382 3383 SmallVector<SDValue, 5> Ops; 3384 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 3385 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 3386 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 3387 3388 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 3389 // instruction will always be '1111' but it is possible in assembly language to specify 3390 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 3391 if (Opc != ARM::MRRC2) { 3392 Ops.push_back(getAL(CurDAG, dl)); 3393 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3394 } 3395 3396 Ops.push_back(Chain); 3397 3398 // Writes to two registers. 3399 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 3400 3401 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 3402 return; 3403 } 3404 case Intrinsic::arm_ldaexd: 3405 case Intrinsic::arm_ldrexd: { 3406 SDLoc dl(N); 3407 SDValue Chain = N->getOperand(0); 3408 SDValue MemAddr = N->getOperand(2); 3409 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 3410 3411 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 3412 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 3413 : (IsAcquire ? 
ARM::LDAEXD : ARM::LDREXD); 3414 3415 // arm_ldrexd returns a i64 value in {i32, i32} 3416 std::vector<EVT> ResTys; 3417 if (isThumb) { 3418 ResTys.push_back(MVT::i32); 3419 ResTys.push_back(MVT::i32); 3420 } else 3421 ResTys.push_back(MVT::Untyped); 3422 ResTys.push_back(MVT::Other); 3423 3424 // Place arguments in the right order. 3425 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 3426 CurDAG->getRegister(0, MVT::i32), Chain}; 3427 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3428 // Transfer memoperands. 3429 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3430 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3431 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); 3432 3433 // Remap uses. 3434 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 3435 if (!SDValue(N, 0).use_empty()) { 3436 SDValue Result; 3437 if (isThumb) 3438 Result = SDValue(Ld, 0); 3439 else { 3440 SDValue SubRegIdx = 3441 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 3442 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3443 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3444 Result = SDValue(ResNode,0); 3445 } 3446 ReplaceUses(SDValue(N, 0), Result); 3447 } 3448 if (!SDValue(N, 1).use_empty()) { 3449 SDValue Result; 3450 if (isThumb) 3451 Result = SDValue(Ld, 1); 3452 else { 3453 SDValue SubRegIdx = 3454 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 3455 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 3456 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 3457 Result = SDValue(ResNode,0); 3458 } 3459 ReplaceUses(SDValue(N, 1), Result); 3460 } 3461 ReplaceUses(SDValue(N, 2), OutChain); 3462 CurDAG->RemoveDeadNode(N); 3463 return; 3464 } 3465 case Intrinsic::arm_stlexd: 3466 case Intrinsic::arm_strexd: { 3467 SDLoc dl(N); 3468 SDValue Chain = N->getOperand(0); 3469 SDValue Val0 = N->getOperand(2); 3470 SDValue Val1 = N->getOperand(3); 3471 SDValue MemAddr = N->getOperand(4); 3472 3473 // 
Store exclusive double return a i32 value which is the return status 3474 // of the issued store. 3475 const EVT ResTys[] = {MVT::i32, MVT::Other}; 3476 3477 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 3478 // Place arguments in the right order. 3479 SmallVector<SDValue, 7> Ops; 3480 if (isThumb) { 3481 Ops.push_back(Val0); 3482 Ops.push_back(Val1); 3483 } else 3484 // arm_strexd uses GPRPair. 3485 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 3486 Ops.push_back(MemAddr); 3487 Ops.push_back(getAL(CurDAG, dl)); 3488 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3489 Ops.push_back(Chain); 3490 3491 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 3492 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 3493 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 3494 3495 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 3496 // Transfer memoperands. 3497 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3498 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3499 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); 3500 3501 ReplaceNode(N, St); 3502 return; 3503 } 3504 3505 case Intrinsic::arm_neon_vld1: { 3506 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 3507 ARM::VLD1d32, ARM::VLD1d64 }; 3508 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 3509 ARM::VLD1q32, ARM::VLD1q64}; 3510 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 3511 return; 3512 } 3513 3514 case Intrinsic::arm_neon_vld2: { 3515 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 3516 ARM::VLD2d32, ARM::VLD1q64 }; 3517 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 3518 ARM::VLD2q32Pseudo }; 3519 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 3520 return; 3521 } 3522 3523 case Intrinsic::arm_neon_vld3: { 3524 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 3525 ARM::VLD3d16Pseudo, 3526 ARM::VLD3d32Pseudo, 3527 
ARM::VLD1d64TPseudo }; 3528 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 3529 ARM::VLD3q16Pseudo_UPD, 3530 ARM::VLD3q32Pseudo_UPD }; 3531 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 3532 ARM::VLD3q16oddPseudo, 3533 ARM::VLD3q32oddPseudo }; 3534 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3535 return; 3536 } 3537 3538 case Intrinsic::arm_neon_vld4: { 3539 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 3540 ARM::VLD4d16Pseudo, 3541 ARM::VLD4d32Pseudo, 3542 ARM::VLD1d64QPseudo }; 3543 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 3544 ARM::VLD4q16Pseudo_UPD, 3545 ARM::VLD4q32Pseudo_UPD }; 3546 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 3547 ARM::VLD4q16oddPseudo, 3548 ARM::VLD4q32oddPseudo }; 3549 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3550 return; 3551 } 3552 3553 case Intrinsic::arm_neon_vld2lane: { 3554 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 3555 ARM::VLD2LNd16Pseudo, 3556 ARM::VLD2LNd32Pseudo }; 3557 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 3558 ARM::VLD2LNq32Pseudo }; 3559 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 3560 return; 3561 } 3562 3563 case Intrinsic::arm_neon_vld3lane: { 3564 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 3565 ARM::VLD3LNd16Pseudo, 3566 ARM::VLD3LNd32Pseudo }; 3567 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 3568 ARM::VLD3LNq32Pseudo }; 3569 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 3570 return; 3571 } 3572 3573 case Intrinsic::arm_neon_vld4lane: { 3574 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 3575 ARM::VLD4LNd16Pseudo, 3576 ARM::VLD4LNd32Pseudo }; 3577 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 3578 ARM::VLD4LNq32Pseudo }; 3579 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 3580 return; 3581 } 3582 3583 case Intrinsic::arm_neon_vst1: { 3584 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 
3585 ARM::VST1d32, ARM::VST1d64 }; 3586 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 3587 ARM::VST1q32, ARM::VST1q64 }; 3588 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 3589 return; 3590 } 3591 3592 case Intrinsic::arm_neon_vst2: { 3593 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 3594 ARM::VST2d32, ARM::VST1q64 }; 3595 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 3596 ARM::VST2q32Pseudo }; 3597 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 3598 return; 3599 } 3600 3601 case Intrinsic::arm_neon_vst3: { 3602 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 3603 ARM::VST3d16Pseudo, 3604 ARM::VST3d32Pseudo, 3605 ARM::VST1d64TPseudo }; 3606 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 3607 ARM::VST3q16Pseudo_UPD, 3608 ARM::VST3q32Pseudo_UPD }; 3609 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 3610 ARM::VST3q16oddPseudo, 3611 ARM::VST3q32oddPseudo }; 3612 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 3613 return; 3614 } 3615 3616 case Intrinsic::arm_neon_vst4: { 3617 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 3618 ARM::VST4d16Pseudo, 3619 ARM::VST4d32Pseudo, 3620 ARM::VST1d64QPseudo }; 3621 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, 3622 ARM::VST4q16Pseudo_UPD, 3623 ARM::VST4q32Pseudo_UPD }; 3624 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 3625 ARM::VST4q16oddPseudo, 3626 ARM::VST4q32oddPseudo }; 3627 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 3628 return; 3629 } 3630 3631 case Intrinsic::arm_neon_vst2lane: { 3632 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 3633 ARM::VST2LNd16Pseudo, 3634 ARM::VST2LNd32Pseudo }; 3635 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 3636 ARM::VST2LNq32Pseudo }; 3637 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 3638 return; 3639 } 3640 3641 case Intrinsic::arm_neon_vst3lane: { 3642 static const uint16_t DOpcodes[] 
= { ARM::VST3LNd8Pseudo, 3643 ARM::VST3LNd16Pseudo, 3644 ARM::VST3LNd32Pseudo }; 3645 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 3646 ARM::VST3LNq32Pseudo }; 3647 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 3648 return; 3649 } 3650 3651 case Intrinsic::arm_neon_vst4lane: { 3652 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 3653 ARM::VST4LNd16Pseudo, 3654 ARM::VST4LNd32Pseudo }; 3655 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 3656 ARM::VST4LNq32Pseudo }; 3657 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 3658 return; 3659 } 3660 } 3661 break; 3662 } 3663 3664 case ISD::INTRINSIC_WO_CHAIN: { 3665 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 3666 switch (IntNo) { 3667 default: 3668 break; 3669 3670 case Intrinsic::arm_neon_vtbl2: 3671 SelectVTBL(N, false, 2, ARM::VTBL2); 3672 return; 3673 case Intrinsic::arm_neon_vtbl3: 3674 SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); 3675 return; 3676 case Intrinsic::arm_neon_vtbl4: 3677 SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); 3678 return; 3679 3680 case Intrinsic::arm_neon_vtbx2: 3681 SelectVTBL(N, true, 2, ARM::VTBX2); 3682 return; 3683 case Intrinsic::arm_neon_vtbx3: 3684 SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); 3685 return; 3686 case Intrinsic::arm_neon_vtbx4: 3687 SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); 3688 return; 3689 } 3690 break; 3691 } 3692 3693 case ARMISD::VTBL1: { 3694 SDLoc dl(N); 3695 EVT VT = N->getValueType(0); 3696 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), 3697 getAL(CurDAG, dl), // Predicate 3698 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register 3699 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops)); 3700 return; 3701 } 3702 case ARMISD::VTBL2: { 3703 SDLoc dl(N); 3704 EVT VT = N->getValueType(0); 3705 3706 // Form a REG_SEQUENCE to force register allocation. 
3707 SDValue V0 = N->getOperand(0); 3708 SDValue V1 = N->getOperand(1); 3709 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); 3710 3711 SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate 3712 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register 3713 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops)); 3714 return; 3715 } 3716 3717 case ISD::CONCAT_VECTORS: 3718 SelectConcatVector(N); 3719 return; 3720 3721 case ISD::ATOMIC_CMP_SWAP: 3722 SelectCMP_SWAP(N); 3723 return; 3724 } 3725 3726 SelectCode(N); 3727} 3728 3729// Inspect a register string of the form 3730// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 3731// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 3732// and obtain the integer operands from them, adding these operands to the 3733// provided vector. 3734static void getIntOperandsFromRegisterString(StringRef RegString, 3735 SelectionDAG *CurDAG, 3736 const SDLoc &DL, 3737 std::vector<SDValue> &Ops) { 3738 SmallVector<StringRef, 5> Fields; 3739 RegString.split(Fields, ':'); 3740 3741 if (Fields.size() > 1) { 3742 bool AllIntFields = true; 3743 3744 for (StringRef Field : Fields) { 3745 // Need to trim out leading 'cp' characters and get the integer field. 3746 unsigned IntField; 3747 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 3748 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 3749 } 3750 3751 assert(AllIntFields && 3752 "Unexpected non-integer value in special register string."); 3753 } 3754} 3755 3756// Maps a Banked Register string to its mask value. The mask value returned is 3757// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 3758// mask operand, which expresses which register is to be used, e.g. r8, and in 3759// which mode it is to be used, e.g. usr. Returns -1 to signify that the string 3760// was invalid. 
3761static inline int getBankedRegisterMask(StringRef RegString) { 3762 return StringSwitch<int>(RegString.lower()) 3763 .Case("r8_usr", 0x00) 3764 .Case("r9_usr", 0x01) 3765 .Case("r10_usr", 0x02) 3766 .Case("r11_usr", 0x03) 3767 .Case("r12_usr", 0x04) 3768 .Case("sp_usr", 0x05) 3769 .Case("lr_usr", 0x06) 3770 .Case("r8_fiq", 0x08) 3771 .Case("r9_fiq", 0x09) 3772 .Case("r10_fiq", 0x0a) 3773 .Case("r11_fiq", 0x0b) 3774 .Case("r12_fiq", 0x0c) 3775 .Case("sp_fiq", 0x0d) 3776 .Case("lr_fiq", 0x0e) 3777 .Case("lr_irq", 0x10) 3778 .Case("sp_irq", 0x11) 3779 .Case("lr_svc", 0x12) 3780 .Case("sp_svc", 0x13) 3781 .Case("lr_abt", 0x14) 3782 .Case("sp_abt", 0x15) 3783 .Case("lr_und", 0x16) 3784 .Case("sp_und", 0x17) 3785 .Case("lr_mon", 0x1c) 3786 .Case("sp_mon", 0x1d) 3787 .Case("elr_hyp", 0x1e) 3788 .Case("sp_hyp", 0x1f) 3789 .Case("spsr_fiq", 0x2e) 3790 .Case("spsr_irq", 0x30) 3791 .Case("spsr_svc", 0x32) 3792 .Case("spsr_abt", 0x34) 3793 .Case("spsr_und", 0x36) 3794 .Case("spsr_mon", 0x3c) 3795 .Case("spsr_hyp", 0x3e) 3796 .Default(-1); 3797} 3798 3799// Maps a MClass special register string to its value for use in the 3800// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand. 3801// Returns -1 to signify that the string was invalid. 
3802static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { 3803 return StringSwitch<int>(RegString.lower()) 3804 .Case("apsr", 0x0) 3805 .Case("iapsr", 0x1) 3806 .Case("eapsr", 0x2) 3807 .Case("xpsr", 0x3) 3808 .Case("ipsr", 0x5) 3809 .Case("epsr", 0x6) 3810 .Case("iepsr", 0x7) 3811 .Case("msp", 0x8) 3812 .Case("psp", 0x9) 3813 .Case("primask", 0x10) 3814 .Case("basepri", 0x11) 3815 .Case("basepri_max", 0x12) 3816 .Case("faultmask", 0x13) 3817 .Case("control", 0x14) 3818 .Case("msplim", 0x0a) 3819 .Case("psplim", 0x0b) 3820 .Case("sp", 0x18) 3821 .Default(-1); 3822} 3823 3824// The flags here are common to those allowed for apsr in the A class cores and 3825// those allowed for the special registers in the M class cores. Returns a 3826// value representing which flags were present, -1 if invalid. 3827static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) { 3828 if (Flags.empty()) 3829 return 0x2 | (int)hasDSP; 3830 3831 return StringSwitch<int>(Flags) 3832 .Case("g", 0x1) 3833 .Case("nzcvq", 0x2) 3834 .Case("nzcvqg", 0x3) 3835 .Default(-1); 3836} 3837 3838static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, 3839 const ARMSubtarget *Subtarget) { 3840 // Ensure that the register (without flags) was a valid M Class special 3841 // register. 3842 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg); 3843 if (SYSmvalue == -1) 3844 return -1; 3845 3846 // basepri, basepri_max and faultmask are only valid for V7m. 
3847 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13) 3848 return -1; 3849 3850 if (Subtarget->has8MSecExt() && Flags.lower() == "ns") { 3851 Flags = ""; 3852 SYSmvalue |= 0x80; 3853 } 3854 3855 if (!Subtarget->has8MSecExt() && 3856 (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14)) 3857 return -1; 3858 3859 if (!Subtarget->hasV8MMainlineOps() && 3860 (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 || 3861 SYSmvalue == 0x93)) 3862 return -1; 3863 3864 // If it was a read then we won't be expecting flags and so at this point 3865 // we can return the mask. 3866 if (IsRead) { 3867 if (Flags.empty()) 3868 return SYSmvalue; 3869 else 3870 return -1; 3871 } 3872 3873 // We know we are now handling a write so need to get the mask for the flags. 3874 int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP()); 3875 3876 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values 3877 // shouldn't have flags present. 3878 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty())) 3879 return -1; 3880 3881 // The _g and _nzcvqg versions are only valid if the DSP extension is 3882 // available. 3883 if (!Subtarget->hasDSP() && (Mask & 0x1)) 3884 return -1; 3885 3886 // The register was valid so need to put the mask in the correct place 3887 // (the flags need to be in bits 11-10) and combine with the SYSmvalue to 3888 // construct the operand for the instruction node. 3889 if (SYSmvalue < 0x4) 3890 return SYSmvalue | Mask << 10; 3891 3892 return SYSmvalue; 3893} 3894 3895static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 3896 // The mask operand contains the special register (R Bit) in bit 4, whether 3897 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 3898 // bits 3-0 contains the fields to be accessed in the special register, set by 3899 // the flags provided with the register. 
3900 int Mask = 0; 3901 if (Reg == "apsr") { 3902 // The flags permitted for apsr are the same flags that are allowed in 3903 // M class registers. We get the flag value and then shift the flags into 3904 // the correct place to combine with the mask. 3905 Mask = getMClassFlagsMask(Flags, true); 3906 if (Mask == -1) 3907 return -1; 3908 return Mask << 2; 3909 } 3910 3911 if (Reg != "cpsr" && Reg != "spsr") { 3912 return -1; 3913 } 3914 3915 // This is the same as if the flags were "fc" 3916 if (Flags.empty() || Flags == "all") 3917 return Mask | 0x9; 3918 3919 // Inspect the supplied flags string and set the bits in the mask for 3920 // the relevant and valid flags allowed for cpsr and spsr. 3921 for (char Flag : Flags) { 3922 int FlagVal; 3923 switch (Flag) { 3924 case 'c': 3925 FlagVal = 0x1; 3926 break; 3927 case 'x': 3928 FlagVal = 0x2; 3929 break; 3930 case 's': 3931 FlagVal = 0x4; 3932 break; 3933 case 'f': 3934 FlagVal = 0x8; 3935 break; 3936 default: 3937 FlagVal = 0; 3938 } 3939 3940 // This avoids allowing strings where the same flag bit appears twice. 3941 if (!FlagVal || (Mask & FlagVal)) 3942 return -1; 3943 Mask |= FlagVal; 3944 } 3945 3946 // If the register is spsr then we need to set the R bit. 3947 if (Reg == "spsr") 3948 Mask |= 0x10; 3949 3950 return Mask; 3951} 3952 3953// Lower the read_register intrinsic to ARM specific DAG nodes 3954// using the supplied metadata string to select the instruction node to use 3955// and the registers/masks to construct as operands for the node. 
3956bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 3957 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 3958 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 3959 bool IsThumb2 = Subtarget->isThumb2(); 3960 SDLoc DL(N); 3961 3962 std::vector<SDValue> Ops; 3963 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 3964 3965 if (!Ops.empty()) { 3966 // If the special register string was constructed of fields (as defined 3967 // in the ACLE) then need to lower to MRC node (32 bit) or 3968 // MRRC node(64 bit), we can make the distinction based on the number of 3969 // operands we have. 3970 unsigned Opcode; 3971 SmallVector<EVT, 3> ResTypes; 3972 if (Ops.size() == 5){ 3973 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 3974 ResTypes.append({ MVT::i32, MVT::Other }); 3975 } else { 3976 assert(Ops.size() == 3 && 3977 "Invalid number of fields in special register string."); 3978 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 3979 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 3980 } 3981 3982 Ops.push_back(getAL(CurDAG, DL)); 3983 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 3984 Ops.push_back(N->getOperand(0)); 3985 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 3986 return true; 3987 } 3988 3989 std::string SpecialReg = RegString->getString().lower(); 3990 3991 int BankedReg = getBankedRegisterMask(SpecialReg); 3992 if (BankedReg != -1) { 3993 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 3994 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 3995 N->getOperand(0) }; 3996 ReplaceNode( 3997 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 3998 DL, MVT::i32, MVT::Other, Ops)); 3999 return true; 4000 } 4001 4002 // The VFP registers are read by creating SelectionDAG nodes with opcodes 4003 // corresponding to the register that is being read from. So we switch on the 4004 // string to find which opcode we need to use. 
4005 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4006 .Case("fpscr", ARM::VMRS) 4007 .Case("fpexc", ARM::VMRS_FPEXC) 4008 .Case("fpsid", ARM::VMRS_FPSID) 4009 .Case("mvfr0", ARM::VMRS_MVFR0) 4010 .Case("mvfr1", ARM::VMRS_MVFR1) 4011 .Case("mvfr2", ARM::VMRS_MVFR2) 4012 .Case("fpinst", ARM::VMRS_FPINST) 4013 .Case("fpinst2", ARM::VMRS_FPINST2) 4014 .Default(0); 4015 4016 // If an opcode was found then we can lower the read to a VFP instruction. 4017 if (Opcode) { 4018 if (!Subtarget->hasVFP2()) 4019 return false; 4020 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8()) 4021 return false; 4022 4023 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4024 N->getOperand(0) }; 4025 ReplaceNode(N, 4026 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 4027 return true; 4028 } 4029 4030 // If the target is M Class then need to validate that the register string 4031 // is an acceptable value, so check that a mask can be constructed from the 4032 // string. 4033 if (Subtarget->isMClass()) { 4034 StringRef Flags = "", Reg = SpecialReg; 4035 if (Reg.endswith("_ns")) { 4036 Flags = "ns"; 4037 Reg = Reg.drop_back(3); 4038 } 4039 4040 int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget); 4041 if (SYSmValue == -1) 4042 return false; 4043 4044 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4045 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4046 N->getOperand(0) }; 4047 ReplaceNode( 4048 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 4049 return true; 4050 } 4051 4052 // Here we know the target is not M Class so we need to check if it is one 4053 // of the remaining possible values which are apsr, cpsr or spsr. 4054 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 4055 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4056 N->getOperand(0) }; 4057 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRS_AR : ARM::MRS, 4058 DL, MVT::i32, MVT::Other, Ops)); 4059 return true; 4060 } 4061 4062 if (SpecialReg == "spsr") { 4063 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4064 N->getOperand(0) }; 4065 ReplaceNode( 4066 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 4067 MVT::i32, MVT::Other, Ops)); 4068 return true; 4069 } 4070 4071 return false; 4072} 4073 4074// Lower the write_register intrinsic to ARM specific DAG nodes 4075// using the supplied metadata string to select the instruction node to use 4076// and the registers/masks to use in the nodes 4077bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 4078 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 4079 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 4080 bool IsThumb2 = Subtarget->isThumb2(); 4081 SDLoc DL(N); 4082 4083 std::vector<SDValue> Ops; 4084 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 4085 4086 if (!Ops.empty()) { 4087 // If the special register string was constructed of fields (as defined 4088 // in the ACLE) then need to lower to MCR node (32 bit) or 4089 // MCRR node(64 bit), we can make the distinction based on the number of 4090 // operands we have. 4091 unsigned Opcode; 4092 if (Ops.size() == 5) { 4093 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 4094 Ops.insert(Ops.begin()+2, N->getOperand(2)); 4095 } else { 4096 assert(Ops.size() == 3 && 4097 "Invalid number of fields in special register string."); 4098 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 4099 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 4100 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 4101 } 4102 4103 Ops.push_back(getAL(CurDAG, DL)); 4104 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4105 Ops.push_back(N->getOperand(0)); 4106 4107 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4108 return true; 4109 } 4110 4111 std::string SpecialReg = RegString->getString().lower(); 4112 int BankedReg = getBankedRegisterMask(SpecialReg); 4113 if (BankedReg != -1) { 4114 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 4115 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4116 N->getOperand(0) }; 4117 ReplaceNode( 4118 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 4119 DL, MVT::Other, Ops)); 4120 return true; 4121 } 4122 4123 // The VFP registers are written to by creating SelectionDAG nodes with 4124 // opcodes corresponding to the register that is being written. So we switch 4125 // on the string to find which opcode we need to use. 4126 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 4127 .Case("fpscr", ARM::VMSR) 4128 .Case("fpexc", ARM::VMSR_FPEXC) 4129 .Case("fpsid", ARM::VMSR_FPSID) 4130 .Case("fpinst", ARM::VMSR_FPINST) 4131 .Case("fpinst2", ARM::VMSR_FPINST2) 4132 .Default(0); 4133 4134 if (Opcode) { 4135 if (!Subtarget->hasVFP2()) 4136 return false; 4137 Ops = { N->getOperand(2), getAL(CurDAG, DL), 4138 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4139 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 4140 return true; 4141 } 4142 4143 std::pair<StringRef, StringRef> Fields; 4144 Fields = StringRef(SpecialReg).rsplit('_'); 4145 std::string Reg = Fields.first.str(); 4146 StringRef Flags = Fields.second; 4147 4148 // If the target was M Class then need to validate the special register value 4149 // and retrieve the mask for use in the instruction node. 
4150 if (Subtarget->isMClass()) { 4151 // basepri_max gets split so need to correct Reg and Flags. 4152 if (SpecialReg == "basepri_max") { 4153 Reg = SpecialReg; 4154 Flags = ""; 4155 } 4156 int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget); 4157 if (SYSmValue == -1) 4158 return false; 4159 4160 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 4161 N->getOperand(2), getAL(CurDAG, DL), 4162 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 4163 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 4164 return true; 4165 } 4166 4167 // We then check to see if a valid mask can be constructed for one of the 4168 // register string values permitted for the A and R class cores. These values 4169 // are apsr, spsr and cpsr; these are also valid on older cores. 4170 int Mask = getARClassRegisterMask(Reg, Flags); 4171 if (Mask != -1) { 4172 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 4173 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 4174 N->getOperand(0) }; 4175 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 4176 DL, MVT::Other, Ops)); 4177 return true; 4178 } 4179 4180 return false; 4181} 4182 4183bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 4184 std::vector<SDValue> AsmNodeOperands; 4185 unsigned Flag, Kind; 4186 bool Changed = false; 4187 unsigned NumOps = N->getNumOperands(); 4188 4189 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 4190 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 4191 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 4192 // respectively. Since there is no constraint to explicitly specify a 4193 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 4194 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 4195 // them into a GPRPair. 4196 4197 SDLoc dl(N); 4198 SDValue Glue = N->getGluedNode() ? 
N->getOperand(NumOps-1) 4199 : SDValue(nullptr,0); 4200 4201 SmallVector<bool, 8> OpChanged; 4202 // Glue node will be appended late. 4203 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 4204 SDValue op = N->getOperand(i); 4205 AsmNodeOperands.push_back(op); 4206 4207 if (i < InlineAsm::Op_FirstOperand) 4208 continue; 4209 4210 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 4211 Flag = C->getZExtValue(); 4212 Kind = InlineAsm::getKind(Flag); 4213 } 4214 else 4215 continue; 4216 4217 // Immediate operands to inline asm in the SelectionDAG are modeled with 4218 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 4219 // the second is a constant with the value of the immediate. If we get here 4220 // and we have a Kind_Imm, skip the next operand, and continue. 4221 if (Kind == InlineAsm::Kind_Imm) { 4222 SDValue op = N->getOperand(++i); 4223 AsmNodeOperands.push_back(op); 4224 continue; 4225 } 4226 4227 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 4228 if (NumRegs) 4229 OpChanged.push_back(false); 4230 4231 unsigned DefIdx = 0; 4232 bool IsTiedToChangedOp = false; 4233 // If it's a use that is tied with a previous def, it has no 4234 // reg class constraint. 
4235 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 4236 IsTiedToChangedOp = OpChanged[DefIdx]; 4237 4238 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 4239 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 4240 continue; 4241 4242 unsigned RC; 4243 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 4244 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 4245 || NumRegs != 2) 4246 continue; 4247 4248 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 4249 SDValue V0 = N->getOperand(i+1); 4250 SDValue V1 = N->getOperand(i+2); 4251 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 4252 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 4253 SDValue PairedReg; 4254 MachineRegisterInfo &MRI = MF->getRegInfo(); 4255 4256 if (Kind == InlineAsm::Kind_RegDef || 4257 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 4258 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 4259 // the original GPRs. 4260 4261 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4262 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4263 SDValue Chain = SDValue(N,0); 4264 4265 SDNode *GU = N->getGluedUser(); 4266 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 4267 Chain.getValue(1)); 4268 4269 // Extract values from a GPRPair reg and copy to the original GPR reg. 4270 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 4271 RegCopy); 4272 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 4273 RegCopy); 4274 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 4275 RegCopy.getValue(1)); 4276 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 4277 4278 // Update the original glue user. 
4279 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 4280 Ops.push_back(T1.getValue(1)); 4281 CurDAG->UpdateNodeOperands(GU, Ops); 4282 } 4283 else { 4284 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 4285 // GPRPair and then pass the GPRPair to the inline asm. 4286 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 4287 4288 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 4289 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 4290 Chain.getValue(1)); 4291 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 4292 T0.getValue(1)); 4293 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 4294 4295 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 4296 // i32 VRs of inline asm with it. 4297 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 4298 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 4299 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 4300 4301 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 4302 Glue = Chain.getValue(1); 4303 } 4304 4305 Changed = true; 4306 4307 if(PairedReg.getNode()) { 4308 OpChanged[OpChanged.size() -1 ] = true; 4309 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 4310 if (IsTiedToChangedOp) 4311 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 4312 else 4313 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 4314 // Replace the current flag. 4315 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 4316 Flag, dl, MVT::i32); 4317 // Add the new register node and skip the original two GPRs. 4318 AsmNodeOperands.push_back(PairedReg); 4319 // Skip the next two GPRs. 
4320 i += 2; 4321 } 4322 } 4323 4324 if (Glue.getNode()) 4325 AsmNodeOperands.push_back(Glue); 4326 if (!Changed) 4327 return false; 4328 4329 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), 4330 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 4331 New->setNodeId(-1); 4332 ReplaceNode(N, New.getNode()); 4333 return true; 4334} 4335 4336 4337bool ARMDAGToDAGISel:: 4338SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 4339 std::vector<SDValue> &OutOps) { 4340 switch(ConstraintID) { 4341 default: 4342 llvm_unreachable("Unexpected asm memory constraint"); 4343 case InlineAsm::Constraint_i: 4344 // FIXME: It seems strange that 'i' is needed here since it's supposed to 4345 // be an immediate and not a memory constraint. 4346 // Fallthrough. 4347 case InlineAsm::Constraint_m: 4348 case InlineAsm::Constraint_o: 4349 case InlineAsm::Constraint_Q: 4350 case InlineAsm::Constraint_Um: 4351 case InlineAsm::Constraint_Un: 4352 case InlineAsm::Constraint_Uq: 4353 case InlineAsm::Constraint_Us: 4354 case InlineAsm::Constraint_Ut: 4355 case InlineAsm::Constraint_Uv: 4356 case InlineAsm::Constraint_Uy: 4357 // Require the address to be in a register. That is safe for all ARM 4358 // variants and it is hard to do anything much smarter without knowing 4359 // how the operand is used. 4360 OutOps.push_back(Op); 4361 return false; 4362 } 4363 return true; 4364} 4365 4366/// createARMISelDag - This pass converts a legalized DAG into a 4367/// ARM-specific DAG, ready for instruction scheduling. 4368/// 4369FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 4370 CodeGenOpt::Level OptLevel) { 4371 return new ARMDAGToDAGISel(TM, OptLevel); 4372} 4373