// MipsSEISelLowering.cpp — revision c385709d8397ca1535481c04564b67d07c66c619
1//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Subclass of MipsTargetLowering specialized for mips32/64. 11// 12//===----------------------------------------------------------------------===// 13#define DEBUG_TYPE "mips-isel" 14#include "MipsSEISelLowering.h" 15#include "MipsRegisterInfo.h" 16#include "MipsTargetMachine.h" 17#include "llvm/CodeGen/MachineInstrBuilder.h" 18#include "llvm/CodeGen/MachineRegisterInfo.h" 19#include "llvm/IR/Intrinsics.h" 20#include "llvm/Support/CommandLine.h" 21#include "llvm/Support/Debug.h" 22#include "llvm/Target/TargetInstrInfo.h" 23 24using namespace llvm; 25 26static cl::opt<bool> 27EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, 28 cl::desc("MIPS: Enable tail calls."), cl::init(false)); 29 30static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), 31 cl::desc("Expand double precision loads and " 32 "stores to their single precision " 33 "counterparts")); 34 35MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) 36 : MipsTargetLowering(TM) { 37 // Set up the register classes 38 addRegisterClass(MVT::i32, &Mips::GPR32RegClass); 39 40 if (HasMips64) 41 addRegisterClass(MVT::i64, &Mips::GPR64RegClass); 42 43 if (Subtarget->hasDSP() || Subtarget->hasMSA()) { 44 // Expand all truncating stores and extending loads. 
45 unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 46 unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; 47 48 for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) { 49 for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1) 50 setTruncStoreAction((MVT::SimpleValueType)VT0, 51 (MVT::SimpleValueType)VT1, Expand); 52 53 setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand); 54 setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand); 55 setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand); 56 } 57 } 58 59 if (Subtarget->hasDSP()) { 60 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; 61 62 for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { 63 addRegisterClass(VecTys[i], &Mips::DSPRRegClass); 64 65 // Expand all builtin opcodes. 66 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 67 setOperationAction(Opc, VecTys[i], Expand); 68 69 setOperationAction(ISD::ADD, VecTys[i], Legal); 70 setOperationAction(ISD::SUB, VecTys[i], Legal); 71 setOperationAction(ISD::LOAD, VecTys[i], Legal); 72 setOperationAction(ISD::STORE, VecTys[i], Legal); 73 setOperationAction(ISD::BITCAST, VecTys[i], Legal); 74 } 75 76 setTargetDAGCombine(ISD::SHL); 77 setTargetDAGCombine(ISD::SRA); 78 setTargetDAGCombine(ISD::SRL); 79 setTargetDAGCombine(ISD::SETCC); 80 setTargetDAGCombine(ISD::VSELECT); 81 } 82 83 if (Subtarget->hasDSPR2()) 84 setOperationAction(ISD::MUL, MVT::v2i16, Legal); 85 86 if (Subtarget->hasMSA()) { 87 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); 88 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); 89 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); 90 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); 91 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 92 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); 93 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); 94 95 setTargetDAGCombine(ISD::AND); 96 setTargetDAGCombine(ISD::OR); 97 setTargetDAGCombine(ISD::SRA); 98 setTargetDAGCombine(ISD::VSELECT); 
99 setTargetDAGCombine(ISD::XOR); 100 } 101 102 if (!Subtarget->mipsSEUsesSoftFloat()) { 103 addRegisterClass(MVT::f32, &Mips::FGR32RegClass); 104 105 // When dealing with single precision only, use libcalls 106 if (!Subtarget->isSingleFloat()) { 107 if (Subtarget->isFP64bit()) 108 addRegisterClass(MVT::f64, &Mips::FGR64RegClass); 109 else 110 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); 111 } 112 } 113 114 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); 115 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); 116 setOperationAction(ISD::MULHS, MVT::i32, Custom); 117 setOperationAction(ISD::MULHU, MVT::i32, Custom); 118 119 if (HasMips64) { 120 setOperationAction(ISD::MULHS, MVT::i64, Custom); 121 setOperationAction(ISD::MULHU, MVT::i64, Custom); 122 setOperationAction(ISD::MUL, MVT::i64, Custom); 123 } 124 125 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 126 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 127 128 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 129 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 130 setOperationAction(ISD::SDIVREM, MVT::i64, Custom); 131 setOperationAction(ISD::UDIVREM, MVT::i64, Custom); 132 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 133 setOperationAction(ISD::LOAD, MVT::i32, Custom); 134 setOperationAction(ISD::STORE, MVT::i32, Custom); 135 136 setTargetDAGCombine(ISD::ADDE); 137 setTargetDAGCombine(ISD::SUBE); 138 setTargetDAGCombine(ISD::MUL); 139 140 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 141 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 142 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 143 144 if (NoDPLoadStore) { 145 setOperationAction(ISD::LOAD, MVT::f64, Custom); 146 setOperationAction(ISD::STORE, MVT::f64, Custom); 147 } 148 149 computeRegisterProperties(); 150} 151 152const MipsTargetLowering * 153llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { 154 return new MipsSETargetLowering(TM); 
155} 156 157// Enable MSA support for the given integer type and Register class. 158void MipsSETargetLowering:: 159addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 160 addRegisterClass(Ty, RC); 161 162 // Expand all builtin opcodes. 163 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 164 setOperationAction(Opc, Ty, Expand); 165 166 setOperationAction(ISD::BITCAST, Ty, Legal); 167 setOperationAction(ISD::LOAD, Ty, Legal); 168 setOperationAction(ISD::STORE, Ty, Legal); 169 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); 170 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 171 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 172 173 setOperationAction(ISD::ADD, Ty, Legal); 174 setOperationAction(ISD::AND, Ty, Legal); 175 setOperationAction(ISD::CTLZ, Ty, Legal); 176 setOperationAction(ISD::CTPOP, Ty, Legal); 177 setOperationAction(ISD::MUL, Ty, Legal); 178 setOperationAction(ISD::OR, Ty, Legal); 179 setOperationAction(ISD::SDIV, Ty, Legal); 180 setOperationAction(ISD::SREM, Ty, Legal); 181 setOperationAction(ISD::SHL, Ty, Legal); 182 setOperationAction(ISD::SRA, Ty, Legal); 183 setOperationAction(ISD::SRL, Ty, Legal); 184 setOperationAction(ISD::SUB, Ty, Legal); 185 setOperationAction(ISD::UDIV, Ty, Legal); 186 setOperationAction(ISD::UREM, Ty, Legal); 187 setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); 188 setOperationAction(ISD::VSELECT, Ty, Legal); 189 setOperationAction(ISD::XOR, Ty, Legal); 190 191 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { 192 setOperationAction(ISD::FP_TO_SINT, Ty, Legal); 193 setOperationAction(ISD::FP_TO_UINT, Ty, Legal); 194 setOperationAction(ISD::SINT_TO_FP, Ty, Legal); 195 setOperationAction(ISD::UINT_TO_FP, Ty, Legal); 196 } 197 198 setOperationAction(ISD::SETCC, Ty, Legal); 199 setCondCodeAction(ISD::SETNE, Ty, Expand); 200 setCondCodeAction(ISD::SETGE, Ty, Expand); 201 setCondCodeAction(ISD::SETGT, Ty, Expand); 202 setCondCodeAction(ISD::SETUGE, Ty, Expand); 203 
setCondCodeAction(ISD::SETUGT, Ty, Expand); 204} 205 206// Enable MSA support for the given floating-point type and Register class. 207void MipsSETargetLowering:: 208addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 209 addRegisterClass(Ty, RC); 210 211 // Expand all builtin opcodes. 212 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 213 setOperationAction(Opc, Ty, Expand); 214 215 setOperationAction(ISD::LOAD, Ty, Legal); 216 setOperationAction(ISD::STORE, Ty, Legal); 217 setOperationAction(ISD::BITCAST, Ty, Legal); 218 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 219 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 220 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 221 222 if (Ty != MVT::v8f16) { 223 setOperationAction(ISD::FABS, Ty, Legal); 224 setOperationAction(ISD::FADD, Ty, Legal); 225 setOperationAction(ISD::FDIV, Ty, Legal); 226 setOperationAction(ISD::FEXP2, Ty, Legal); 227 setOperationAction(ISD::FLOG2, Ty, Legal); 228 setOperationAction(ISD::FMA, Ty, Legal); 229 setOperationAction(ISD::FMUL, Ty, Legal); 230 setOperationAction(ISD::FRINT, Ty, Legal); 231 setOperationAction(ISD::FSQRT, Ty, Legal); 232 setOperationAction(ISD::FSUB, Ty, Legal); 233 setOperationAction(ISD::VSELECT, Ty, Legal); 234 235 setOperationAction(ISD::SETCC, Ty, Legal); 236 setCondCodeAction(ISD::SETOGE, Ty, Expand); 237 setCondCodeAction(ISD::SETOGT, Ty, Expand); 238 setCondCodeAction(ISD::SETUGE, Ty, Expand); 239 setCondCodeAction(ISD::SETUGT, Ty, Expand); 240 setCondCodeAction(ISD::SETGE, Ty, Expand); 241 setCondCodeAction(ISD::SETGT, Ty, Expand); 242 } 243} 244 245bool 246MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { 247 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 248 249 switch (SVT) { 250 case MVT::i64: 251 case MVT::i32: 252 if (Fast) 253 *Fast = true; 254 return true; 255 default: 256 return false; 257 } 258} 259 260SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 261 
SelectionDAG &DAG) const { 262 switch(Op.getOpcode()) { 263 case ISD::LOAD: return lowerLOAD(Op, DAG); 264 case ISD::STORE: return lowerSTORE(Op, DAG); 265 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 266 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 267 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 268 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 269 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 270 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 271 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 272 DAG); 273 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 274 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); 275 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); 276 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 277 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 278 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 279 } 280 281 return MipsTargetLowering::LowerOperation(Op, DAG); 282} 283 284// selectMADD - 285// Transforms a subgraph in CurDAG if the following pattern is found: 286// (addc multLo, Lo0), (adde multHi, Hi0), 287// where, 288// multHi/Lo: product of multiplication 289// Lo0: initial value of Lo register 290// Hi0: initial value of Hi register 291// Return true if pattern matching was successful. 292static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { 293 // ADDENode's second operand must be a flag output of an ADDC node in order 294 // for the matching to be successful. 
295 SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); 296 297 if (ADDCNode->getOpcode() != ISD::ADDC) 298 return false; 299 300 SDValue MultHi = ADDENode->getOperand(0); 301 SDValue MultLo = ADDCNode->getOperand(0); 302 SDNode *MultNode = MultHi.getNode(); 303 unsigned MultOpc = MultHi.getOpcode(); 304 305 // MultHi and MultLo must be generated by the same node, 306 if (MultLo.getNode() != MultNode) 307 return false; 308 309 // and it must be a multiplication. 310 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 311 return false; 312 313 // MultLo amd MultHi must be the first and second output of MultNode 314 // respectively. 315 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 316 return false; 317 318 // Transform this to a MADD only if ADDENode and ADDCNode are the only users 319 // of the values of MultNode, in which case MultNode will be removed in later 320 // phases. 321 // If there exist users other than ADDENode or ADDCNode, this function returns 322 // here, which will result in MultNode being mapped to a single MULT 323 // instruction node rather than a pair of MULT and MADD instructions being 324 // produced. 325 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 326 return false; 327 328 SDLoc DL(ADDENode); 329 330 // Initialize accumulator. 331 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 332 ADDCNode->getOperand(1), 333 ADDENode->getOperand(1)); 334 335 // create MipsMAdd(u) node 336 MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MAddu : MipsISD::MAdd; 337 338 SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, 339 MultNode->getOperand(0),// Factor 0 340 MultNode->getOperand(1),// Factor 1 341 ACCIn); 342 343 // replace uses of adde and addc here 344 if (!SDValue(ADDCNode, 0).use_empty()) { 345 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); 346 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); 347 } 348 if (!SDValue(ADDENode, 0).use_empty()) { 349 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); 350 CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); 351 } 352 353 return true; 354} 355 356// selectMSUB - 357// Transforms a subgraph in CurDAG if the following pattern is found: 358// (addc Lo0, multLo), (sube Hi0, multHi), 359// where, 360// multHi/Lo: product of multiplication 361// Lo0: initial value of Lo register 362// Hi0: initial value of Hi register 363// Return true if pattern matching was successful. 364static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { 365 // SUBENode's second operand must be a flag output of an SUBC node in order 366 // for the matching to be successful. 367 SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); 368 369 if (SUBCNode->getOpcode() != ISD::SUBC) 370 return false; 371 372 SDValue MultHi = SUBENode->getOperand(1); 373 SDValue MultLo = SUBCNode->getOperand(1); 374 SDNode *MultNode = MultHi.getNode(); 375 unsigned MultOpc = MultHi.getOpcode(); 376 377 // MultHi and MultLo must be generated by the same node, 378 if (MultLo.getNode() != MultNode) 379 return false; 380 381 // and it must be a multiplication. 382 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 383 return false; 384 385 // MultLo amd MultHi must be the first and second output of MultNode 386 // respectively. 
387 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 388 return false; 389 390 // Transform this to a MSUB only if SUBENode and SUBCNode are the only users 391 // of the values of MultNode, in which case MultNode will be removed in later 392 // phases. 393 // If there exist users other than SUBENode or SUBCNode, this function returns 394 // here, which will result in MultNode being mapped to a single MULT 395 // instruction node rather than a pair of MULT and MSUB instructions being 396 // produced. 397 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 398 return false; 399 400 SDLoc DL(SUBENode); 401 402 // Initialize accumulator. 403 SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, 404 SUBCNode->getOperand(0), 405 SUBENode->getOperand(0)); 406 407 // create MipsSub(u) node 408 MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; 409 410 SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, 411 MultNode->getOperand(0),// Factor 0 412 MultNode->getOperand(1),// Factor 1 413 ACCIn); 414 415 // replace uses of sube and subc here 416 if (!SDValue(SUBCNode, 0).use_empty()) { 417 SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); 418 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); 419 } 420 if (!SDValue(SUBENode, 0).use_empty()) { 421 SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); 422 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); 423 } 424 425 return true; 426} 427 428static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, 429 TargetLowering::DAGCombinerInfo &DCI, 430 const MipsSubtarget *Subtarget) { 431 if (DCI.isBeforeLegalize()) 432 return SDValue(); 433 434 if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && 435 selectMADD(N, &DAG)) 436 return SDValue(N, 0); 437 438 return SDValue(); 439} 440 441// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 442// 443// Performs the following transformations: 444// - Changes 
// MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  // Only applies to MSA's VEXTRACT_[SZ]EXT_ELT nodes.
  if (!Subtarget->hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    // If Mask+1 is a power of two, log2(Mask+1) is the number of low bits the
    // AND preserves.
    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    // The AND is redundant (zero-extract already cleared the masked-off bits)
    // or it exactly overwrites a sign-extension of the same width; either way
    // a zero-extending extract yields the same value.
    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      // Morph the extract node in place so all existing users see the new
      // opcode without rebuilding the graph.
      DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
                      Op0->getVTList(), Ops, Op0->getNumOperands());
      return Op0;
    }
  }

  return SDValue();
}

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is a ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (Node == NULL)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Minimum splat element size of 8 bits; isConstantSplat's last argument is
  // "is big endian".
  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnesValue();

  return false;
}

// Test whether N is the bitwise inverse of OfNode, i.e. N == (xor OfNode, -1)
// with the all-ones vector on either side of the xor.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget *Subtarget) {
  if (!Subtarget->hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    // NOTE(review): this is the inverse of Subtarget->isLittle(), and isVSplat
    // negates it again before calling isConstantSplat — confirm the double
    // negation is intended.
    bool IsLittleEndian = !Subtarget->isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnesValue())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfClr, IfSet);
  }

  return SDValue();
}

static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const MipsSubtarget *Subtarget) {
  if (DCI.isBeforeLegalize())
    return SDValue();

  // Match the multiply-subtract pattern into a MSUB node (mips32 only).
  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
      selectMSUB(N, &DAG))
    return SDValue(N, 0);

  return SDValue();
}

// Build (x * C) out of shifts, adds and subs by recursing towards the nearest
// power of two, choosing whichever of floor/ceil is closer to C.
static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Clear the upper (64 - VT.sizeInBits) bits.
  C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits());

  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (isPowerOf2_64(C))
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(Log2_64(C), ShiftTy));

  unsigned Log2Ceil = Log2_64_Ceil(C);
  uint64_t Floor = 1LL << Log2_64(C);
  uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil;

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
  if (C - Floor <= Ceil - C) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL) {
  EVT VT = N->getValueType(0);

  // Scalar multiply by a constant -> shift/add/sub sequence.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector())
      return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N),
                          VT, TL->getScalarShiftAmountTy(VT), DAG);

  return SDValue(N, 0);
}

// Rewrite a DSP vector shift by a constant splat into the corresponding
// Mips DSP shift node (Opc) with an i32 immediate shift amount.
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget *Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  // The splat must be exactly one element wide and the shift amount must be
  // in range for the element size.
  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget->isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  return DAG.getNode(Opc, SDLoc(N), Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), MVT::i32));
}

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  EVT Ty = N->getValueType(0);

  // Only the two DSP vector types are handled here.
  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget->hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      // The shl/sra pair sign-extends from bit (32 - $d); fold it into the
      // extract when that extension subsumes (or equals) the extract's own.
      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
                        Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands());
        return Op0Op0;
      }
    }
  }

  // DSP path: v2i16 always; v4i8 requires DSPr2.
  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  EVT Ty = N->getValueType(0);

  // Logical right shift: v4i8 always; v2i16 requires DSPr2.
  if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

// Return true if the DSP ASE can implement the given vector compare directly:
// EQ/NE for both types, signed orderings for v2i16, unsigned for v4i8.
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  // Lower to the DSP-specific SETCC node so it selects to a DSP compare.
  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b)
    //   (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b)
    //   (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b)
    //   (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b)
    //   (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b)
    //   (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b)
    //   (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b)
    //   (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b)
    // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but
    // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the
    // legalizer.
    SDValue Op0 = N->getOperand(0);

    if (Op0->getOpcode() != ISD::SETCC)
      return SDValue();

    ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get();
    bool Signed;

    if (CondCode == ISD::SETLT || CondCode == ISD::SETLE)
      Signed = true;
    else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE)
      Signed = false;
    else
      return SDValue();

    SDValue Op1 = N->getOperand(1);
    SDValue Op2 = N->getOperand(2);
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);

    // Selecting the smaller operand -> min; selecting the larger -> max.
    if (Op1 == Op0Op0 && Op2 == Op0Op1)
      return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N),
                         Ty, Op1, Op2);
    else if (Op1 == Op0Op1 && Op2 == Op0Op0)
      return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N),
                         Ty, Op1, Op2);
  } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    // Fuse the DSP compare and the select into a single conditional-move node.
    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget *Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    // not(or) -> MSA's nor.v instruction.
    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  // Note: ADDE/SUBE/MUL/SHL/SRA/SRL/VSELECT combines return directly (their
  // results are final); AND/OR/XOR/SETCC fall through so a successful result
  // is logged before returning.
  switch (N->getOpcode()) {
  case ISD::ADDE:
    return performADDECombine(N, DAG, DCI, Subtarget);
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SUBE:
    return performSUBECombine(N, DAG, DCI, Subtarget);
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this);
  case ISD::SHL:
    return performSHLCombine(N, DAG, DCI, Subtarget);
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
          N->printrWithDepth(dbgs(), &DAG);
          dbgs() << "\n=> \n";
          Val.getNode()->printrWithDepth(dbgs(), &DAG);
          dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI->getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB,
Mips::BNZ_W); 1018 case Mips::SNZ_D_PSEUDO: 1019 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 1020 case Mips::SNZ_V_PSEUDO: 1021 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 1022 case Mips::SZ_B_PSEUDO: 1023 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 1024 case Mips::SZ_H_PSEUDO: 1025 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1026 case Mips::SZ_W_PSEUDO: 1027 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1028 case Mips::SZ_D_PSEUDO: 1029 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1030 case Mips::SZ_V_PSEUDO: 1031 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1032 case Mips::COPY_FW_PSEUDO: 1033 return emitCOPY_FW(MI, BB); 1034 case Mips::COPY_FD_PSEUDO: 1035 return emitCOPY_FD(MI, BB); 1036 case Mips::INSERT_FW_PSEUDO: 1037 return emitINSERT_FW(MI, BB); 1038 case Mips::INSERT_FD_PSEUDO: 1039 return emitINSERT_FD(MI, BB); 1040 case Mips::FILL_FW_PSEUDO: 1041 return emitFILL_FW(MI, BB); 1042 case Mips::FILL_FD_PSEUDO: 1043 return emitFILL_FD(MI, BB); 1044 case Mips::FEXP2_W_1_PSEUDO: 1045 return emitFEXP2_W_1(MI, BB); 1046 case Mips::FEXP2_D_1_PSEUDO: 1047 return emitFEXP2_D_1(MI, BB); 1048 } 1049} 1050 1051bool MipsSETargetLowering:: 1052isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, 1053 unsigned NextStackOffset, 1054 const MipsFunctionInfo& FI) const { 1055 if (!EnableMipsTailCalls) 1056 return false; 1057 1058 // Return false if either the callee or caller has a byval argument. 1059 if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) 1060 return false; 1061 1062 // Return true if the callee's argument area is no larger than the 1063 // caller's. 
1064 return NextStackOffset <= FI.getIncomingArgSize(); 1065} 1066 1067void MipsSETargetLowering:: 1068getOpndList(SmallVectorImpl<SDValue> &Ops, 1069 std::deque< std::pair<unsigned, SDValue> > &RegsToPass, 1070 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1071 CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { 1072 // T9 should contain the address of the callee function if 1073 // -reloction-model=pic or it is an indirect call. 1074 if (IsPICCall || !GlobalOrExternal) { 1075 unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; 1076 RegsToPass.push_front(std::make_pair(T9Reg, Callee)); 1077 } else 1078 Ops.push_back(Callee); 1079 1080 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1081 InternalLinkage, CLI, Callee, Chain); 1082} 1083 1084SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1085 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1086 1087 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1088 return MipsTargetLowering::lowerLOAD(Op, DAG); 1089 1090 // Replace a double precision load with two i32 loads and a buildpair64. 1091 SDLoc DL(Op); 1092 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1093 EVT PtrVT = Ptr.getValueType(); 1094 1095 // i32 load from lower address. 1096 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, 1097 MachinePointerInfo(), Nd.isVolatile(), 1098 Nd.isNonTemporal(), Nd.isInvariant(), 1099 Nd.getAlignment()); 1100 1101 // i32 load from higher address. 
1102 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); 1103 SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr, 1104 MachinePointerInfo(), Nd.isVolatile(), 1105 Nd.isNonTemporal(), Nd.isInvariant(), 1106 std::min(Nd.getAlignment(), 4U)); 1107 1108 if (!Subtarget->isLittle()) 1109 std::swap(Lo, Hi); 1110 1111 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1112 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1113 return DAG.getMergeValues(Ops, 2, DL); 1114} 1115 1116SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1117 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1118 1119 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1120 return MipsTargetLowering::lowerSTORE(Op, DAG); 1121 1122 // Replace a double precision store with two extractelement64s and i32 stores. 1123 SDLoc DL(Op); 1124 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1125 EVT PtrVT = Ptr.getValueType(); 1126 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1127 Val, DAG.getConstant(0, MVT::i32)); 1128 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1129 Val, DAG.getConstant(1, MVT::i32)); 1130 1131 if (!Subtarget->isLittle()) 1132 std::swap(Lo, Hi); 1133 1134 // i32 store to lower address. 1135 Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), 1136 Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(), 1137 Nd.getTBAAInfo()); 1138 1139 // i32 store to higher address. 
1140 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); 1141 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1142 Nd.isVolatile(), Nd.isNonTemporal(), 1143 std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo()); 1144} 1145 1146SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1147 bool HasLo, bool HasHi, 1148 SelectionDAG &DAG) const { 1149 EVT Ty = Op.getOperand(0).getValueType(); 1150 SDLoc DL(Op); 1151 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1152 Op.getOperand(0), Op.getOperand(1)); 1153 SDValue Lo, Hi; 1154 1155 if (HasLo) 1156 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1157 if (HasHi) 1158 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1159 1160 if (!HasLo || !HasHi) 1161 return HasLo ? Lo : Hi; 1162 1163 SDValue Vals[] = { Lo, Hi }; 1164 return DAG.getMergeValues(Vals, 2, DL); 1165} 1166 1167 1168static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) { 1169 SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1170 DAG.getConstant(0, MVT::i32)); 1171 SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1172 DAG.getConstant(1, MVT::i32)); 1173 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1174} 1175 1176static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) { 1177 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1178 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1179 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1180} 1181 1182// This function expands mips intrinsic nodes which have 64-bit input operands 1183// or output values. 
1184// 1185// out64 = intrinsic-node in64 1186// => 1187// lo = copy (extract-element (in64, 0)) 1188// hi = copy (extract-element (in64, 1)) 1189// mips-specific-node 1190// v0 = copy lo 1191// v1 = copy hi 1192// out64 = merge-values (v0, v1) 1193// 1194static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1195 SDLoc DL(Op); 1196 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1197 SmallVector<SDValue, 3> Ops; 1198 unsigned OpNo = 0; 1199 1200 // See if Op has a chain input. 1201 if (HasChainIn) 1202 Ops.push_back(Op->getOperand(OpNo++)); 1203 1204 // The next operand is the intrinsic opcode. 1205 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1206 1207 // See if the next operand has type i64. 1208 SDValue Opnd = Op->getOperand(++OpNo), In64; 1209 1210 if (Opnd.getValueType() == MVT::i64) 1211 In64 = initAccumulator(Opnd, DL, DAG); 1212 else 1213 Ops.push_back(Opnd); 1214 1215 // Push the remaining operands. 1216 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1217 Ops.push_back(Op->getOperand(OpNo)); 1218 1219 // Add In64 to the end of the list. 1220 if (In64.getNode()) 1221 Ops.push_back(In64); 1222 1223 // Scan output. 1224 SmallVector<EVT, 2> ResTys; 1225 1226 for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); 1227 I != E; ++I) 1228 ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); 1229 1230 // Create node. 1231 SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); 1232 SDValue Out = (ResTys[0] == MVT::Untyped) ? 
extractLOHI(Val, DL, DAG) : Val; 1233 1234 if (!HasChainIn) 1235 return Out; 1236 1237 assert(Val->getValueType(1) == MVT::Other); 1238 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1239 return DAG.getMergeValues(Vals, 2, DL); 1240} 1241 1242// Lower an MSA copy intrinsic into the specified SelectionDAG node 1243static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1244 SDLoc DL(Op); 1245 SDValue Vec = Op->getOperand(1); 1246 SDValue Idx = Op->getOperand(2); 1247 EVT ResTy = Op->getValueType(0); 1248 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1249 1250 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1251 DAG.getValueType(EltTy)); 1252 1253 return Result; 1254} 1255 1256static SDValue 1257lowerMSASplatImm(SDLoc DL, EVT ResTy, SDValue ImmOp, SelectionDAG &DAG) { 1258 EVT ViaVecTy = ResTy; 1259 SmallVector<SDValue, 16> Ops; 1260 SDValue ImmHiOp; 1261 1262 if (ViaVecTy == MVT::v2i64) { 1263 ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp, 1264 DAG.getConstant(31, MVT::i32)); 1265 for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) { 1266 Ops.push_back(ImmHiOp); 1267 Ops.push_back(ImmOp); 1268 } 1269 ViaVecTy = MVT::v4i32; 1270 } else { 1271 for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) 1272 Ops.push_back(ImmOp); 1273 } 1274 1275 SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0], 1276 Ops.size()); 1277 1278 if (ResTy != ViaVecTy) 1279 Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); 1280 1281 return Result; 1282} 1283 1284static SDValue 1285lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { 1286 return lowerMSASplatImm(SDLoc(Op), Op->getValueType(0), 1287 Op->getOperand(ImmOp), DAG); 1288} 1289 1290SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1291 SelectionDAG &DAG) const { 1292 SDLoc DL(Op); 1293 1294 switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { 1295 default: 1296 return SDValue(); 1297 case 
Intrinsic::mips_shilo: 1298 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1299 case Intrinsic::mips_dpau_h_qbl: 1300 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1301 case Intrinsic::mips_dpau_h_qbr: 1302 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1303 case Intrinsic::mips_dpsu_h_qbl: 1304 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1305 case Intrinsic::mips_dpsu_h_qbr: 1306 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1307 case Intrinsic::mips_dpa_w_ph: 1308 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1309 case Intrinsic::mips_dps_w_ph: 1310 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1311 case Intrinsic::mips_dpax_w_ph: 1312 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1313 case Intrinsic::mips_dpsx_w_ph: 1314 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1315 case Intrinsic::mips_mulsa_w_ph: 1316 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1317 case Intrinsic::mips_mult: 1318 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1319 case Intrinsic::mips_multu: 1320 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1321 case Intrinsic::mips_madd: 1322 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1323 case Intrinsic::mips_maddu: 1324 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1325 case Intrinsic::mips_msub: 1326 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1327 case Intrinsic::mips_msubu: 1328 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1329 case Intrinsic::mips_addv_b: 1330 case Intrinsic::mips_addv_h: 1331 case Intrinsic::mips_addv_w: 1332 case Intrinsic::mips_addv_d: 1333 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1334 Op->getOperand(2)); 1335 case Intrinsic::mips_addvi_b: 1336 case Intrinsic::mips_addvi_h: 1337 case Intrinsic::mips_addvi_w: 1338 case Intrinsic::mips_addvi_d: 1339 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1340 lowerMSASplatImm(Op, 2, DAG)); 1341 case Intrinsic::mips_and_v: 1342 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), 
Op->getOperand(1), 1343 Op->getOperand(2)); 1344 case Intrinsic::mips_andi_b: 1345 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1346 lowerMSASplatImm(Op, 2, DAG)); 1347 case Intrinsic::mips_binsli_b: 1348 case Intrinsic::mips_binsli_h: 1349 case Intrinsic::mips_binsli_w: 1350 case Intrinsic::mips_binsli_d: { 1351 EVT VecTy = Op->getValueType(0); 1352 EVT EltTy = VecTy.getVectorElementType(); 1353 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1354 Op->getConstantOperandVal(3)); 1355 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1356 DAG.getConstant(Mask, VecTy, true), Op->getOperand(1), 1357 Op->getOperand(2)); 1358 } 1359 case Intrinsic::mips_binsri_b: 1360 case Intrinsic::mips_binsri_h: 1361 case Intrinsic::mips_binsri_w: 1362 case Intrinsic::mips_binsri_d: { 1363 EVT VecTy = Op->getValueType(0); 1364 EVT EltTy = VecTy.getVectorElementType(); 1365 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1366 Op->getConstantOperandVal(3)); 1367 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1368 DAG.getConstant(Mask, VecTy, true), Op->getOperand(1), 1369 Op->getOperand(2)); 1370 } 1371 case Intrinsic::mips_bmnz_v: 1372 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1373 Op->getOperand(2), Op->getOperand(1)); 1374 case Intrinsic::mips_bmnzi_b: 1375 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1376 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1377 Op->getOperand(1)); 1378 case Intrinsic::mips_bmz_v: 1379 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1380 Op->getOperand(1), Op->getOperand(2)); 1381 case Intrinsic::mips_bmzi_b: 1382 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1383 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1384 Op->getOperand(2)); 1385 case Intrinsic::mips_bnz_b: 1386 case Intrinsic::mips_bnz_h: 1387 case Intrinsic::mips_bnz_w: 1388 case Intrinsic::mips_bnz_d: 1389 return DAG.getNode(MipsISD::VALL_NONZERO, DL, 
Op->getValueType(0), 1390 Op->getOperand(1)); 1391 case Intrinsic::mips_bnz_v: 1392 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1393 Op->getOperand(1)); 1394 case Intrinsic::mips_bsel_v: 1395 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1396 Op->getOperand(1), Op->getOperand(2), 1397 Op->getOperand(3)); 1398 case Intrinsic::mips_bseli_b: 1399 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1400 Op->getOperand(1), Op->getOperand(2), 1401 lowerMSASplatImm(Op, 3, DAG)); 1402 case Intrinsic::mips_bz_b: 1403 case Intrinsic::mips_bz_h: 1404 case Intrinsic::mips_bz_w: 1405 case Intrinsic::mips_bz_d: 1406 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1407 Op->getOperand(1)); 1408 case Intrinsic::mips_bz_v: 1409 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1410 Op->getOperand(1)); 1411 case Intrinsic::mips_ceq_b: 1412 case Intrinsic::mips_ceq_h: 1413 case Intrinsic::mips_ceq_w: 1414 case Intrinsic::mips_ceq_d: 1415 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1416 Op->getOperand(2), ISD::SETEQ); 1417 case Intrinsic::mips_ceqi_b: 1418 case Intrinsic::mips_ceqi_h: 1419 case Intrinsic::mips_ceqi_w: 1420 case Intrinsic::mips_ceqi_d: 1421 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1422 lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ); 1423 case Intrinsic::mips_cle_s_b: 1424 case Intrinsic::mips_cle_s_h: 1425 case Intrinsic::mips_cle_s_w: 1426 case Intrinsic::mips_cle_s_d: 1427 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1428 Op->getOperand(2), ISD::SETLE); 1429 case Intrinsic::mips_clei_s_b: 1430 case Intrinsic::mips_clei_s_h: 1431 case Intrinsic::mips_clei_s_w: 1432 case Intrinsic::mips_clei_s_d: 1433 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1434 lowerMSASplatImm(Op, 2, DAG), ISD::SETLE); 1435 case Intrinsic::mips_cle_u_b: 1436 case Intrinsic::mips_cle_u_h: 1437 case Intrinsic::mips_cle_u_w: 1438 case 
Intrinsic::mips_cle_u_d: 1439 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1440 Op->getOperand(2), ISD::SETULE); 1441 case Intrinsic::mips_clei_u_b: 1442 case Intrinsic::mips_clei_u_h: 1443 case Intrinsic::mips_clei_u_w: 1444 case Intrinsic::mips_clei_u_d: 1445 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1446 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1447 case Intrinsic::mips_clt_s_b: 1448 case Intrinsic::mips_clt_s_h: 1449 case Intrinsic::mips_clt_s_w: 1450 case Intrinsic::mips_clt_s_d: 1451 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1452 Op->getOperand(2), ISD::SETLT); 1453 case Intrinsic::mips_clti_s_b: 1454 case Intrinsic::mips_clti_s_h: 1455 case Intrinsic::mips_clti_s_w: 1456 case Intrinsic::mips_clti_s_d: 1457 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1458 lowerMSASplatImm(Op, 2, DAG), ISD::SETLT); 1459 case Intrinsic::mips_clt_u_b: 1460 case Intrinsic::mips_clt_u_h: 1461 case Intrinsic::mips_clt_u_w: 1462 case Intrinsic::mips_clt_u_d: 1463 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1464 Op->getOperand(2), ISD::SETULT); 1465 case Intrinsic::mips_clti_u_b: 1466 case Intrinsic::mips_clti_u_h: 1467 case Intrinsic::mips_clti_u_w: 1468 case Intrinsic::mips_clti_u_d: 1469 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1470 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1471 case Intrinsic::mips_copy_s_b: 1472 case Intrinsic::mips_copy_s_h: 1473 case Intrinsic::mips_copy_s_w: 1474 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1475 case Intrinsic::mips_copy_s_d: 1476 // Don't lower directly into VEXTRACT_SEXT_ELT since i64 might be illegal. 1477 // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type 1478 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 
1479 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1480 Op->getOperand(1), Op->getOperand(2)); 1481 case Intrinsic::mips_copy_u_b: 1482 case Intrinsic::mips_copy_u_h: 1483 case Intrinsic::mips_copy_u_w: 1484 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1485 case Intrinsic::mips_copy_u_d: 1486 // Don't lower directly into VEXTRACT_ZEXT_ELT since i64 might be illegal. 1487 // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type 1488 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1489 // 1490 // Note: When i64 is illegal, this results in copy_s.w instructions instead 1491 // of copy_u.w instructions. This makes no difference to the behaviour 1492 // since i64 is only illegal when the register file is 32-bit. 1493 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1494 Op->getOperand(1), Op->getOperand(2)); 1495 case Intrinsic::mips_div_s_b: 1496 case Intrinsic::mips_div_s_h: 1497 case Intrinsic::mips_div_s_w: 1498 case Intrinsic::mips_div_s_d: 1499 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1500 Op->getOperand(2)); 1501 case Intrinsic::mips_div_u_b: 1502 case Intrinsic::mips_div_u_h: 1503 case Intrinsic::mips_div_u_w: 1504 case Intrinsic::mips_div_u_d: 1505 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1506 Op->getOperand(2)); 1507 case Intrinsic::mips_fadd_w: 1508 case Intrinsic::mips_fadd_d: 1509 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1510 Op->getOperand(2)); 1511 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1512 case Intrinsic::mips_fceq_w: 1513 case Intrinsic::mips_fceq_d: 1514 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1515 Op->getOperand(2), ISD::SETOEQ); 1516 case Intrinsic::mips_fcle_w: 1517 case Intrinsic::mips_fcle_d: 1518 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1519 Op->getOperand(2), 
ISD::SETOLE); 1520 case Intrinsic::mips_fclt_w: 1521 case Intrinsic::mips_fclt_d: 1522 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1523 Op->getOperand(2), ISD::SETOLT); 1524 case Intrinsic::mips_fcne_w: 1525 case Intrinsic::mips_fcne_d: 1526 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1527 Op->getOperand(2), ISD::SETONE); 1528 case Intrinsic::mips_fcor_w: 1529 case Intrinsic::mips_fcor_d: 1530 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1531 Op->getOperand(2), ISD::SETO); 1532 case Intrinsic::mips_fcueq_w: 1533 case Intrinsic::mips_fcueq_d: 1534 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1535 Op->getOperand(2), ISD::SETUEQ); 1536 case Intrinsic::mips_fcule_w: 1537 case Intrinsic::mips_fcule_d: 1538 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1539 Op->getOperand(2), ISD::SETULE); 1540 case Intrinsic::mips_fcult_w: 1541 case Intrinsic::mips_fcult_d: 1542 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1543 Op->getOperand(2), ISD::SETULT); 1544 case Intrinsic::mips_fcun_w: 1545 case Intrinsic::mips_fcun_d: 1546 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1547 Op->getOperand(2), ISD::SETUO); 1548 case Intrinsic::mips_fcune_w: 1549 case Intrinsic::mips_fcune_d: 1550 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1551 Op->getOperand(2), ISD::SETUNE); 1552 case Intrinsic::mips_fdiv_w: 1553 case Intrinsic::mips_fdiv_d: 1554 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1555 Op->getOperand(2)); 1556 case Intrinsic::mips_ffint_u_w: 1557 case Intrinsic::mips_ffint_u_d: 1558 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1559 Op->getOperand(1)); 1560 case Intrinsic::mips_ffint_s_w: 1561 case Intrinsic::mips_ffint_s_d: 1562 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1563 Op->getOperand(1)); 1564 case Intrinsic::mips_fill_b: 1565 case Intrinsic::mips_fill_h: 1566 case 
Intrinsic::mips_fill_w: 1567 case Intrinsic::mips_fill_d: { 1568 SmallVector<SDValue, 16> Ops; 1569 EVT ResTy = Op->getValueType(0); 1570 1571 for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) 1572 Ops.push_back(Op->getOperand(1)); 1573 1574 // If ResTy is v2i64 then the type legalizer will break this node down into 1575 // an equivalent v4i32. 1576 return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size()); 1577 } 1578 case Intrinsic::mips_fexp2_w: 1579 case Intrinsic::mips_fexp2_d: { 1580 EVT ResTy = Op->getValueType(0); 1581 return DAG.getNode( 1582 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1583 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1584 } 1585 case Intrinsic::mips_flog2_w: 1586 case Intrinsic::mips_flog2_d: 1587 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1588 case Intrinsic::mips_fmadd_w: 1589 case Intrinsic::mips_fmadd_d: 1590 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1591 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1592 case Intrinsic::mips_fmul_w: 1593 case Intrinsic::mips_fmul_d: 1594 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1595 Op->getOperand(2)); 1596 case Intrinsic::mips_fmsub_w: 1597 case Intrinsic::mips_fmsub_d: { 1598 EVT ResTy = Op->getValueType(0); 1599 return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), 1600 DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, 1601 Op->getOperand(2), Op->getOperand(3))); 1602 } 1603 case Intrinsic::mips_frint_w: 1604 case Intrinsic::mips_frint_d: 1605 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1606 case Intrinsic::mips_fsqrt_w: 1607 case Intrinsic::mips_fsqrt_d: 1608 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1609 case Intrinsic::mips_fsub_w: 1610 case Intrinsic::mips_fsub_d: 1611 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1612 Op->getOperand(2)); 1613 case 
Intrinsic::mips_ftrunc_u_w: 1614 case Intrinsic::mips_ftrunc_u_d: 1615 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1616 Op->getOperand(1)); 1617 case Intrinsic::mips_ftrunc_s_w: 1618 case Intrinsic::mips_ftrunc_s_d: 1619 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1620 Op->getOperand(1)); 1621 case Intrinsic::mips_ilvev_b: 1622 case Intrinsic::mips_ilvev_h: 1623 case Intrinsic::mips_ilvev_w: 1624 case Intrinsic::mips_ilvev_d: 1625 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1626 Op->getOperand(1), Op->getOperand(2)); 1627 case Intrinsic::mips_ilvl_b: 1628 case Intrinsic::mips_ilvl_h: 1629 case Intrinsic::mips_ilvl_w: 1630 case Intrinsic::mips_ilvl_d: 1631 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1632 Op->getOperand(1), Op->getOperand(2)); 1633 case Intrinsic::mips_ilvod_b: 1634 case Intrinsic::mips_ilvod_h: 1635 case Intrinsic::mips_ilvod_w: 1636 case Intrinsic::mips_ilvod_d: 1637 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1638 Op->getOperand(1), Op->getOperand(2)); 1639 case Intrinsic::mips_ilvr_b: 1640 case Intrinsic::mips_ilvr_h: 1641 case Intrinsic::mips_ilvr_w: 1642 case Intrinsic::mips_ilvr_d: 1643 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1644 Op->getOperand(1), Op->getOperand(2)); 1645 case Intrinsic::mips_insert_b: 1646 case Intrinsic::mips_insert_h: 1647 case Intrinsic::mips_insert_w: 1648 case Intrinsic::mips_insert_d: 1649 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1650 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1651 case Intrinsic::mips_ldi_b: 1652 case Intrinsic::mips_ldi_h: 1653 case Intrinsic::mips_ldi_w: 1654 case Intrinsic::mips_ldi_d: 1655 return lowerMSASplatImm(Op, 1, DAG); 1656 case Intrinsic::mips_lsa: { 1657 EVT ResTy = Op->getValueType(0); 1658 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1659 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1660 Op->getOperand(2), 
Op->getOperand(3))); 1661 } 1662 case Intrinsic::mips_maddv_b: 1663 case Intrinsic::mips_maddv_h: 1664 case Intrinsic::mips_maddv_w: 1665 case Intrinsic::mips_maddv_d: { 1666 EVT ResTy = Op->getValueType(0); 1667 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1668 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1669 Op->getOperand(2), Op->getOperand(3))); 1670 } 1671 case Intrinsic::mips_max_s_b: 1672 case Intrinsic::mips_max_s_h: 1673 case Intrinsic::mips_max_s_w: 1674 case Intrinsic::mips_max_s_d: 1675 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1676 Op->getOperand(1), Op->getOperand(2)); 1677 case Intrinsic::mips_max_u_b: 1678 case Intrinsic::mips_max_u_h: 1679 case Intrinsic::mips_max_u_w: 1680 case Intrinsic::mips_max_u_d: 1681 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1682 Op->getOperand(1), Op->getOperand(2)); 1683 case Intrinsic::mips_maxi_s_b: 1684 case Intrinsic::mips_maxi_s_h: 1685 case Intrinsic::mips_maxi_s_w: 1686 case Intrinsic::mips_maxi_s_d: 1687 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1688 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1689 case Intrinsic::mips_maxi_u_b: 1690 case Intrinsic::mips_maxi_u_h: 1691 case Intrinsic::mips_maxi_u_w: 1692 case Intrinsic::mips_maxi_u_d: 1693 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1694 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1695 case Intrinsic::mips_min_s_b: 1696 case Intrinsic::mips_min_s_h: 1697 case Intrinsic::mips_min_s_w: 1698 case Intrinsic::mips_min_s_d: 1699 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1700 Op->getOperand(1), Op->getOperand(2)); 1701 case Intrinsic::mips_min_u_b: 1702 case Intrinsic::mips_min_u_h: 1703 case Intrinsic::mips_min_u_w: 1704 case Intrinsic::mips_min_u_d: 1705 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1706 Op->getOperand(1), Op->getOperand(2)); 1707 case Intrinsic::mips_mini_s_b: 1708 case Intrinsic::mips_mini_s_h: 1709 case 
Intrinsic::mips_mini_s_w: 1710 case Intrinsic::mips_mini_s_d: 1711 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1712 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1713 case Intrinsic::mips_mini_u_b: 1714 case Intrinsic::mips_mini_u_h: 1715 case Intrinsic::mips_mini_u_w: 1716 case Intrinsic::mips_mini_u_d: 1717 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1718 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1719 case Intrinsic::mips_mod_s_b: 1720 case Intrinsic::mips_mod_s_h: 1721 case Intrinsic::mips_mod_s_w: 1722 case Intrinsic::mips_mod_s_d: 1723 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 1724 Op->getOperand(2)); 1725 case Intrinsic::mips_mod_u_b: 1726 case Intrinsic::mips_mod_u_h: 1727 case Intrinsic::mips_mod_u_w: 1728 case Intrinsic::mips_mod_u_d: 1729 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 1730 Op->getOperand(2)); 1731 case Intrinsic::mips_mulv_b: 1732 case Intrinsic::mips_mulv_h: 1733 case Intrinsic::mips_mulv_w: 1734 case Intrinsic::mips_mulv_d: 1735 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 1736 Op->getOperand(2)); 1737 case Intrinsic::mips_msubv_b: 1738 case Intrinsic::mips_msubv_h: 1739 case Intrinsic::mips_msubv_w: 1740 case Intrinsic::mips_msubv_d: { 1741 EVT ResTy = Op->getValueType(0); 1742 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 1743 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1744 Op->getOperand(2), Op->getOperand(3))); 1745 } 1746 case Intrinsic::mips_nlzc_b: 1747 case Intrinsic::mips_nlzc_h: 1748 case Intrinsic::mips_nlzc_w: 1749 case Intrinsic::mips_nlzc_d: 1750 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 1751 case Intrinsic::mips_nor_v: { 1752 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1753 Op->getOperand(1), Op->getOperand(2)); 1754 return DAG.getNOT(DL, Res, Res->getValueType(0)); 1755 } 1756 case Intrinsic::mips_nori_b: { 1757 SDValue Res = 
DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1758 Op->getOperand(1), 1759 lowerMSASplatImm(Op, 2, DAG)); 1760 return DAG.getNOT(DL, Res, Res->getValueType(0)); 1761 } 1762 case Intrinsic::mips_or_v: 1763 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 1764 Op->getOperand(2)); 1765 case Intrinsic::mips_ori_b: 1766 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1767 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1768 case Intrinsic::mips_pckev_b: 1769 case Intrinsic::mips_pckev_h: 1770 case Intrinsic::mips_pckev_w: 1771 case Intrinsic::mips_pckev_d: 1772 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 1773 Op->getOperand(1), Op->getOperand(2)); 1774 case Intrinsic::mips_pckod_b: 1775 case Intrinsic::mips_pckod_h: 1776 case Intrinsic::mips_pckod_w: 1777 case Intrinsic::mips_pckod_d: 1778 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 1779 Op->getOperand(1), Op->getOperand(2)); 1780 case Intrinsic::mips_pcnt_b: 1781 case Intrinsic::mips_pcnt_h: 1782 case Intrinsic::mips_pcnt_w: 1783 case Intrinsic::mips_pcnt_d: 1784 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 1785 case Intrinsic::mips_shf_b: 1786 case Intrinsic::mips_shf_h: 1787 case Intrinsic::mips_shf_w: 1788 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 1789 Op->getOperand(2), Op->getOperand(1)); 1790 case Intrinsic::mips_sll_b: 1791 case Intrinsic::mips_sll_h: 1792 case Intrinsic::mips_sll_w: 1793 case Intrinsic::mips_sll_d: 1794 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 1795 Op->getOperand(2)); 1796 case Intrinsic::mips_slli_b: 1797 case Intrinsic::mips_slli_h: 1798 case Intrinsic::mips_slli_w: 1799 case Intrinsic::mips_slli_d: 1800 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 1801 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1802 case Intrinsic::mips_splat_b: 1803 case Intrinsic::mips_splat_h: 1804 case Intrinsic::mips_splat_w: 1805 case Intrinsic::mips_splat_d: 1806 
// We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 1807 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 1808 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 1809 // Instead we lower to MipsISD::VSHF and match from there. 1810 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 1811 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 1812 Op->getOperand(1)); 1813 case Intrinsic::mips_splati_b: 1814 case Intrinsic::mips_splati_h: 1815 case Intrinsic::mips_splati_w: 1816 case Intrinsic::mips_splati_d: 1817 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 1818 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 1819 Op->getOperand(1)); 1820 case Intrinsic::mips_sra_b: 1821 case Intrinsic::mips_sra_h: 1822 case Intrinsic::mips_sra_w: 1823 case Intrinsic::mips_sra_d: 1824 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 1825 Op->getOperand(2)); 1826 case Intrinsic::mips_srai_b: 1827 case Intrinsic::mips_srai_h: 1828 case Intrinsic::mips_srai_w: 1829 case Intrinsic::mips_srai_d: 1830 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 1831 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1832 case Intrinsic::mips_srl_b: 1833 case Intrinsic::mips_srl_h: 1834 case Intrinsic::mips_srl_w: 1835 case Intrinsic::mips_srl_d: 1836 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 1837 Op->getOperand(2)); 1838 case Intrinsic::mips_srli_b: 1839 case Intrinsic::mips_srli_h: 1840 case Intrinsic::mips_srli_w: 1841 case Intrinsic::mips_srli_d: 1842 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 1843 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1844 case Intrinsic::mips_subv_b: 1845 case Intrinsic::mips_subv_h: 1846 case Intrinsic::mips_subv_w: 1847 case Intrinsic::mips_subv_d: 1848 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 1849 Op->getOperand(2)); 1850 case Intrinsic::mips_subvi_b: 1851 case Intrinsic::mips_subvi_h: 1852 case 
Intrinsic::mips_subvi_w:
  case Intrinsic::mips_subvi_d:
    // subvi: subtract an immediate that is first splatted across all lanes.
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_vshf_b:
  case Intrinsic::mips_vshf_h:
  case Intrinsic::mips_vshf_w:
  case Intrinsic::mips_vshf_d:
    // vshf: fully general shuffle; operand 1 is the control vector.
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_xor_v:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_xori_b:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  }
}

// Lower an MSA ld.[bhwd] intrinsic to a generic vector load from
// Address + Offset.
//
// \param Op   the INTRINSIC_W_CHAIN node (operand 0 is the chain, operand 1
//             the intrinsic id, operands 2 and 3 the address and offset).
// \param Intr the intrinsic id; currently unused because all four element
//             widths lower to the same generic load.
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);

  // Fold the immediate offset into the address before emitting the load.
  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  // NOTE(review): alignment is forced to 16 here — assumes MSA vector
  // accesses are always 16-byte aligned; confirm against the MSA spec.
  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false,
                     false, false, 16);
}

// Lower intrinsics that carry a chain (DSP accumulator operations and MSA
// loads) to the corresponding MipsISD nodes or generic DAG nodes.
// Returns SDValue() for intrinsics that need no custom lowering.
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // Operand 1 holds the intrinsic id (operand 0 is the chain).
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr);
  }
}

// Lower an MSA st.[bhwd] intrinsic to a generic vector store to
// Address + Offset.
//
// \param Intr the intrinsic id; currently unused because all four element
//             widths lower to the same generic store.
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);

  // Fold the immediate offset into the address before emitting the store.
  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  // NOTE(review): alignment is forced to 16 here — assumes MSA vector
  // accesses are always 16-byte aligned; confirm against the MSA spec.
  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false,
                      false, 16);
}

// Lower void intrinsics (MSA stores) to generic store nodes.
// Returns SDValue() for intrinsics that need no custom lowering.
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Operand 1 holds the intrinsic id (operand 0 is the chain).
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
    return lowerMSAStoreIntr(Op, DAG, Intr);
  }
}

/// \brief Check if the given BuildVectorSDNode is a splat.
/// This method currently relies on DAG nodes being reused when equivalent,
/// so it's possible for this to return false even when isConstantSplat returns
/// true.
static bool isSplatVector(const BuildVectorSDNode *N) {
  unsigned int nOps = N->getNumOperands();
  assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");

  // Every operand must be the exact same SDValue as operand 0 (identical
  // node and result number, not merely an equal constant).
  SDValue Operand0 = N->getOperand(0);

  for (unsigned int i = 1; i < nOps; ++i) {
    if (N->getOperand(i) != Operand0)
      return false;
  }

  return true;
}

// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Op0 = Op->getOperand(0);
  EVT VecTy = Op0->getValueType(0);

  // Only 128-bit (MSA) vectors get the custom lowering.
  if (!VecTy.is128BitVector())
    return SDValue();

  if (ResTy.isInteger()) {
    SDValue Op1 = Op->getOperand(1);
    EVT EltTy = VecTy.getVectorElementType();
    // Record the element type so the extension kind is known when matching.
    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
                       DAG.getValueType(EltTy));
  }

  // Floating-point extracts are left as-is for pattern matching.
  return Op;
}

// Returns true if Op is an undef or any kind of constant (integer or FP).
static bool isConstantOrUndef(const SDValue Op) {
  if (Op->getOpcode() == ISD::UNDEF)
    return true;
  if (dyn_cast<ConstantSDNode>(Op))
    return true;
  if (dyn_cast<ConstantFPSDNode>(Op))
    return true;
  return false;
}

// Returns true if AT LEAST ONE operand of the BUILD_VECTOR is constant or
// undef. Note: despite the name this is an "any", not an "all", test — the
// caller negates the result to detect vectors whose elements are all
// non-constant, which are the ones lowered via INSERT_VECTOR_ELT.
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
// backend.
//
// Lowers according to the following rules:
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
//   2 less than or equal to 64 and the value fits into a signed 10-bit
//   immediate
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
//   is a power of 2 less than or equal to 64 and the value does not fit into a
//   signed 10-bit immediate
// - Non-constant splats are legal as-is.
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
// - All others are illegal and must be expanded.
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Custom lowering only applies to MSA 128-bit vectors.
  if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  // 8 is the minimum splat element size to consider; the last argument
  // requests big-endian splat detection on big-endian targets.
  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget->isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value fits into a simm10 then we can use ldi.[bhwd]
    if (SplatValue.isSignedIntN(10))
      return Op;

    // Otherwise rebuild the splat as an integer BUILD_VECTOR of matching
    // element width (matched to fill.[bhw]) and bitcast to the result type.
    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    SmallVector<SDValue, 16> Ops;
    // fill.[bhw] takes a GPR32, so the splat constant is sign-extended to
    // 32 bits.
    SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32);

    for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i)
      Ops.push_back(Constant);

    SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy,
                                 &Ops[0], Ops.size());

    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (isSplatVector(Node))
    // Non-constant splats are legal as-is (matched to splat/fill).
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, MVT::i32));
    }
    return Vector;
  }

  // Mixed constant/non-constant vectors fall through to expansion.
  return SDValue();
}

// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above form.
2130// 2131// For example: 2132// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2133// <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2134// i32 7, i32 6, i32 5, i32 4> 2135// is lowered to: 2136// (SHF_H $w0, $w1, 27) 2137// where the 27 comes from: 2138// 3 + (2 << 2) + (1 << 4) + (0 << 6) 2139static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2140 SmallVector<int, 16> Indices, 2141 SelectionDAG &DAG) { 2142 int SHFIndices[4] = { -1, -1, -1, -1 }; 2143 2144 if (Indices.size() < 4) 2145 return SDValue(); 2146 2147 for (unsigned i = 0; i < 4; ++i) { 2148 for (unsigned j = i; j < Indices.size(); j += 4) { 2149 int Idx = Indices[j]; 2150 2151 // Convert from vector index to 4-element subvector index 2152 // If an index refers to an element outside of the subvector then give up 2153 if (Idx != -1) { 2154 Idx -= 4 * (j / 4); 2155 if (Idx < 0 || Idx >= 4) 2156 return SDValue(); 2157 } 2158 2159 // If the mask has an undef, replace it with the current index. 2160 // Note that it might still be undef if the current index is also undef 2161 if (SHFIndices[i] == -1) 2162 SHFIndices[i] = Idx; 2163 2164 // Check that non-undef values are the same as in the mask. If they 2165 // aren't then give up 2166 if (!(Idx == -1 || Idx == SHFIndices[i])) 2167 return SDValue(); 2168 } 2169 } 2170 2171 // Calculate the immediate. Replace any remaining undefs with zero 2172 APInt Imm(32, 0); 2173 for (int i = 3; i >= 0; --i) { 2174 int Idx = SHFIndices[i]; 2175 2176 if (Idx == -1) 2177 Idx = 0; 2178 2179 Imm <<= 2; 2180 Imm |= Idx & 0x3; 2181 } 2182 2183 return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy, 2184 DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); 2185} 2186 2187// Lower VECTOR_SHUFFLE into ILVEV (if possible). 2188// 2189// ILVEV interleaves the even elements from each vector. 2190// 2191// It is possible to lower into ILVEV when the mask takes the form: 2192// <0, n, 2, n+2, 4, n+4, ...> 2193// where n is the number of elements in the vector. 
2194// 2195// When undef's appear in the mask they are treated as if they were whatever 2196// value is necessary in order to fit the above form. 2197static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2198 SmallVector<int, 16> Indices, 2199 SelectionDAG &DAG) { 2200 assert ((Indices.size() % 2) == 0); 2201 int WsIdx = 0; 2202 int WtIdx = ResTy.getVectorNumElements(); 2203 2204 for (unsigned i = 0; i < Indices.size(); i += 2) { 2205 if (Indices[i] != -1 && Indices[i] != WsIdx) 2206 return SDValue(); 2207 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2208 return SDValue(); 2209 WsIdx += 2; 2210 WtIdx += 2; 2211 } 2212 2213 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0), 2214 Op->getOperand(1)); 2215} 2216 2217// Lower VECTOR_SHUFFLE into ILVOD (if possible). 2218// 2219// ILVOD interleaves the odd elements from each vector. 2220// 2221// It is possible to lower into ILVOD when the mask takes the form: 2222// <1, n+1, 3, n+3, 5, n+5, ...> 2223// where n is the number of elements in the vector. 2224// 2225// When undef's appear in the mask they are treated as if they were whatever 2226// value is necessary in order to fit the above form. 2227static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2228 SmallVector<int, 16> Indices, 2229 SelectionDAG &DAG) { 2230 assert ((Indices.size() % 2) == 0); 2231 int WsIdx = 1; 2232 int WtIdx = ResTy.getVectorNumElements() + 1; 2233 2234 for (unsigned i = 0; i < Indices.size(); i += 2) { 2235 if (Indices[i] != -1 && Indices[i] != WsIdx) 2236 return SDValue(); 2237 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2238 return SDValue(); 2239 WsIdx += 2; 2240 WtIdx += 2; 2241 } 2242 2243 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0), 2244 Op->getOperand(1)); 2245} 2246 2247// Lower VECTOR_SHUFFLE into ILVL (if possible). 2248// 2249// ILVL interleaves consecutive elements from the left half of each vector. 
2250// 2251// It is possible to lower into ILVL when the mask takes the form: 2252// <0, n, 1, n+1, 2, n+2, ...> 2253// where n is the number of elements in the vector. 2254// 2255// When undef's appear in the mask they are treated as if they were whatever 2256// value is necessary in order to fit the above form. 2257static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2258 SmallVector<int, 16> Indices, 2259 SelectionDAG &DAG) { 2260 assert ((Indices.size() % 2) == 0); 2261 int WsIdx = 0; 2262 int WtIdx = ResTy.getVectorNumElements(); 2263 2264 for (unsigned i = 0; i < Indices.size(); i += 2) { 2265 if (Indices[i] != -1 && Indices[i] != WsIdx) 2266 return SDValue(); 2267 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2268 return SDValue(); 2269 WsIdx ++; 2270 WtIdx ++; 2271 } 2272 2273 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0), 2274 Op->getOperand(1)); 2275} 2276 2277// Lower VECTOR_SHUFFLE into ILVR (if possible). 2278// 2279// ILVR interleaves consecutive elements from the right half of each vector. 2280// 2281// It is possible to lower into ILVR when the mask takes the form: 2282// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2283// where n is the number of elements in the vector and x is half n. 2284// 2285// When undef's appear in the mask they are treated as if they were whatever 2286// value is necessary in order to fit the above form. 
2287static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2288 SmallVector<int, 16> Indices, 2289 SelectionDAG &DAG) { 2290 assert ((Indices.size() % 2) == 0); 2291 unsigned NumElts = ResTy.getVectorNumElements(); 2292 int WsIdx = NumElts / 2; 2293 int WtIdx = NumElts + NumElts / 2; 2294 2295 for (unsigned i = 0; i < Indices.size(); i += 2) { 2296 if (Indices[i] != -1 && Indices[i] != WsIdx) 2297 return SDValue(); 2298 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2299 return SDValue(); 2300 WsIdx ++; 2301 WtIdx ++; 2302 } 2303 2304 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0), 2305 Op->getOperand(1)); 2306} 2307 2308// Lower VECTOR_SHUFFLE into PCKEV (if possible). 2309// 2310// PCKEV copies the even elements of each vector into the result vector. 2311// 2312// It is possible to lower into PCKEV when the mask takes the form: 2313// <0, 2, 4, ..., n, n+2, n+4, ...> 2314// where n is the number of elements in the vector. 2315// 2316// When undef's appear in the mask they are treated as if they were whatever 2317// value is necessary in order to fit the above form. 2318static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2319 SmallVector<int, 16> Indices, 2320 SelectionDAG &DAG) { 2321 assert ((Indices.size() % 2) == 0); 2322 int Idx = 0; 2323 2324 for (unsigned i = 0; i < Indices.size(); ++i) { 2325 if (Indices[i] != -1 && Indices[i] != Idx) 2326 return SDValue(); 2327 Idx += 2; 2328 } 2329 2330 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0), 2331 Op->getOperand(1)); 2332} 2333 2334// Lower VECTOR_SHUFFLE into PCKOD (if possible). 2335// 2336// PCKOD copies the odd elements of each vector into the result vector. 2337// 2338// It is possible to lower into PCKOD when the mask takes the form: 2339// <1, 3, 5, ..., n+1, n+3, n+5, ...> 2340// where n is the number of elements in the vector. 
2341// 2342// When undef's appear in the mask they are treated as if they were whatever 2343// value is necessary in order to fit the above form. 2344static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2345 SmallVector<int, 16> Indices, 2346 SelectionDAG &DAG) { 2347 assert ((Indices.size() % 2) == 0); 2348 int Idx = 1; 2349 2350 for (unsigned i = 0; i < Indices.size(); ++i) { 2351 if (Indices[i] != -1 && Indices[i] != Idx) 2352 return SDValue(); 2353 Idx += 2; 2354 } 2355 2356 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), 2357 Op->getOperand(1)); 2358} 2359 2360// Lower VECTOR_SHUFFLE into VSHF. 2361// 2362// This mostly consists of converting the shuffle indices in Indices into a 2363// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2364// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2365// if the type is v8i16 and all the indices are less than 8 then the second 2366// operand is unused and can be replaced with anything. We choose to replace it 2367// with the used operand since this reduces the number of instructions overall. 
2368static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2369 SmallVector<int, 16> Indices, 2370 SelectionDAG &DAG) { 2371 SmallVector<SDValue, 16> Ops; 2372 SDValue Op0; 2373 SDValue Op1; 2374 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2375 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2376 bool Using1stVec = false; 2377 bool Using2ndVec = false; 2378 SDLoc DL(Op); 2379 int ResTyNumElts = ResTy.getVectorNumElements(); 2380 2381 for (int i = 0; i < ResTyNumElts; ++i) { 2382 // Idx == -1 means UNDEF 2383 int Idx = Indices[i]; 2384 2385 if (0 <= Idx && Idx < ResTyNumElts) 2386 Using1stVec = true; 2387 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2388 Using2ndVec = true; 2389 } 2390 2391 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 2392 ++I) 2393 Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); 2394 2395 SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0], 2396 Ops.size()); 2397 2398 if (Using1stVec && Using2ndVec) { 2399 Op0 = Op->getOperand(0); 2400 Op1 = Op->getOperand(1); 2401 } else if (Using1stVec) 2402 Op0 = Op1 = Op->getOperand(0); 2403 else if (Using2ndVec) 2404 Op0 = Op1 = Op->getOperand(1); 2405 else 2406 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2407 2408 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op0, Op1); 2409} 2410 2411// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2412// indices in the shuffle. 
2413SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 2414 SelectionDAG &DAG) const { 2415 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 2416 EVT ResTy = Op->getValueType(0); 2417 2418 if (!ResTy.is128BitVector()) 2419 return SDValue(); 2420 2421 int ResTyNumElts = ResTy.getVectorNumElements(); 2422 SmallVector<int, 16> Indices; 2423 2424 for (int i = 0; i < ResTyNumElts; ++i) 2425 Indices.push_back(Node->getMaskElt(i)); 2426 2427 SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG); 2428 if (Result.getNode()) 2429 return Result; 2430 Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG); 2431 if (Result.getNode()) 2432 return Result; 2433 Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG); 2434 if (Result.getNode()) 2435 return Result; 2436 Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG); 2437 if (Result.getNode()) 2438 return Result; 2439 Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG); 2440 if (Result.getNode()) 2441 return Result; 2442 Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG); 2443 if (Result.getNode()) 2444 return Result; 2445 Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG); 2446 if (Result.getNode()) 2447 return Result; 2448 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2449} 2450 2451MachineBasicBlock * MipsSETargetLowering:: 2452emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ 2453 // $bb: 2454 // bposge32_pseudo $vr0 2455 // => 2456 // $bb: 2457 // bposge32 $tbb 2458 // $fbb: 2459 // li $vr2, 0 2460 // b $sink 2461 // $tbb: 2462 // li $vr1, 1 2463 // $sink: 2464 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 2465 2466 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2467 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2468 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 2469 DebugLoc DL = MI->getDebugLoc(); 2470 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2471 MachineFunction::iterator It = 
llvm::next(MachineFunction::iterator(BB)); 2472 MachineFunction *F = BB->getParent(); 2473 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 2474 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 2475 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 2476 F->insert(It, FBB); 2477 F->insert(It, TBB); 2478 F->insert(It, Sink); 2479 2480 // Transfer the remainder of BB and its successor edges to Sink. 2481 Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), 2482 BB->end()); 2483 Sink->transferSuccessorsAndUpdatePHIs(BB); 2484 2485 // Add successors. 2486 BB->addSuccessor(FBB); 2487 BB->addSuccessor(TBB); 2488 FBB->addSuccessor(Sink); 2489 TBB->addSuccessor(Sink); 2490 2491 // Insert the real bposge32 instruction to $BB. 2492 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 2493 2494 // Fill $FBB. 2495 unsigned VR2 = RegInfo.createVirtualRegister(RC); 2496 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 2497 .addReg(Mips::ZERO).addImm(0); 2498 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 2499 2500 // Fill $TBB. 2501 unsigned VR1 = RegInfo.createVirtualRegister(RC); 2502 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 2503 .addReg(Mips::ZERO).addImm(1); 2504 2505 // Insert phi function to $Sink. 2506 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 2507 MI->getOperand(0).getReg()) 2508 .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); 2509 2510 MI->eraseFromParent(); // The pseudo instruction is gone now. 
2511 return Sink; 2512} 2513 2514MachineBasicBlock * MipsSETargetLowering:: 2515emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, 2516 unsigned BranchOp) const{ 2517 // $bb: 2518 // vany_nonzero $rd, $ws 2519 // => 2520 // $bb: 2521 // bnz.b $ws, $tbb 2522 // b $fbb 2523 // $fbb: 2524 // li $rd1, 0 2525 // b $sink 2526 // $tbb: 2527 // li $rd2, 1 2528 // $sink: 2529 // $rd = phi($rd1, $fbb, $rd2, $tbb) 2530 2531 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2532 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2533 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 2534 DebugLoc DL = MI->getDebugLoc(); 2535 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2536 MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); 2537 MachineFunction *F = BB->getParent(); 2538 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 2539 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 2540 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 2541 F->insert(It, FBB); 2542 F->insert(It, TBB); 2543 F->insert(It, Sink); 2544 2545 // Transfer the remainder of BB and its successor edges to Sink. 2546 Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), 2547 BB->end()); 2548 Sink->transferSuccessorsAndUpdatePHIs(BB); 2549 2550 // Add successors. 2551 BB->addSuccessor(FBB); 2552 BB->addSuccessor(TBB); 2553 FBB->addSuccessor(Sink); 2554 TBB->addSuccessor(Sink); 2555 2556 // Insert the real bnz.b instruction to $BB. 2557 BuildMI(BB, DL, TII->get(BranchOp)) 2558 .addReg(MI->getOperand(1).getReg()) 2559 .addMBB(TBB); 2560 2561 // Fill $FBB. 2562 unsigned RD1 = RegInfo.createVirtualRegister(RC); 2563 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 2564 .addReg(Mips::ZERO).addImm(0); 2565 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 2566 2567 // Fill $TBB. 
2568 unsigned RD2 = RegInfo.createVirtualRegister(RC); 2569 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 2570 .addReg(Mips::ZERO).addImm(1); 2571 2572 // Insert phi function to $Sink. 2573 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 2574 MI->getOperand(0).getReg()) 2575 .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB); 2576 2577 MI->eraseFromParent(); // The pseudo instruction is gone now. 2578 return Sink; 2579} 2580 2581// Emit the COPY_FW pseudo instruction. 2582// 2583// copy_fw_pseudo $fd, $ws, n 2584// => 2585// copy_u_w $rt, $ws, $n 2586// mtc1 $rt, $fd 2587// 2588// When n is zero, the equivalent operation can be performed with (potentially) 2589// zero instructions due to register overlaps. This optimization is never valid 2590// for lane 1 because it would require FR=0 mode which isn't supported by MSA. 2591MachineBasicBlock * MipsSETargetLowering:: 2592emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ 2593 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2594 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2595 DebugLoc DL = MI->getDebugLoc(); 2596 unsigned Fd = MI->getOperand(0).getReg(); 2597 unsigned Ws = MI->getOperand(1).getReg(); 2598 unsigned Lane = MI->getOperand(2).getImm(); 2599 2600 if (Lane == 0) 2601 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo); 2602 else { 2603 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 2604 2605 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(1); 2606 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 2607 } 2608 2609 MI->eraseFromParent(); // The pseudo instruction is gone now. 2610 return BB; 2611} 2612 2613// Emit the COPY_FD pseudo instruction. 
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode which is the only supported mode in MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Fd = MI->getOperand(0).getReg();
  unsigned Ws = MI->getOperand(1).getReg();
  // Scale the d-lane index to w-lane units (a d-lane spans two w-lanes);
  // only used here to test for lane 0.
  unsigned Lane = MI->getOperand(2).getImm() * 2;
  DebugLoc DL = MI->getDebugLoc();

  if (Lane == 0)
    // Lane 0 of $ws overlaps $fd's register via sub_64; a plain COPY
    // suffices.
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    // v2f64 has only lanes 0 and 1, so the non-zero lane is always 1 here.
    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Wd_in = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();
  unsigned Fs = MI->getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);

  // Widen $fs into a 128-bit register by placing it in sub_lo; the upper
  // bits are left undefined (only element 0 of $wt is read by insve.w).
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Wd_in = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();
  unsigned Fs = MI->getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Widen $fs into a 128-bit register by placing it in sub_64; the upper
  // bits are left undefined (only element 0 of $wt is read by insve.d).
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Fs = MI->getOperand(1).getReg();
  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);

  // Build a 128-bit value whose lane 0 is $fs (the other lanes are
  // undefined), then broadcast lane 0 to every lane of $wd.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Fs = MI->getOperand(1).getReg();
  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Build a 128-bit value whose lane 0 is $fs (the other lane is
  // undefined), then broadcast lane 0 to both lanes of $wd.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the FEXP2_W_1 pseudo instructions.
2772// 2773// fexp2_w_1_pseudo $wd, $wt 2774// => 2775// ldi.w $ws, 1 2776// fexp2.w $wd, $ws, $wt 2777MachineBasicBlock * 2778MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, 2779 MachineBasicBlock *BB) const { 2780 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2781 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2782 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 2783 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 2784 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 2785 DebugLoc DL = MI->getDebugLoc(); 2786 2787 // Splat 1.0 into a vector 2788 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 2789 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 2790 2791 // Emit 1.0 * fexp2(Wt) 2792 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI->getOperand(0).getReg()) 2793 .addReg(Ws2) 2794 .addReg(MI->getOperand(1).getReg()); 2795 2796 MI->eraseFromParent(); // The pseudo instruction is gone now. 2797 return BB; 2798} 2799 2800// Emit the FEXP2_D_1 pseudo instructions. 
2801// 2802// fexp2_d_1_pseudo $wd, $wt 2803// => 2804// ldi.d $ws, 1 2805// fexp2.d $wd, $ws, $wt 2806MachineBasicBlock * 2807MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI, 2808 MachineBasicBlock *BB) const { 2809 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2810 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2811 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 2812 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 2813 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 2814 DebugLoc DL = MI->getDebugLoc(); 2815 2816 // Splat 1.0 into a vector 2817 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 2818 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 2819 2820 // Emit 1.0 * fexp2(Wt) 2821 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI->getOperand(0).getReg()) 2822 .addReg(Ws2) 2823 .addReg(MI->getOperand(1).getReg()); 2824 2825 MI->eraseFromParent(); // The pseudo instruction is gone now. 2826 return BB; 2827} 2828