MipsSEISelLowering.cpp revision 243702b95a471ffb7d2374dfad3d7f8b11bee7e7
//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
#include "MipsSEISelLowering.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

// Tail calls are opt-in on MIPS; see isEligibleForTailCallOptimization below.
static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
                    cl::desc("MIPS: Enable tail calls."), cl::init(false));

// When set, f64 loads/stores are custom-lowered to pairs of i32 accesses
// instead of ldc1/sdc1 (see lowerLOAD/lowerSTORE).
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
  : MipsTargetLowering(TM) {
  // Set up the register classes

  clearRegisterClasses();

  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (HasMips64)
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
    // Expand all truncating stores and extending loads.
    unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;

    for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
      for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
        setTruncStoreAction((MVT::SimpleValueType)VT0,
                            (MVT::SimpleValueType)VT1, Expand);

      setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
      setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
    }
  }

  if (Subtarget->hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      // Then re-enable the handful of operations DSP supports natively.
      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);
  }

  if (Subtarget->hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget->hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }

  if (!Subtarget->mipsSEUsesSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget->isSingleFloat()) {
      if (Subtarget->isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  // Multiplies/divides go through HI/LO accumulator pseudo-lowering; see
  // lowerMulDiv.
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (HasMips64) {
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::MUL, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::ADDE);
  setTargetDAGCombine(ISD::SUBE);
  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  computeRegisterProperties();
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
  return new MipsSETargetLowering(TM);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  // MSA has no direct encodings for these condition codes; expand them into
  // the supported ones.
  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);

  // v8f16 only supports the moves/loads/stores above; arithmetic is not
  // marked legal for it.
  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}

// Unaligned accesses are permitted (and flagged fast) only for i32/i64.
bool
MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = true;
    return true;
  default:
    return false;
  }
}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch(Op.getOpcode()) {
  case ISD::LOAD: return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                        DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// selectMADD -
// Transforms a subgraph in CurDAG if the following pattern is found:
//  (addc multLo, Lo0), (adde multHi, Hi0),
// where,
//  multHi/Lo: product of multiplication
//  Lo0: initial value of Lo register
//  Hi0: initial value of Hi register
// Return true if pattern matching was successful.
static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
  // ADDENode's second operand must be a flag output of an ADDC node in order
  // for the matching to be successful.
  SDNode *ADDCNode = ADDENode->getOperand(2).getNode();

  if (ADDCNode->getOpcode() != ISD::ADDC)
    return false;

  SDValue MultHi = ADDENode->getOperand(0);
  SDValue MultLo = ADDCNode->getOperand(0);
  SDNode *MultNode = MultHi.getNode();
  unsigned MultOpc = MultHi.getOpcode();

  // MultHi and MultLo must be generated by the same node,
  if (MultLo.getNode() != MultNode)
    return false;

  // and it must be a multiplication.
  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
    return false;

  // MultLo and MultHi must be the first and second output of MultNode
  // respectively.
  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
    return false;

  // Transform this to a MADD only if ADDENode and ADDCNode are the only users
  // of the values of MultNode, in which case MultNode will be removed in later
  // phases.
  // If there exist users other than ADDENode or ADDCNode, this function returns
  // here, which will result in MultNode being mapped to a single MULT
  // instruction node rather than a pair of MULT and MADD instructions being
  // produced.
  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
    return false;

  SDLoc DL(ADDENode);

  // Initialize accumulator.
  SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
                                  ADDCNode->getOperand(1),
                                  ADDENode->getOperand(1));

  // create MipsMAdd(u) node
  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;

  SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
                                 MultNode->getOperand(0),// Factor 0
                                 MultNode->getOperand(1),// Factor 1
                                 ACCIn);

  // replace uses of adde and addc here
  if (!SDValue(ADDCNode, 0).use_empty()) {
    SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLO, DL, MVT::i32, MAdd);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
  }
  if (!SDValue(ADDENode, 0).use_empty()) {
    SDValue HiOut = CurDAG->getNode(MipsISD::ExtractHI, DL, MVT::i32, MAdd);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
  }

  return true;
}

// selectMSUB -
// Transforms a subgraph in CurDAG if the following pattern is found:
//  (addc Lo0, multLo), (sube Hi0, multHi),
// where,
//  multHi/Lo: product of multiplication
//  Lo0: initial value of Lo register
//  Hi0: initial value of Hi register
// Return true if pattern matching was successful.
354static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { 355 // SUBENode's second operand must be a flag output of an SUBC node in order 356 // for the matching to be successful. 357 SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); 358 359 if (SUBCNode->getOpcode() != ISD::SUBC) 360 return false; 361 362 SDValue MultHi = SUBENode->getOperand(1); 363 SDValue MultLo = SUBCNode->getOperand(1); 364 SDNode *MultNode = MultHi.getNode(); 365 unsigned MultOpc = MultHi.getOpcode(); 366 367 // MultHi and MultLo must be generated by the same node, 368 if (MultLo.getNode() != MultNode) 369 return false; 370 371 // and it must be a multiplication. 372 if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) 373 return false; 374 375 // MultLo amd MultHi must be the first and second output of MultNode 376 // respectively. 377 if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) 378 return false; 379 380 // Transform this to a MSUB only if SUBENode and SUBCNode are the only users 381 // of the values of MultNode, in which case MultNode will be removed in later 382 // phases. 383 // If there exist users other than SUBENode or SUBCNode, this function returns 384 // here, which will result in MultNode being mapped to a single MULT 385 // instruction node rather than a pair of MULT and MSUB instructions being 386 // produced. 387 if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) 388 return false; 389 390 SDLoc DL(SUBENode); 391 392 // Initialize accumulator. 393 SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, 394 SUBCNode->getOperand(0), 395 SUBENode->getOperand(0)); 396 397 // create MipsSub(u) node 398 MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MSubu : MipsISD::MSub; 399 400 SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, 401 MultNode->getOperand(0),// Factor 0 402 MultNode->getOperand(1),// Factor 1 403 ACCIn); 404 405 // replace uses of sube and subc here 406 if (!SDValue(SUBCNode, 0).use_empty()) { 407 SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLO, DL, MVT::i32, MSub); 408 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); 409 } 410 if (!SDValue(SUBENode, 0).use_empty()) { 411 SDValue HiOut = CurDAG->getNode(MipsISD::ExtractHI, DL, MVT::i32, MSub); 412 CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); 413 } 414 415 return true; 416} 417 418static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, 419 TargetLowering::DAGCombinerInfo &DCI, 420 const MipsSubtarget *Subtarget) { 421 if (DCI.isBeforeLegalize()) 422 return SDValue(); 423 424 if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && 425 selectMADD(N, &DAG)) 426 return SDValue(N, 0); 427 428 return SDValue(); 429} 430 431// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 432// 433// Performs the following transformations: 434// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 435// sign/zero-extension is completely overwritten by the new one performed by 436// the ISD::AND. 437// - Removes redundant zero extensions performed by an ISD::AND. 
438static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 439 TargetLowering::DAGCombinerInfo &DCI, 440 const MipsSubtarget *Subtarget) { 441 if (!Subtarget->hasMSA()) 442 return SDValue(); 443 444 SDValue Op0 = N->getOperand(0); 445 SDValue Op1 = N->getOperand(1); 446 unsigned Op0Opcode = Op0->getOpcode(); 447 448 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 449 // where $d + 1 == 2^n and n == 32 450 // or $d + 1 == 2^n and n <= 32 and ZExt 451 // -> (MipsVExtractZExt $a, $b, $c) 452 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 453 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 454 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 455 456 if (!Mask) 457 return SDValue(); 458 459 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 460 461 if (Log2IfPositive <= 0) 462 return SDValue(); // Mask+1 is not a power of 2 463 464 SDValue Op0Op2 = Op0->getOperand(2); 465 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 466 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 467 unsigned Log2 = Log2IfPositive; 468 469 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 470 Log2 == ExtendTySize) { 471 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; 472 DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT, 473 Op0->getVTList(), Ops, Op0->getNumOperands()); 474 return Op0; 475 } 476 } 477 478 return SDValue(); 479} 480 481static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, 482 TargetLowering::DAGCombinerInfo &DCI, 483 const MipsSubtarget *Subtarget) { 484 if (DCI.isBeforeLegalize()) 485 return SDValue(); 486 487 if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && 488 selectMSUB(N, &DAG)) 489 return SDValue(N, 0); 490 491 return SDValue(); 492} 493 494static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT, 495 EVT ShiftTy, SelectionDAG &DAG) { 496 // Clear the upper (64 - VT.sizeInBits) bits. 497 C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits()); 498 499 // Return 0. 
500 if (C == 0) 501 return DAG.getConstant(0, VT); 502 503 // Return x. 504 if (C == 1) 505 return X; 506 507 // If c is power of 2, return (shl x, log2(c)). 508 if (isPowerOf2_64(C)) 509 return DAG.getNode(ISD::SHL, DL, VT, X, 510 DAG.getConstant(Log2_64(C), ShiftTy)); 511 512 unsigned Log2Ceil = Log2_64_Ceil(C); 513 uint64_t Floor = 1LL << Log2_64(C); 514 uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil; 515 516 // If |c - floor_c| <= |c - ceil_c|, 517 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 518 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 519 if (C - Floor <= Ceil - C) { 520 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 521 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 522 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 523 } 524 525 // If |c - floor_c| > |c - ceil_c|, 526 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 527 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 528 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 529 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 530} 531 532static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 533 const TargetLowering::DAGCombinerInfo &DCI, 534 const MipsSETargetLowering *TL) { 535 EVT VT = N->getValueType(0); 536 537 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 538 if (!VT.isVector()) 539 return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), 540 VT, TL->getScalarShiftAmountTy(VT), DAG); 541 542 return SDValue(N, 0); 543} 544 545static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 546 SelectionDAG &DAG, 547 const MipsSubtarget *Subtarget) { 548 // See if this is a vector splat immediate node. 
549 APInt SplatValue, SplatUndef; 550 unsigned SplatBitSize; 551 bool HasAnyUndefs; 552 unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); 553 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 554 555 if (!BV || 556 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 557 EltSize, !Subtarget->isLittle()) || 558 (SplatBitSize != EltSize) || 559 (SplatValue.getZExtValue() >= EltSize)) 560 return SDValue(); 561 562 return DAG.getNode(Opc, SDLoc(N), Ty, N->getOperand(0), 563 DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); 564} 565 566static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 567 TargetLowering::DAGCombinerInfo &DCI, 568 const MipsSubtarget *Subtarget) { 569 EVT Ty = N->getValueType(0); 570 571 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 572 return SDValue(); 573 574 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 575} 576 577// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 578// constant splats into MipsISD::SHRA_DSP for DSPr2. 579// 580// Performs the following transformations: 581// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 582// sign/zero-extension is completely overwritten by the new one performed by 583// the ISD::SRA and ISD::SHL nodes. 584// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 585// sequence. 586// 587// See performDSPShiftCombine for more information about the transformation 588// used for DSPr2. 
589static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 590 TargetLowering::DAGCombinerInfo &DCI, 591 const MipsSubtarget *Subtarget) { 592 EVT Ty = N->getValueType(0); 593 594 if (Subtarget->hasMSA()) { 595 SDValue Op0 = N->getOperand(0); 596 SDValue Op1 = N->getOperand(1); 597 598 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 599 // where $d + sizeof($c) == 32 600 // or $d + sizeof($c) <= 32 and SExt 601 // -> (MipsVExtractSExt $a, $b, $c) 602 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 603 SDValue Op0Op0 = Op0->getOperand(0); 604 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 605 606 if (!ShAmount) 607 return SDValue(); 608 609 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 610 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 611 return SDValue(); 612 613 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 614 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 615 616 if (TotalBits == 32 || 617 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 618 TotalBits <= 32)) { 619 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 620 Op0Op0->getOperand(2) }; 621 DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, 622 Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands()); 623 return Op0Op0; 624 } 625 } 626 } 627 628 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) 629 return SDValue(); 630 631 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 632} 633 634 635static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 636 TargetLowering::DAGCombinerInfo &DCI, 637 const MipsSubtarget *Subtarget) { 638 EVT Ty = N->getValueType(0); 639 640 if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8)) 641 return SDValue(); 642 643 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 644} 645 646static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 647 bool IsV216 
= (Ty == MVT::v2i16); 648 649 switch (CC) { 650 case ISD::SETEQ: 651 case ISD::SETNE: return true; 652 case ISD::SETLT: 653 case ISD::SETLE: 654 case ISD::SETGT: 655 case ISD::SETGE: return IsV216; 656 case ISD::SETULT: 657 case ISD::SETULE: 658 case ISD::SETUGT: 659 case ISD::SETUGE: return !IsV216; 660 default: return false; 661 } 662} 663 664static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 665 EVT Ty = N->getValueType(0); 666 667 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 668 return SDValue(); 669 670 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 671 return SDValue(); 672 673 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 674 N->getOperand(1), N->getOperand(2)); 675} 676 677static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 678 EVT Ty = N->getValueType(0); 679 680 if (Ty.is128BitVector() && Ty.isInteger()) { 681 // Try the following combines: 682 // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) 683 // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) 684 // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) 685 // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) 686 // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) 687 // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) 688 // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) 689 // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) 690 // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but 691 // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the 692 // legalizer. 
693 SDValue Op0 = N->getOperand(0); 694 695 if (Op0->getOpcode() != ISD::SETCC) 696 return SDValue(); 697 698 ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get(); 699 bool Signed; 700 701 if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) 702 Signed = true; 703 else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) 704 Signed = false; 705 else 706 return SDValue(); 707 708 SDValue Op1 = N->getOperand(1); 709 SDValue Op2 = N->getOperand(2); 710 SDValue Op0Op0 = Op0->getOperand(0); 711 SDValue Op0Op1 = Op0->getOperand(1); 712 713 if (Op1 == Op0Op0 && Op2 == Op0Op1) 714 return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), 715 Ty, Op1, Op2); 716 else if (Op1 == Op0Op1 && Op2 == Op0Op0) 717 return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), 718 Ty, Op1, Op2); 719 } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { 720 SDValue SetCC = N->getOperand(0); 721 722 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 723 return SDValue(); 724 725 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 726 SetCC.getOperand(0), SetCC.getOperand(1), 727 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 728 } 729 730 return SDValue(); 731} 732 733static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 734 const MipsSubtarget *Subtarget) { 735 EVT Ty = N->getValueType(0); 736 737 if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 738 // Try the following combines: 739 // (xor (or $a, $b), (build_vector allones)) 740 // (xor (or $a, $b), (bitcast (build_vector allones))) 741 SDValue Op0 = N->getOperand(0); 742 SDValue Op1 = N->getOperand(1); 743 SDValue NotOp; 744 745 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 746 NotOp = Op1; 747 else if (ISD::isBuildVectorAllOnes(Op1.getNode())) 748 NotOp = Op0; 749 else 750 return SDValue(); 751 752 if (NotOp->getOpcode() == ISD::OR) 753 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 754 NotOp->getOperand(1)); 755 } 756 757 
return SDValue(); 758} 759 760SDValue 761MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 762 SelectionDAG &DAG = DCI.DAG; 763 SDValue Val; 764 765 switch (N->getOpcode()) { 766 case ISD::ADDE: 767 return performADDECombine(N, DAG, DCI, Subtarget); 768 case ISD::AND: 769 Val = performANDCombine(N, DAG, DCI, Subtarget); 770 break; 771 case ISD::SUBE: 772 return performSUBECombine(N, DAG, DCI, Subtarget); 773 case ISD::MUL: 774 return performMULCombine(N, DAG, DCI, this); 775 case ISD::SHL: 776 return performSHLCombine(N, DAG, DCI, Subtarget); 777 case ISD::SRA: 778 return performSRACombine(N, DAG, DCI, Subtarget); 779 case ISD::SRL: 780 return performSRLCombine(N, DAG, DCI, Subtarget); 781 case ISD::VSELECT: 782 return performVSELECTCombine(N, DAG); 783 case ISD::XOR: 784 Val = performXORCombine(N, DAG, Subtarget); 785 break; 786 case ISD::SETCC: 787 Val = performSETCCCombine(N, DAG); 788 break; 789 } 790 791 if (Val.getNode()) 792 return Val; 793 794 return MipsTargetLowering::PerformDAGCombine(N, DCI); 795} 796 797MachineBasicBlock * 798MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 799 MachineBasicBlock *BB) const { 800 switch (MI->getOpcode()) { 801 default: 802 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 803 case Mips::BPOSGE32_PSEUDO: 804 return emitBPOSGE32(MI, BB); 805 case Mips::SNZ_B_PSEUDO: 806 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 807 case Mips::SNZ_H_PSEUDO: 808 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 809 case Mips::SNZ_W_PSEUDO: 810 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 811 case Mips::SNZ_D_PSEUDO: 812 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 813 case Mips::SNZ_V_PSEUDO: 814 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 815 case Mips::SZ_B_PSEUDO: 816 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 817 case Mips::SZ_H_PSEUDO: 818 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 819 case Mips::SZ_W_PSEUDO: 820 return 
emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 821 case Mips::SZ_D_PSEUDO: 822 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 823 case Mips::SZ_V_PSEUDO: 824 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 825 case Mips::COPY_FW_PSEUDO: 826 return emitCOPY_FW(MI, BB); 827 case Mips::COPY_FD_PSEUDO: 828 return emitCOPY_FD(MI, BB); 829 case Mips::INSERT_FW_PSEUDO: 830 return emitINSERT_FW(MI, BB); 831 case Mips::INSERT_FD_PSEUDO: 832 return emitINSERT_FD(MI, BB); 833 } 834} 835 836bool MipsSETargetLowering:: 837isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, 838 unsigned NextStackOffset, 839 const MipsFunctionInfo& FI) const { 840 if (!EnableMipsTailCalls) 841 return false; 842 843 // Return false if either the callee or caller has a byval argument. 844 if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) 845 return false; 846 847 // Return true if the callee's argument area is no larger than the 848 // caller's. 849 return NextStackOffset <= FI.getIncomingArgSize(); 850} 851 852void MipsSETargetLowering:: 853getOpndList(SmallVectorImpl<SDValue> &Ops, 854 std::deque< std::pair<unsigned, SDValue> > &RegsToPass, 855 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 856 CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { 857 // T9 should contain the address of the callee function if 858 // -reloction-model=pic or it is an indirect call. 859 if (IsPICCall || !GlobalOrExternal) { 860 unsigned T9Reg = IsN64 ? 
Mips::T9_64 : Mips::T9; 861 RegsToPass.push_front(std::make_pair(T9Reg, Callee)); 862 } else 863 Ops.push_back(Callee); 864 865 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 866 InternalLinkage, CLI, Callee, Chain); 867} 868 869SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 870 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 871 872 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 873 return MipsTargetLowering::lowerLOAD(Op, DAG); 874 875 // Replace a double precision load with two i32 loads and a buildpair64. 876 SDLoc DL(Op); 877 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 878 EVT PtrVT = Ptr.getValueType(); 879 880 // i32 load from lower address. 881 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, 882 MachinePointerInfo(), Nd.isVolatile(), 883 Nd.isNonTemporal(), Nd.isInvariant(), 884 Nd.getAlignment()); 885 886 // i32 load from higher address. 887 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); 888 SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr, 889 MachinePointerInfo(), Nd.isVolatile(), 890 Nd.isNonTemporal(), Nd.isInvariant(), 891 std::min(Nd.getAlignment(), 4U)); 892 893 if (!Subtarget->isLittle()) 894 std::swap(Lo, Hi); 895 896 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 897 SDValue Ops[2] = {BP, Hi.getValue(1)}; 898 return DAG.getMergeValues(Ops, 2, DL); 899} 900 901SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 902 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 903 904 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 905 return MipsTargetLowering::lowerSTORE(Op, DAG); 906 907 // Replace a double precision store with two extractelement64s and i32 stores. 
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  // Split the f64 value into its two 32-bit halves.
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, MVT::i32));

  // On big-endian targets the high half goes to the lower address.
  if (!Subtarget->isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
                       Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
                       Nd.getTBAAInfo());

  // i32 store to higher address. Alignment is capped at 4 for the offset
  // word, mirroring lowerLOAD.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      Nd.isVolatile(), Nd.isNonTemporal(),
                      std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
}

/// Lower a multiply/divide-style node to a MipsISD node that produces the
/// HI/LO accumulator pair (MVT::Untyped), then extract whichever of the LO
/// and HI results the caller asked for.
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::ExtractLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::ExtractHI, DL, Ty, Mult);

  // Single-result case: return whichever half was requested.
  if (!HasLo || !HasHi)
    return HasLo ?
Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, 2, DL);
}


// Pack a 64-bit value into the HI/LO accumulator pair: extract the low and
// high 32-bit halves and combine them with MipsISD::InsertLOHI into an
// MVT::Untyped accumulator value.
static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) {
  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(0, MVT::i32));
  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
                             DAG.getConstant(1, MVT::i32));
  return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
}

// Unpack the HI/LO accumulator back into an i64 via BUILD_PAIR(lo, hi).
static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::ExtractLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::ExtractHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64. An i64 operand is the accumulator
  // input and must be repackaged via initAccumulator.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output. An i64 result becomes MVT::Untyped (the accumulator) and is
  // unpacked below with extractLOHI.
  SmallVector<EVT, 2> ResTys;

  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
       I != E; ++I)
    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  // Forward the chain output alongside the (possibly repackaged) value.
  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, 2, DL);
}

// Lower an MSA copy intrinsic into the specified SelectionDAG node
// (VEXTRACT_SEXT_ELT / VEXTRACT_ZEXT_ELT): operand 1 is the vector,
// operand 2 the element index; the element type is attached as a VTSDNode.
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

// Splat the (i32) immediate operand ImmOp across all lanes of ResTy via a
// BUILD_VECTOR. A v2i64 result is built as a v4i32 (each i64 lane emitted as
// the sign word `ImmOp >> 31` followed by ImmOp) and bitcast back.
// NOTE(review): each i64 lane is pushed as {hi, lo}, but on little-endian
// element 0 of the v4i32 is the LOW word of the i64 — this ordering looks
// suspect; confirm against the ldi.d/splat tests.
static SDValue
lowerMSASplatImm(SDLoc DL, EVT ResTy, SDValue ImmOp, SelectionDAG &DAG) {
  EVT ViaVecTy = ResTy;
  SmallVector<SDValue, 16> Ops;
  SDValue ImmHiOp;

  if (ViaVecTy == MVT::v2i64) {
    ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp,
                          DAG.getConstant(31, MVT::i32));
    for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) {
      Ops.push_back(ImmHiOp);
      Ops.push_back(ImmOp);
    }
    ViaVecTy = MVT::v4i32;
  } else {
    for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
      Ops.push_back(ImmOp);
  }

  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0],
                               Ops.size());

  if (ResTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result);

  return Result;
}

static SDValue
// Convenience overload: splat the immediate found at operand index ImmOp of
// Op across Op's result type.
lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
  return lowerMSASplatImm(SDLoc(Op), Op->getValueType(0),
                          Op->getOperand(ImmOp), DAG);
}

/// Lower ISD::INTRINSIC_WO_CHAIN. DSP accumulator intrinsics are routed
/// through lowerDSPIntr; most MSA intrinsics map directly onto generic ISD
/// or MipsISD vector nodes, with the "*i" (immediate) forms splatting their
/// immediate operand first. Unhandled intrinsics return SDValue() and are
/// left to instruction selection.
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);

  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
  default:
    return SDValue();
  case Intrinsic::mips_shilo:
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
  case Intrinsic::mips_dpau_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
  case Intrinsic::mips_dpau_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
  case Intrinsic::mips_dpsu_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
  case Intrinsic::mips_dpsu_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
  case Intrinsic::mips_dpa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
  case Intrinsic::mips_dps_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
  case Intrinsic::mips_dpax_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
  case Intrinsic::mips_dpsx_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
  case Intrinsic::mips_mulsa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
  case Intrinsic::mips_mult:
    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
  case Intrinsic::mips_multu:
    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
  case Intrinsic::mips_madd:
    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
  case Intrinsic::mips_maddu:
    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
  case Intrinsic::mips_msub:
    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
  case Intrinsic::mips_msubu:
    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_addvi_b:
  case Intrinsic::mips_addvi_h:
  case Intrinsic::mips_addvi_w:
  case Intrinsic::mips_addvi_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_and_v:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_andi_b:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_bnz_b:
  case Intrinsic::mips_bnz_h:
  case Intrinsic::mips_bnz_w:
  case Intrinsic::mips_bnz_d:
    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bnz_v:
    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bsel_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2),
                       Op->getOperand(3));
  case Intrinsic::mips_bseli_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2),
                       lowerMSASplatImm(Op, 3, DAG));
  case Intrinsic::mips_bz_b:
  case Intrinsic::mips_bz_h:
  case Intrinsic::mips_bz_w:
  case Intrinsic::mips_bz_d:
    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bz_v:
    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ceq_b:
  case Intrinsic::mips_ceq_h:
  case Intrinsic::mips_ceq_w:
  case Intrinsic::mips_ceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETEQ);
  case Intrinsic::mips_ceqi_b:
  case Intrinsic::mips_ceqi_h:
  case Intrinsic::mips_ceqi_w:
  case Intrinsic::mips_ceqi_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ);
  case Intrinsic::mips_cle_s_b:
  case Intrinsic::mips_cle_s_h:
  case Intrinsic::mips_cle_s_w:
  case Intrinsic::mips_cle_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLE);
  case Intrinsic::mips_clei_s_b:
  case Intrinsic::mips_clei_s_h:
  case Intrinsic::mips_clei_s_w:
  case Intrinsic::mips_clei_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETLE);
  case Intrinsic::mips_cle_u_b:
  case Intrinsic::mips_cle_u_h:
  case Intrinsic::mips_cle_u_w:
  case Intrinsic::mips_cle_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_clei_u_b:
  case Intrinsic::mips_clei_u_h:
  case Intrinsic::mips_clei_u_w:
  case Intrinsic::mips_clei_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
  case Intrinsic::mips_clt_s_b:
  case Intrinsic::mips_clt_s_h:
  case Intrinsic::mips_clt_s_w:
  case Intrinsic::mips_clt_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLT);
  case Intrinsic::mips_clti_s_b:
  case Intrinsic::mips_clti_s_h:
  case Intrinsic::mips_clti_s_w:
  case Intrinsic::mips_clti_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETLT);
  case Intrinsic::mips_clt_u_b:
  case Intrinsic::mips_clt_u_h:
  case Intrinsic::mips_clt_u_w:
  case Intrinsic::mips_clt_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_clti_u_b:
  case Intrinsic::mips_clti_u_h:
  case Intrinsic::mips_clti_u_w:
  case Intrinsic::mips_clti_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
  case Intrinsic::mips_copy_s_b:
  case Intrinsic::mips_copy_s_h:
  case Intrinsic::mips_copy_s_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
  case Intrinsic::mips_copy_s_d:
    // Don't lower directly into VEXTRACT_SEXT_ELT since i64 might be illegal.
    // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
    // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_copy_u_b:
  case Intrinsic::mips_copy_u_h:
  case Intrinsic::mips_copy_u_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
  case Intrinsic::mips_copy_u_d:
    // Don't lower directly into VEXTRACT_ZEXT_ELT since i64 might be illegal.
    // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
    // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
    //
    // Note: When i64 is illegal, this results in copy_s.w instructions instead
    // of copy_u.w instructions. This makes no difference to the behaviour
    // since i64 is only illegal when the register file is 32-bit.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_div_s_b:
  case Intrinsic::mips_div_s_h:
  case Intrinsic::mips_div_s_w:
  case Intrinsic::mips_div_s_d:
    return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_div_u_b:
  case Intrinsic::mips_div_u_h:
  case Intrinsic::mips_div_u_w:
  case Intrinsic::mips_div_u_d:
    return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fadd_w:
  case Intrinsic::mips_fadd_d:
    return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
  case Intrinsic::mips_fceq_w:
  case Intrinsic::mips_fceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOEQ);
  case Intrinsic::mips_fcle_w:
  case Intrinsic::mips_fcle_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLE);
  case Intrinsic::mips_fclt_w:
  case Intrinsic::mips_fclt_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLT);
  case Intrinsic::mips_fcne_w:
  case Intrinsic::mips_fcne_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETONE);
  case Intrinsic::mips_fcor_w:
  case Intrinsic::mips_fcor_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETO);
  case Intrinsic::mips_fcueq_w:
  case Intrinsic::mips_fcueq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUEQ);
  case Intrinsic::mips_fcule_w:
  case Intrinsic::mips_fcule_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_fcult_w:
  case Intrinsic::mips_fcult_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_fcun_w:
  case Intrinsic::mips_fcun_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUO);
  case Intrinsic::mips_fcune_w:
  case Intrinsic::mips_fcune_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUNE);
  case Intrinsic::mips_fdiv_w:
  case Intrinsic::mips_fdiv_d:
    return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fill_b:
  case Intrinsic::mips_fill_h:
  case Intrinsic::mips_fill_w:
  case Intrinsic::mips_fill_d: {
    // Splat the scalar operand across all lanes with a BUILD_VECTOR.
    SmallVector<SDValue, 16> Ops;
    EVT ResTy = Op->getValueType(0);

    for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
      Ops.push_back(Op->getOperand(1));

    // If ResTy is v2i64 then the type legalizer will break this node down into
    // an equivalent v4i32.
    return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size());
  }
  case Intrinsic::mips_flog2_w:
  case Intrinsic::mips_flog2_d:
    return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fmul_w:
  case Intrinsic::mips_fmul_d:
    return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_frint_w:
  case Intrinsic::mips_frint_d:
    return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fsqrt_w:
  case Intrinsic::mips_fsqrt_d:
    return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fsub_w:
  case Intrinsic::mips_fsub_d:
    return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ilvev_b:
  case Intrinsic::mips_ilvev_h:
  case Intrinsic::mips_ilvev_w:
  case Intrinsic::mips_ilvev_d:
    return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvl_b:
  case Intrinsic::mips_ilvl_h:
  case Intrinsic::mips_ilvl_w:
  case Intrinsic::mips_ilvl_d:
    return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvod_b:
  case Intrinsic::mips_ilvod_h:
  case Intrinsic::mips_ilvod_w:
  case Intrinsic::mips_ilvod_d:
    return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_ilvr_b:
  case Intrinsic::mips_ilvr_h:
  case Intrinsic::mips_ilvr_w:
  case Intrinsic::mips_ilvr_d:
    return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_insert_b:
  case Intrinsic::mips_insert_h:
  case Intrinsic::mips_insert_w:
  case Intrinsic::mips_insert_d:
    // Intrinsic operand order is (vec, idx, val); INSERT_VECTOR_ELT wants
    // (vec, val, idx), hence operands 3 and 2 are swapped here.
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
  case Intrinsic::mips_ldi_b:
  case Intrinsic::mips_ldi_h:
  case Intrinsic::mips_ldi_w:
  case Intrinsic::mips_ldi_d:
    return lowerMSASplatImm(Op, 1, DAG);
  case Intrinsic::mips_max_s_b:
  case Intrinsic::mips_max_s_h:
  case Intrinsic::mips_max_s_w:
  case Intrinsic::mips_max_s_d:
    return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_max_u_b:
  case Intrinsic::mips_max_u_h:
  case Intrinsic::mips_max_u_w:
  case Intrinsic::mips_max_u_d:
    return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_maxi_s_b:
  case Intrinsic::mips_maxi_s_h:
  case Intrinsic::mips_maxi_s_w:
  case Intrinsic::mips_maxi_s_d:
    return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_maxi_u_b:
  case Intrinsic::mips_maxi_u_h:
  case Intrinsic::mips_maxi_u_w:
  case Intrinsic::mips_maxi_u_d:
    return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_min_s_b:
  case Intrinsic::mips_min_s_h:
  case Intrinsic::mips_min_s_w:
  case Intrinsic::mips_min_s_d:
    return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_min_u_b:
  case Intrinsic::mips_min_u_h:
  case Intrinsic::mips_min_u_w:
  case Intrinsic::mips_min_u_d:
    return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_mini_s_b:
  case Intrinsic::mips_mini_s_h:
  case Intrinsic::mips_mini_s_w:
  case Intrinsic::mips_mini_s_d:
    return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_mini_u_b:
  case Intrinsic::mips_mini_u_h:
  case Intrinsic::mips_mini_u_w:
  case Intrinsic::mips_mini_u_d:
    return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_mod_s_b:
  case Intrinsic::mips_mod_s_h:
  case Intrinsic::mips_mod_s_w:
  case Intrinsic::mips_mod_s_d:
    return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_mod_u_b:
  case Intrinsic::mips_mod_u_h:
  case Intrinsic::mips_mod_u_w:
  case Intrinsic::mips_mod_u_d:
    return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_mulv_b:
  case Intrinsic::mips_mulv_h:
  case Intrinsic::mips_mulv_w:
  case Intrinsic::mips_mulv_d:
    return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_nlzc_b:
  case Intrinsic::mips_nlzc_h:
  case Intrinsic::mips_nlzc_w:
  case Intrinsic::mips_nlzc_d:
    return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_nor_v: {
    // nor(a, b) == not(or(a, b))
    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                              Op->getOperand(1), Op->getOperand(2));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
  case Intrinsic::mips_nori_b: {
    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                              Op->getOperand(1),
                              lowerMSASplatImm(Op, 2, DAG));
    return DAG.getNOT(DL, Res, Res->getValueType(0));
  }
  case Intrinsic::mips_or_v:
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ori_b:
    return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_pckev_b:
  case Intrinsic::mips_pckev_h:
  case Intrinsic::mips_pckev_w:
  case Intrinsic::mips_pckev_d:
    return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_pckod_b:
  case Intrinsic::mips_pckod_h:
  case Intrinsic::mips_pckod_w:
  case Intrinsic::mips_pckod_d:
    return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_pcnt_b:
  case Intrinsic::mips_pcnt_h:
  case Intrinsic::mips_pcnt_w:
  case Intrinsic::mips_pcnt_d:
    return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_shf_b:
  case Intrinsic::mips_shf_h:
  case Intrinsic::mips_shf_w:
    // MipsISD::SHF takes the immediate first, then the vector.
    return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
                       Op->getOperand(2), Op->getOperand(1));
  case Intrinsic::mips_sll_b:
  case Intrinsic::mips_sll_h:
  case Intrinsic::mips_sll_w:
  case Intrinsic::mips_sll_d:
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_slli_b:
  case Intrinsic::mips_slli_h:
  case Intrinsic::mips_slli_w:
  case Intrinsic::mips_slli_d:
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_splati_b:
  case Intrinsic::mips_splati_h:
  case Intrinsic::mips_splati_w:
  case Intrinsic::mips_splati_d:
    // splati is a VSHF whose mask is the splatted element index.
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
                       Op->getOperand(1));
  case Intrinsic::mips_sra_b:
  case Intrinsic::mips_sra_h:
  case Intrinsic::mips_sra_w:
  case Intrinsic::mips_sra_d:
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_srai_b:
  case Intrinsic::mips_srai_h:
  case Intrinsic::mips_srai_w:
  case Intrinsic::mips_srai_d:
    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_srl_b:
  case Intrinsic::mips_srl_h:
  case Intrinsic::mips_srl_w:
  case Intrinsic::mips_srl_d:
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_srli_b:
  case Intrinsic::mips_srli_h:
  case Intrinsic::mips_srli_w:
  case Intrinsic::mips_srli_d:
    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_subv_b:
  case Intrinsic::mips_subv_h:
  case Intrinsic::mips_subv_w:
  case Intrinsic::mips_subv_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_subvi_b:
  case Intrinsic::mips_subvi_h:
  case Intrinsic::mips_subvi_w:
  case Intrinsic::mips_subvi_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_vshf_b:
  case Intrinsic::mips_vshf_h:
  case Intrinsic::mips_vshf_w:
  case Intrinsic::mips_vshf_d:
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_xor_v:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_xori_b:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  }
}

// Lower an MSA ld/ldx intrinsic to a generic LOAD from (base + offset).
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);

  // Fold the offset into the address; the load is emitted as 16-byte aligned.
  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false,
                     false, false, 16);
}

/// Lower ISD::INTRINSIC_W_CHAIN: DSP accumulator intrinsics go through
/// lowerDSPIntr, MSA vector loads through lowerMSALoadIntr; anything else is
/// left for instruction selection.
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return
lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
  case Intrinsic::mips_ldx_b:
  case Intrinsic::mips_ldx_h:
  case Intrinsic::mips_ldx_w:
  case Intrinsic::mips_ldx_d:
    return lowerMSALoadIntr(Op, DAG, Intr);
  }
}

// Lower an MSA st/stx intrinsic to a generic STORE to (base + offset).
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);

  // Fold the offset into the address; the store is emitted as 16-byte aligned.
  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false,
                      false, 16);
}

/// Lower ISD::INTRINSIC_VOID: only the MSA vector stores are handled here;
/// everything else is left for instruction selection.
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
  case Intrinsic::mips_stx_b:
  case Intrinsic::mips_stx_h:
  case Intrinsic::mips_stx_w:
  case Intrinsic::mips_stx_d:
    return lowerMSAStoreIntr(Op, DAG, Intr);
  }
}

/// \brief Check if the given BuildVectorSDNode is a splat.
/// This method currently relies on DAG nodes being reused when equivalent,
/// so it's possible for this to return false even when isConstantSplat returns
/// true.
static bool isSplatVector(const BuildVectorSDNode *N) {
  unsigned int nOps = N->getNumOperands();
  assert(nOps > 1 && "isSplat has 0 or 1 sized build vector");

  SDValue Operand0 = N->getOperand(0);

  // All operands must be identical (pointer-equal SDValues) to operand 0.
  for (unsigned int i = 1; i < nOps; ++i) {
    if (N->getOperand(i) != Operand0)
      return false;
  }

  return true;
}

// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Op0 = Op->getOperand(0);
  EVT VecTy = Op0->getValueType(0);

  // Only 128-bit (MSA) vectors are handled here.
  if (!VecTy.is128BitVector())
    return SDValue();

  if (ResTy.isInteger()) {
    SDValue Op1 = Op->getOperand(1);
    EVT EltTy = VecTy.getVectorElementType();
    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
                       DAG.getValueType(EltTy));
  }

  // Floating-point extracts are already legal as-is.
  return Op;
}

// Return true if Op is an UNDEF, an integer constant, or an FP constant.
// NOTE(review): the dyn_cast<> results are only tested for null — isa<>
// would express the intent more directly.
static bool isConstantOrUndef(const SDValue Op) {
  if (Op->getOpcode() == ISD::UNDEF)
    return true;
  if (dyn_cast<ConstantSDNode>(Op))
    return true;
  if (dyn_cast<ConstantFPSDNode>(Op))
    return true;
  return false;
}

// Return true if ANY operand of the BUILD_VECTOR is a constant or undef
// (despite the name, this is not an all-operands check).
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return
false;
}

// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
// backend.
//
// Lowers according to the following rules:
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
//   2 less than or equal to 64 and the value fits into a signed 10-bit
//   immediate
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
//   is a power of 2 less than or equal to 64 and the value does not fit into a
//   signed 10-bit immediate
// - Non-constant splats are legal as-is.
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
// - All others are illegal and must be expanded.
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Only MSA 128-bit vectors are handled here.
  if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  // isConstantSplat's BigEndian flag follows the subtarget endianness.
  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget->isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value fits into a simm10 then we can use ldi.[bhwd]
    if (SplatValue.isSignedIntN(10))
      return Op;

    // Otherwise splat through fill.[bhw] on an integer vector of matching
    // element width and bitcast back.
    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    SmallVector<SDValue, 16> Ops;
    // fill.[bhw] takes a GPR operand, so sign-extend the splat value to i32.
    SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32);

    for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i)
      Ops.push_back(Constant);

    SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy,
                                 &Ops[0], Ops.size());

    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (isSplatVector(Node))
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, MVT::i32));
    }
    return Vector;
  }

  return SDValue();
}

// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above form.
1816// 1817// For example: 1818// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 1819// <8 x i32> <i32 3, i32 2, i32 1, i32 0, 1820// i32 7, i32 6, i32 5, i32 4> 1821// is lowered to: 1822// (SHF_H $w0, $w1, 27) 1823// where the 27 comes from: 1824// 3 + (2 << 2) + (1 << 4) + (0 << 6) 1825static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 1826 SmallVector<int, 16> Indices, 1827 SelectionDAG &DAG) { 1828 int SHFIndices[4] = { -1, -1, -1, -1 }; 1829 1830 if (Indices.size() < 4) 1831 return SDValue(); 1832 1833 for (unsigned i = 0; i < 4; ++i) { 1834 for (unsigned j = i; j < Indices.size(); j += 4) { 1835 int Idx = Indices[j]; 1836 1837 // Convert from vector index to 4-element subvector index 1838 // If an index refers to an element outside of the subvector then give up 1839 if (Idx != -1) { 1840 Idx -= 4 * (j / 4); 1841 if (Idx < 0 || Idx >= 4) 1842 return SDValue(); 1843 } 1844 1845 // If the mask has an undef, replace it with the current index. 1846 // Note that it might still be undef if the current index is also undef 1847 if (SHFIndices[i] == -1) 1848 SHFIndices[i] = Idx; 1849 1850 // Check that non-undef values are the same as in the mask. If they 1851 // aren't then give up 1852 if (!(Idx == -1 || Idx == SHFIndices[i])) 1853 return SDValue(); 1854 } 1855 } 1856 1857 // Calculate the immediate. Replace any remaining undefs with zero 1858 APInt Imm(32, 0); 1859 for (int i = 3; i >= 0; --i) { 1860 int Idx = SHFIndices[i]; 1861 1862 if (Idx == -1) 1863 Idx = 0; 1864 1865 Imm <<= 2; 1866 Imm |= Idx & 0x3; 1867 } 1868 1869 return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy, 1870 DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); 1871} 1872 1873// Lower VECTOR_SHUFFLE into ILVEV (if possible). 1874// 1875// ILVEV interleaves the even elements from each vector. 1876// 1877// It is possible to lower into ILVEV when the mask takes the form: 1878// <0, n, 2, n+2, 4, n+4, ...> 1879// where n is the number of elements in the vector. 
1880// 1881// When undef's appear in the mask they are treated as if they were whatever 1882// value is necessary in order to fit the above form. 1883static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 1884 SmallVector<int, 16> Indices, 1885 SelectionDAG &DAG) { 1886 assert ((Indices.size() % 2) == 0); 1887 int WsIdx = 0; 1888 int WtIdx = ResTy.getVectorNumElements(); 1889 1890 for (unsigned i = 0; i < Indices.size(); i += 2) { 1891 if (Indices[i] != -1 && Indices[i] != WsIdx) 1892 return SDValue(); 1893 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 1894 return SDValue(); 1895 WsIdx += 2; 1896 WtIdx += 2; 1897 } 1898 1899 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0), 1900 Op->getOperand(1)); 1901} 1902 1903// Lower VECTOR_SHUFFLE into ILVOD (if possible). 1904// 1905// ILVOD interleaves the odd elements from each vector. 1906// 1907// It is possible to lower into ILVOD when the mask takes the form: 1908// <1, n+1, 3, n+3, 5, n+5, ...> 1909// where n is the number of elements in the vector. 1910// 1911// When undef's appear in the mask they are treated as if they were whatever 1912// value is necessary in order to fit the above form. 1913static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 1914 SmallVector<int, 16> Indices, 1915 SelectionDAG &DAG) { 1916 assert ((Indices.size() % 2) == 0); 1917 int WsIdx = 1; 1918 int WtIdx = ResTy.getVectorNumElements() + 1; 1919 1920 for (unsigned i = 0; i < Indices.size(); i += 2) { 1921 if (Indices[i] != -1 && Indices[i] != WsIdx) 1922 return SDValue(); 1923 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 1924 return SDValue(); 1925 WsIdx += 2; 1926 WtIdx += 2; 1927 } 1928 1929 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0), 1930 Op->getOperand(1)); 1931} 1932 1933// Lower VECTOR_SHUFFLE into ILVL (if possible). 1934// 1935// ILVL interleaves consecutive elements from the left half of each vector. 
1936// 1937// It is possible to lower into ILVL when the mask takes the form: 1938// <0, n, 1, n+1, 2, n+2, ...> 1939// where n is the number of elements in the vector. 1940// 1941// When undef's appear in the mask they are treated as if they were whatever 1942// value is necessary in order to fit the above form. 1943static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 1944 SmallVector<int, 16> Indices, 1945 SelectionDAG &DAG) { 1946 assert ((Indices.size() % 2) == 0); 1947 int WsIdx = 0; 1948 int WtIdx = ResTy.getVectorNumElements(); 1949 1950 for (unsigned i = 0; i < Indices.size(); i += 2) { 1951 if (Indices[i] != -1 && Indices[i] != WsIdx) 1952 return SDValue(); 1953 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 1954 return SDValue(); 1955 WsIdx ++; 1956 WtIdx ++; 1957 } 1958 1959 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0), 1960 Op->getOperand(1)); 1961} 1962 1963// Lower VECTOR_SHUFFLE into ILVR (if possible). 1964// 1965// ILVR interleaves consecutive elements from the right half of each vector. 1966// 1967// It is possible to lower into ILVR when the mask takes the form: 1968// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 1969// where n is the number of elements in the vector and x is half n. 1970// 1971// When undef's appear in the mask they are treated as if they were whatever 1972// value is necessary in order to fit the above form. 
1973static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 1974 SmallVector<int, 16> Indices, 1975 SelectionDAG &DAG) { 1976 assert ((Indices.size() % 2) == 0); 1977 unsigned NumElts = ResTy.getVectorNumElements(); 1978 int WsIdx = NumElts / 2; 1979 int WtIdx = NumElts + NumElts / 2; 1980 1981 for (unsigned i = 0; i < Indices.size(); i += 2) { 1982 if (Indices[i] != -1 && Indices[i] != WsIdx) 1983 return SDValue(); 1984 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 1985 return SDValue(); 1986 WsIdx ++; 1987 WtIdx ++; 1988 } 1989 1990 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0), 1991 Op->getOperand(1)); 1992} 1993 1994// Lower VECTOR_SHUFFLE into PCKEV (if possible). 1995// 1996// PCKEV copies the even elements of each vector into the result vector. 1997// 1998// It is possible to lower into PCKEV when the mask takes the form: 1999// <0, 2, 4, ..., n, n+2, n+4, ...> 2000// where n is the number of elements in the vector. 2001// 2002// When undef's appear in the mask they are treated as if they were whatever 2003// value is necessary in order to fit the above form. 2004static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2005 SmallVector<int, 16> Indices, 2006 SelectionDAG &DAG) { 2007 assert ((Indices.size() % 2) == 0); 2008 int Idx = 0; 2009 2010 for (unsigned i = 0; i < Indices.size(); ++i) { 2011 if (Indices[i] != -1 && Indices[i] != Idx) 2012 return SDValue(); 2013 Idx += 2; 2014 } 2015 2016 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0), 2017 Op->getOperand(1)); 2018} 2019 2020// Lower VECTOR_SHUFFLE into PCKOD (if possible). 2021// 2022// PCKOD copies the odd elements of each vector into the result vector. 2023// 2024// It is possible to lower into PCKOD when the mask takes the form: 2025// <1, 3, 5, ..., n+1, n+3, n+5, ...> 2026// where n is the number of elements in the vector. 
2027// 2028// When undef's appear in the mask they are treated as if they were whatever 2029// value is necessary in order to fit the above form. 2030static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2031 SmallVector<int, 16> Indices, 2032 SelectionDAG &DAG) { 2033 assert ((Indices.size() % 2) == 0); 2034 int Idx = 1; 2035 2036 for (unsigned i = 0; i < Indices.size(); ++i) { 2037 if (Indices[i] != -1 && Indices[i] != Idx) 2038 return SDValue(); 2039 Idx += 2; 2040 } 2041 2042 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), 2043 Op->getOperand(1)); 2044} 2045 2046// Lower VECTOR_SHUFFLE into VSHF. 2047// 2048// This mostly consists of converting the shuffle indices in Indices into a 2049// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2050// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2051// if the type is v8i16 and all the indices are less than 8 then the second 2052// operand is unused and can be replaced with anything. We choose to replace it 2053// with the used operand since this reduces the number of instructions overall. 
2054static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2055 SmallVector<int, 16> Indices, 2056 SelectionDAG &DAG) { 2057 SmallVector<SDValue, 16> Ops; 2058 SDValue Op0; 2059 SDValue Op1; 2060 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2061 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2062 bool Using1stVec = false; 2063 bool Using2ndVec = false; 2064 SDLoc DL(Op); 2065 int ResTyNumElts = ResTy.getVectorNumElements(); 2066 2067 for (int i = 0; i < ResTyNumElts; ++i) { 2068 // Idx == -1 means UNDEF 2069 int Idx = Indices[i]; 2070 2071 if (0 <= Idx && Idx < ResTyNumElts) 2072 Using1stVec = true; 2073 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2074 Using2ndVec = true; 2075 } 2076 2077 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 2078 ++I) 2079 Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); 2080 2081 SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0], 2082 Ops.size()); 2083 2084 if (Using1stVec && Using2ndVec) { 2085 Op0 = Op->getOperand(0); 2086 Op1 = Op->getOperand(1); 2087 } else if (Using1stVec) 2088 Op0 = Op1 = Op->getOperand(0); 2089 else if (Using2ndVec) 2090 Op0 = Op1 = Op->getOperand(1); 2091 else 2092 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2093 2094 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op0, Op1); 2095} 2096 2097// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2098// indices in the shuffle. 
2099SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 2100 SelectionDAG &DAG) const { 2101 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); 2102 EVT ResTy = Op->getValueType(0); 2103 2104 if (!ResTy.is128BitVector()) 2105 return SDValue(); 2106 2107 int ResTyNumElts = ResTy.getVectorNumElements(); 2108 SmallVector<int, 16> Indices; 2109 2110 for (int i = 0; i < ResTyNumElts; ++i) 2111 Indices.push_back(Node->getMaskElt(i)); 2112 2113 SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG); 2114 if (Result.getNode()) 2115 return Result; 2116 Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG); 2117 if (Result.getNode()) 2118 return Result; 2119 Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG); 2120 if (Result.getNode()) 2121 return Result; 2122 Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG); 2123 if (Result.getNode()) 2124 return Result; 2125 Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG); 2126 if (Result.getNode()) 2127 return Result; 2128 Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG); 2129 if (Result.getNode()) 2130 return Result; 2131 Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG); 2132 if (Result.getNode()) 2133 return Result; 2134 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); 2135} 2136 2137MachineBasicBlock * MipsSETargetLowering:: 2138emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ 2139 // $bb: 2140 // bposge32_pseudo $vr0 2141 // => 2142 // $bb: 2143 // bposge32 $tbb 2144 // $fbb: 2145 // li $vr2, 0 2146 // b $sink 2147 // $tbb: 2148 // li $vr1, 1 2149 // $sink: 2150 // $vr0 = phi($vr2, $fbb, $vr1, $tbb) 2151 2152 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2153 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2154 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 2155 DebugLoc DL = MI->getDebugLoc(); 2156 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2157 MachineFunction::iterator It = 
llvm::next(MachineFunction::iterator(BB)); 2158 MachineFunction *F = BB->getParent(); 2159 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 2160 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 2161 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 2162 F->insert(It, FBB); 2163 F->insert(It, TBB); 2164 F->insert(It, Sink); 2165 2166 // Transfer the remainder of BB and its successor edges to Sink. 2167 Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), 2168 BB->end()); 2169 Sink->transferSuccessorsAndUpdatePHIs(BB); 2170 2171 // Add successors. 2172 BB->addSuccessor(FBB); 2173 BB->addSuccessor(TBB); 2174 FBB->addSuccessor(Sink); 2175 TBB->addSuccessor(Sink); 2176 2177 // Insert the real bposge32 instruction to $BB. 2178 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); 2179 2180 // Fill $FBB. 2181 unsigned VR2 = RegInfo.createVirtualRegister(RC); 2182 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) 2183 .addReg(Mips::ZERO).addImm(0); 2184 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 2185 2186 // Fill $TBB. 2187 unsigned VR1 = RegInfo.createVirtualRegister(RC); 2188 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) 2189 .addReg(Mips::ZERO).addImm(1); 2190 2191 // Insert phi function to $Sink. 2192 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 2193 MI->getOperand(0).getReg()) 2194 .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); 2195 2196 MI->eraseFromParent(); // The pseudo instruction is gone now. 
2197 return Sink; 2198} 2199 2200MachineBasicBlock * MipsSETargetLowering:: 2201emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, 2202 unsigned BranchOp) const{ 2203 // $bb: 2204 // vany_nonzero $rd, $ws 2205 // => 2206 // $bb: 2207 // bnz.b $ws, $tbb 2208 // b $fbb 2209 // $fbb: 2210 // li $rd1, 0 2211 // b $sink 2212 // $tbb: 2213 // li $rd2, 1 2214 // $sink: 2215 // $rd = phi($rd1, $fbb, $rd2, $tbb) 2216 2217 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2218 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2219 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 2220 DebugLoc DL = MI->getDebugLoc(); 2221 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2222 MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); 2223 MachineFunction *F = BB->getParent(); 2224 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 2225 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 2226 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 2227 F->insert(It, FBB); 2228 F->insert(It, TBB); 2229 F->insert(It, Sink); 2230 2231 // Transfer the remainder of BB and its successor edges to Sink. 2232 Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), 2233 BB->end()); 2234 Sink->transferSuccessorsAndUpdatePHIs(BB); 2235 2236 // Add successors. 2237 BB->addSuccessor(FBB); 2238 BB->addSuccessor(TBB); 2239 FBB->addSuccessor(Sink); 2240 TBB->addSuccessor(Sink); 2241 2242 // Insert the real bnz.b instruction to $BB. 2243 BuildMI(BB, DL, TII->get(BranchOp)) 2244 .addReg(MI->getOperand(1).getReg()) 2245 .addMBB(TBB); 2246 2247 // Fill $FBB. 2248 unsigned RD1 = RegInfo.createVirtualRegister(RC); 2249 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 2250 .addReg(Mips::ZERO).addImm(0); 2251 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 2252 2253 // Fill $TBB. 
2254 unsigned RD2 = RegInfo.createVirtualRegister(RC); 2255 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 2256 .addReg(Mips::ZERO).addImm(1); 2257 2258 // Insert phi function to $Sink. 2259 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 2260 MI->getOperand(0).getReg()) 2261 .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB); 2262 2263 MI->eraseFromParent(); // The pseudo instruction is gone now. 2264 return Sink; 2265} 2266 2267// Emit the COPY_FW pseudo instruction. 2268// 2269// copy_fw_pseudo $fd, $ws, n 2270// => 2271// copy_u_w $rt, $ws, $n 2272// mtc1 $rt, $fd 2273// 2274// When n is zero, the equivalent operation can be performed with (potentially) 2275// zero instructions due to register overlaps. This optimization is never valid 2276// for lane 1 because it would require FR=0 mode which isn't supported by MSA. 2277MachineBasicBlock * MipsSETargetLowering:: 2278emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ 2279 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2280 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2281 DebugLoc DL = MI->getDebugLoc(); 2282 unsigned Fd = MI->getOperand(0).getReg(); 2283 unsigned Ws = MI->getOperand(1).getReg(); 2284 unsigned Lane = MI->getOperand(2).getImm(); 2285 2286 if (Lane == 0) 2287 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo); 2288 else { 2289 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 2290 2291 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(1); 2292 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 2293 } 2294 2295 MI->eraseFromParent(); // The pseudo instruction is gone now. 2296 return BB; 2297} 2298 2299// Emit the COPY_FD pseudo instruction. 
//
// copy_fd_pseudo $fd, $ws, n
// =>
//   splati.d $wt, $ws, $n
//   copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode which is the only supported mode in MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Fd = MI->getOperand(0).getReg();
  unsigned Ws = MI->getOperand(1).getReg();
  // NOTE(review): Lane is scaled by 2 but only ever compared against zero
  // below — presumably the scale relates to 32-bit sub-lane numbering; the
  // scaled value itself is never emitted. Confirm the intent.
  unsigned Lane = MI->getOperand(2).getImm() * 2;
  DebugLoc DL = MI->getDebugLoc();

  if (Lane == 0)
    // Element 0 overlaps the 64-bit FPR via sub_64, so a plain COPY suffices.
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    // For v2f64 the only nonzero lane is 1, so the splat immediate can be
    // hard-coded.
    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
2334// 2335// insert_fw_pseudo $wd, $wd_in, $n, $fs 2336// => 2337// subreg_to_reg $wt:sub_lo, $fs 2338// insve_w $wd[$n], $wd_in, $wt[0] 2339MachineBasicBlock * MipsSETargetLowering:: 2340emitINSERT_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ 2341 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2342 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2343 DebugLoc DL = MI->getDebugLoc(); 2344 unsigned Wd = MI->getOperand(0).getReg(); 2345 unsigned Wd_in = MI->getOperand(1).getReg(); 2346 unsigned Lane = MI->getOperand(2).getImm(); 2347 unsigned Fs = MI->getOperand(3).getReg(); 2348 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 2349 2350 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 2351 .addImm(0).addReg(Fs).addImm(Mips::sub_lo); 2352 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 2353 .addReg(Wd_in).addImm(Lane).addReg(Wt); 2354 2355 MI->eraseFromParent(); // The pseudo instruction is gone now. 2356 return BB; 2357} 2358 2359// Emit the INSERT_FD pseudo instruction. 
//
// insert_fd_pseudo $wd, $fs, n
// =>
//   subreg_to_reg $wt:sub_64, $fs
//   insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock * MipsSETargetLowering::
emitINSERT_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
  // insve.d requires FR=1 (64-bit FPU registers), the only mode MSA supports.
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  // Pseudo operands: result vector, input vector, lane number, f64 source.
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Wd_in = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();
  unsigned Fs = MI->getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Wrap the f64 value into a (partially undefined) 128-bit register via the
  // sub_64 subregister, then insert its element 0 into the requested lane.
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0).addReg(Fs).addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in).addImm(Lane).addReg(Wt);

  MI->eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}