MipsSEISelLowering.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
#include "MipsSEISelLowering.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

// Command-line switch to allow tail-call optimization on MIPS (off by
// default).
static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
                    cl::desc("MIPS: Enable tail calls."), cl::init(false));

// When set, f64 load/store is custom-lowered (see the NoDPLoadStore check at
// the end of the constructor) instead of using ldc1/sdc1.
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
  : MipsTargetLowering(TM) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
    // Expand all truncating stores and extending loads.
    unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;

    // Walk every (vector, vector) type pair for trunc-stores and every vector
    // type for extending loads; none of them are natively supported.
    for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
      for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
        setTruncStoreAction((MVT::SimpleValueType)VT0,
                            (MVT::SimpleValueType)VT1, Expand);

      setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
      setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
    }
  }

  if (Subtarget->hasDSP()) {
    // The two vector types the DSP ASE can hold in a 32-bit register.
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTys[i], Expand);

      // ...then re-mark the handful of operations the DSP ASE supports.
      setOperationAction(ISD::ADD, VecTys[i], Legal);
      setOperationAction(ISD::SUB, VecTys[i], Legal);
      setOperationAction(ISD::LOAD, VecTys[i], Legal);
      setOperationAction(ISD::STORE, VecTys[i], Legal);
      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
    }

    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::VSELECT);
  }

  if (Subtarget->hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget->hasMSA()) {
    // 128-bit MSA vector types, one register class per element size.
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::VSELECT);
    setTargetDAGCombine(ISD::XOR);
  }

  if (!Subtarget->mipsSEUsesSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget->isSingleFloat()) {
      if (Subtarget->isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  // Multiplication/division results live in the HI/LO accumulator, so these
  // are lowered to target-specific nodes (see lowerMulDiv).
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget->hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (isGP64bit()) {
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::ADDE);
  setTargetDAGCombine(ISD::SUBE);
  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (NoDPLoadStore) {
    // -mno-ldc1-sdc1: split f64 load/store into two f32 accesses.
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  computeRegisterProperties();
}

// Factory used by the MIPS target to create this lowering object.
const MipsTargetLowering *
llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
  return new MipsSETargetLowering(TM);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  // Int<->FP conversions are only legal where lanes line up with an FP
  // vector type (v4i32<->v4f32, v2i64<->v2f64).
  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  // MSA only provides eq/lt/le compares directly; the remaining condition
  // codes are expanded in terms of those.
  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::
addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  // v8f16 only supports the moves above; no arithmetic is marked legal for
  // it.
  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}

// Report which unaligned memory accesses the subtarget can perform; sets
// *Fast (when provided) for accesses expected to be cheap.
bool
MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                    unsigned,
                                                    bool *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget->systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation defined whether this is handled by hardware, software, or
    // a hybrid of the two but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = true;
    return true;
  }

  // Pre-R6: only i32/i64 unaligned accesses are supported (via the
  // lwl/lwr-style lowering elsewhere in the backend).
  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = true;
    return true;
  default:
    return false;
  }
}

// Dispatch custom-lowered operations to their lowering helpers, falling back
// to the generic MipsTargetLowering for everything else.
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch(Op.getOpcode()) {
  case ISD::LOAD:  return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// selectMADD -
// Transforms a subgraph in CurDAG if the following pattern is found:
//  (addc multLo, Lo0), (adde multHi, Hi0),
// where,
//  multHi/Lo: product of multiplication
//  Lo0: initial value of Lo register
//  Hi0: initial value of Hi register
// Return true if pattern matching was successful.
static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
  // ADDENode's second operand must be a flag output of an ADDC node in order
  // for the matching to be successful.
  SDNode *ADDCNode = ADDENode->getOperand(2).getNode();

  if (ADDCNode->getOpcode() != ISD::ADDC)
    return false;

  SDValue MultHi = ADDENode->getOperand(0);
  SDValue MultLo = ADDCNode->getOperand(0);
  SDNode *MultNode = MultHi.getNode();
  unsigned MultOpc = MultHi.getOpcode();

  // MultHi and MultLo must be generated by the same node,
  if (MultLo.getNode() != MultNode)
    return false;

  // and it must be a multiplication.
  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
    return false;

  // MultLo and MultHi must be the first and second output of MultNode
  // respectively.
  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
    return false;

  // Transform this to a MADD only if ADDENode and ADDCNode are the only users
  // of the values of MultNode, in which case MultNode will be removed in later
  // phases.
  // If there exist users other than ADDENode or ADDCNode, this function returns
  // here, which will result in MultNode being mapped to a single MULT
  // instruction node rather than a pair of MULT and MADD instructions being
  // produced.
  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
    return false;

  SDLoc DL(ADDENode);

  // Initialize accumulator.
  SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
                                  ADDCNode->getOperand(1),
                                  ADDENode->getOperand(1));

  // create MipsMAdd(u) node
  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;

  SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
                                 MultNode->getOperand(0),// Factor 0
                                 MultNode->getOperand(1),// Factor 1
                                 ACCIn);

  // replace uses of adde and addc here
  if (!SDValue(ADDCNode, 0).use_empty()) {
    SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
  }
  if (!SDValue(ADDENode, 0).use_empty()) {
    SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
  }

  return true;
}

// selectMSUB -
// Transforms a subgraph in CurDAG if the following pattern is found:
//  (addc Lo0, multLo), (sube Hi0, multHi),
// where,
//  multHi/Lo: product of multiplication
//  Lo0: initial value of Lo register
//  Hi0: initial value of Hi register
// Return true if pattern matching was successful.
static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
  // SUBENode's second operand must be a flag output of an SUBC node in order
  // for the matching to be successful.
  SDNode *SUBCNode = SUBENode->getOperand(2).getNode();

  if (SUBCNode->getOpcode() != ISD::SUBC)
    return false;

  SDValue MultHi = SUBENode->getOperand(1);
  SDValue MultLo = SUBCNode->getOperand(1);
  SDNode *MultNode = MultHi.getNode();
  unsigned MultOpc = MultHi.getOpcode();

  // MultHi and MultLo must be generated by the same node,
  if (MultLo.getNode() != MultNode)
    return false;

  // and it must be a multiplication.
  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
    return false;

  // MultLo and MultHi must be the first and second output of MultNode
  // respectively.
  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
    return false;

  // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
  // of the values of MultNode, in which case MultNode will be removed in later
  // phases.
  // If there exist users other than SUBENode or SUBCNode, this function returns
  // here, which will result in MultNode being mapped to a single MULT
  // instruction node rather than a pair of MULT and MSUB instructions being
  // produced.
  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
    return false;

  SDLoc DL(SUBENode);

  // Initialize accumulator.
  SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
                                  SUBCNode->getOperand(0),
                                  SUBENode->getOperand(0));

  // create MipsSub(u) node
  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;

  // NOTE(review): selectMADD builds the analogous MAdd node with
  // MVT::Untyped, but here the result type is MVT::Glue even though the
  // MFLO/MFHI nodes below consume it like an accumulator value — confirm
  // whether MVT::Untyped was intended for consistency.
  SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
                                 MultNode->getOperand(0),// Factor 0
                                 MultNode->getOperand(1),// Factor 1
                                 ACCIn);

  // replace uses of sube and subc here
  if (!SDValue(SUBCNode, 0).use_empty()) {
    SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
  }
  if (!SDValue(SUBENode, 0).use_empty()) {
    SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
  }

  return true;
}

// Combine (adde ...) into a multiply-add when the MADD pattern above matches;
// only done after legalization and only for i32 on mips32+.
static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const MipsSubtarget *Subtarget) {
  if (DCI.isBeforeLegalize())
    return SDValue();

  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
      selectMADD(N, &DAG))
    return SDValue(N, 0);

  return SDValue();
}

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes
//   MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget *Subtarget) {
  if (!Subtarget->hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    // exactLogBase2 is negative when Mask+1 is not a power of two.
    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    // The AND mask fully covers (or exceeds, for ZExt) the bits produced by
    // the extraction's own extension, so the AND is redundant and the node
    // can be rewritten in place as a zero-extending extract.
    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
                      Op0->getVTList(),
                      makeArrayRef(Ops, Op0->getNumOperands()));
      return Op0;
    }
  }

  return SDValue();
}

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is a ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat.
// The differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // MinSplatBits of 8 matches the smallest MSA element size; the last
  // argument is isConstantSplat's isBigEndian flag.
  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnesValue();

  return false;
}

// Test whether N is the bitwise inverse of OfNode, i.e. N is
// (xor OfNode, all-ones) in either operand order.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
577// 578// Performs the following transformations: 579// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 580// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 581// vector type. 582static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 583 TargetLowering::DAGCombinerInfo &DCI, 584 const MipsSubtarget *Subtarget) { 585 if (!Subtarget->hasMSA()) 586 return SDValue(); 587 588 EVT Ty = N->getValueType(0); 589 590 if (!Ty.is128BitVector()) 591 return SDValue(); 592 593 SDValue Op0 = N->getOperand(0); 594 SDValue Op1 = N->getOperand(1); 595 596 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 597 SDValue Op0Op0 = Op0->getOperand(0); 598 SDValue Op0Op1 = Op0->getOperand(1); 599 SDValue Op1Op0 = Op1->getOperand(0); 600 SDValue Op1Op1 = Op1->getOperand(1); 601 bool IsLittleEndian = !Subtarget->isLittle(); 602 603 SDValue IfSet, IfClr, Cond; 604 bool IsConstantMask = false; 605 APInt Mask, InvMask; 606 607 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 608 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 609 // looking. 610 // IfClr will be set if we find a valid match. 611 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 612 Cond = Op0Op0; 613 IfSet = Op0Op1; 614 615 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 616 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 617 IfClr = Op1Op1; 618 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 619 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 620 IfClr = Op1Op0; 621 622 IsConstantMask = true; 623 } 624 625 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 626 // thing again using this mask. 627 // IfClr will be set if we find a valid match. 
628 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 629 Cond = Op0Op1; 630 IfSet = Op0Op0; 631 632 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 633 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 634 IfClr = Op1Op1; 635 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 636 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 637 IfClr = Op1Op0; 638 639 IsConstantMask = true; 640 } 641 642 // If IfClr is not yet set, try looking for a non-constant match. 643 // IfClr will be set if we find a valid match amongst the eight 644 // possibilities. 645 if (!IfClr.getNode()) { 646 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 647 Cond = Op1Op0; 648 IfSet = Op1Op1; 649 IfClr = Op0Op1; 650 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 651 Cond = Op1Op0; 652 IfSet = Op1Op1; 653 IfClr = Op0Op0; 654 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 655 Cond = Op1Op1; 656 IfSet = Op1Op0; 657 IfClr = Op0Op1; 658 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 659 Cond = Op1Op1; 660 IfSet = Op1Op0; 661 IfClr = Op0Op0; 662 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 663 Cond = Op0Op0; 664 IfSet = Op0Op1; 665 IfClr = Op1Op1; 666 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 667 Cond = Op0Op0; 668 IfSet = Op0Op1; 669 IfClr = Op1Op0; 670 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 671 Cond = Op0Op1; 672 IfSet = Op0Op0; 673 IfClr = Op1Op1; 674 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 675 Cond = Op0Op1; 676 IfSet = Op0Op0; 677 IfClr = Op1Op0; 678 } 679 } 680 681 // At this point, IfClr will be set if we have a valid match. 682 if (!IfClr.getNode()) 683 return SDValue(); 684 685 assert(Cond.getNode() && IfSet.getNode()); 686 687 // Fold degenerate cases. 688 if (IsConstantMask) { 689 if (Mask.isAllOnesValue()) 690 return IfSet; 691 else if (Mask == 0) 692 return IfClr; 693 } 694 695 // Transform the DAG into an equivalent VSELECT. 
696 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 697 } 698 699 return SDValue(); 700} 701 702static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, 703 TargetLowering::DAGCombinerInfo &DCI, 704 const MipsSubtarget *Subtarget) { 705 if (DCI.isBeforeLegalize()) 706 return SDValue(); 707 708 if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && 709 selectMSUB(N, &DAG)) 710 return SDValue(N, 0); 711 712 return SDValue(); 713} 714 715static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT, 716 EVT ShiftTy, SelectionDAG &DAG) { 717 // Clear the upper (64 - VT.sizeInBits) bits. 718 C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits()); 719 720 // Return 0. 721 if (C == 0) 722 return DAG.getConstant(0, VT); 723 724 // Return x. 725 if (C == 1) 726 return X; 727 728 // If c is power of 2, return (shl x, log2(c)). 729 if (isPowerOf2_64(C)) 730 return DAG.getNode(ISD::SHL, DL, VT, X, 731 DAG.getConstant(Log2_64(C), ShiftTy)); 732 733 unsigned Log2Ceil = Log2_64_Ceil(C); 734 uint64_t Floor = 1LL << Log2_64(C); 735 uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil; 736 737 // If |c - floor_c| <= |c - ceil_c|, 738 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 739 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 740 if (C - Floor <= Ceil - C) { 741 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 742 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 743 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 744 } 745 746 // If |c - floor_c| > |c - ceil_c|, 747 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
748 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 749 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 750 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 751} 752 753static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 754 const TargetLowering::DAGCombinerInfo &DCI, 755 const MipsSETargetLowering *TL) { 756 EVT VT = N->getValueType(0); 757 758 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 759 if (!VT.isVector()) 760 return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), 761 VT, TL->getScalarShiftAmountTy(VT), DAG); 762 763 return SDValue(N, 0); 764} 765 766static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 767 SelectionDAG &DAG, 768 const MipsSubtarget *Subtarget) { 769 // See if this is a vector splat immediate node. 770 APInt SplatValue, SplatUndef; 771 unsigned SplatBitSize; 772 bool HasAnyUndefs; 773 unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); 774 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 775 776 if (!Subtarget->hasDSP()) 777 return SDValue(); 778 779 if (!BV || 780 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 781 EltSize, !Subtarget->isLittle()) || 782 (SplatBitSize != EltSize) || 783 (SplatValue.getZExtValue() >= EltSize)) 784 return SDValue(); 785 786 return DAG.getNode(Opc, SDLoc(N), Ty, N->getOperand(0), 787 DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); 788} 789 790static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 791 TargetLowering::DAGCombinerInfo &DCI, 792 const MipsSubtarget *Subtarget) { 793 EVT Ty = N->getValueType(0); 794 795 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 796 return SDValue(); 797 798 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 799} 800 801// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 802// constant splats into MipsISD::SHRA_DSP for DSPr2. 
803// 804// Performs the following transformations: 805// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 806// sign/zero-extension is completely overwritten by the new one performed by 807// the ISD::SRA and ISD::SHL nodes. 808// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 809// sequence. 810// 811// See performDSPShiftCombine for more information about the transformation 812// used for DSPr2. 813static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 814 TargetLowering::DAGCombinerInfo &DCI, 815 const MipsSubtarget *Subtarget) { 816 EVT Ty = N->getValueType(0); 817 818 if (Subtarget->hasMSA()) { 819 SDValue Op0 = N->getOperand(0); 820 SDValue Op1 = N->getOperand(1); 821 822 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 823 // where $d + sizeof($c) == 32 824 // or $d + sizeof($c) <= 32 and SExt 825 // -> (MipsVExtractSExt $a, $b, $c) 826 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 827 SDValue Op0Op0 = Op0->getOperand(0); 828 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 829 830 if (!ShAmount) 831 return SDValue(); 832 833 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 834 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 835 return SDValue(); 836 837 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 838 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 839 840 if (TotalBits == 32 || 841 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 842 TotalBits <= 32)) { 843 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 844 Op0Op0->getOperand(2) }; 845 DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, 846 Op0Op0->getVTList(), 847 makeArrayRef(Ops, Op0Op0->getNumOperands())); 848 return Op0Op0; 849 } 850 } 851 } 852 853 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) 854 return SDValue(); 855 856 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 
857} 858 859 860static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 861 TargetLowering::DAGCombinerInfo &DCI, 862 const MipsSubtarget *Subtarget) { 863 EVT Ty = N->getValueType(0); 864 865 if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8)) 866 return SDValue(); 867 868 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 869} 870 871static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 872 bool IsV216 = (Ty == MVT::v2i16); 873 874 switch (CC) { 875 case ISD::SETEQ: 876 case ISD::SETNE: return true; 877 case ISD::SETLT: 878 case ISD::SETLE: 879 case ISD::SETGT: 880 case ISD::SETGE: return IsV216; 881 case ISD::SETULT: 882 case ISD::SETULE: 883 case ISD::SETUGT: 884 case ISD::SETUGE: return !IsV216; 885 default: return false; 886 } 887} 888 889static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 890 EVT Ty = N->getValueType(0); 891 892 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 893 return SDValue(); 894 895 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 896 return SDValue(); 897 898 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 899 N->getOperand(1), N->getOperand(2)); 900} 901 902static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 903 EVT Ty = N->getValueType(0); 904 905 if (Ty.is128BitVector() && Ty.isInteger()) { 906 // Try the following combines: 907 // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) 908 // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) 909 // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) 910 // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) 911 // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) 912 // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) 913 // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) 914 // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) 915 // SETGT/SETGE/SETUGT/SETUGE variants of these 
will show up initially but 916 // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the 917 // legalizer. 918 SDValue Op0 = N->getOperand(0); 919 920 if (Op0->getOpcode() != ISD::SETCC) 921 return SDValue(); 922 923 ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get(); 924 bool Signed; 925 926 if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) 927 Signed = true; 928 else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) 929 Signed = false; 930 else 931 return SDValue(); 932 933 SDValue Op1 = N->getOperand(1); 934 SDValue Op2 = N->getOperand(2); 935 SDValue Op0Op0 = Op0->getOperand(0); 936 SDValue Op0Op1 = Op0->getOperand(1); 937 938 if (Op1 == Op0Op0 && Op2 == Op0Op1) 939 return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), 940 Ty, Op1, Op2); 941 else if (Op1 == Op0Op1 && Op2 == Op0Op0) 942 return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), 943 Ty, Op1, Op2); 944 } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { 945 SDValue SetCC = N->getOperand(0); 946 947 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 948 return SDValue(); 949 950 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 951 SetCC.getOperand(0), SetCC.getOperand(1), 952 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 953 } 954 955 return SDValue(); 956} 957 958static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 959 const MipsSubtarget *Subtarget) { 960 EVT Ty = N->getValueType(0); 961 962 if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 963 // Try the following combines: 964 // (xor (or $a, $b), (build_vector allones)) 965 // (xor (or $a, $b), (bitcast (build_vector allones))) 966 SDValue Op0 = N->getOperand(0); 967 SDValue Op1 = N->getOperand(1); 968 SDValue NotOp; 969 970 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 971 NotOp = Op1; 972 else if (ISD::isBuildVectorAllOnes(Op1.getNode())) 973 NotOp = Op0; 974 else 975 return SDValue(); 976 977 if (NotOp->getOpcode() == 
ISD::OR) 978 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 979 NotOp->getOperand(1)); 980 } 981 982 return SDValue(); 983} 984 985SDValue 986MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 987 SelectionDAG &DAG = DCI.DAG; 988 SDValue Val; 989 990 switch (N->getOpcode()) { 991 case ISD::ADDE: 992 return performADDECombine(N, DAG, DCI, Subtarget); 993 case ISD::AND: 994 Val = performANDCombine(N, DAG, DCI, Subtarget); 995 break; 996 case ISD::OR: 997 Val = performORCombine(N, DAG, DCI, Subtarget); 998 break; 999 case ISD::SUBE: 1000 return performSUBECombine(N, DAG, DCI, Subtarget); 1001 case ISD::MUL: 1002 return performMULCombine(N, DAG, DCI, this); 1003 case ISD::SHL: 1004 return performSHLCombine(N, DAG, DCI, Subtarget); 1005 case ISD::SRA: 1006 return performSRACombine(N, DAG, DCI, Subtarget); 1007 case ISD::SRL: 1008 return performSRLCombine(N, DAG, DCI, Subtarget); 1009 case ISD::VSELECT: 1010 return performVSELECTCombine(N, DAG); 1011 case ISD::XOR: 1012 Val = performXORCombine(N, DAG, Subtarget); 1013 break; 1014 case ISD::SETCC: 1015 Val = performSETCCCombine(N, DAG); 1016 break; 1017 } 1018 1019 if (Val.getNode()) { 1020 DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 1021 N->printrWithDepth(dbgs(), &DAG); 1022 dbgs() << "\n=> \n"; 1023 Val.getNode()->printrWithDepth(dbgs(), &DAG); 1024 dbgs() << "\n"); 1025 return Val; 1026 } 1027 1028 return MipsTargetLowering::PerformDAGCombine(N, DCI); 1029} 1030 1031MachineBasicBlock * 1032MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 1033 MachineBasicBlock *BB) const { 1034 switch (MI->getOpcode()) { 1035 default: 1036 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); 1037 case Mips::BPOSGE32_PSEUDO: 1038 return emitBPOSGE32(MI, BB); 1039 case Mips::SNZ_B_PSEUDO: 1040 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); 1041 case Mips::SNZ_H_PSEUDO: 1042 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); 1043 case Mips::SNZ_W_PSEUDO: 
1044 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); 1045 case Mips::SNZ_D_PSEUDO: 1046 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); 1047 case Mips::SNZ_V_PSEUDO: 1048 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); 1049 case Mips::SZ_B_PSEUDO: 1050 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); 1051 case Mips::SZ_H_PSEUDO: 1052 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); 1053 case Mips::SZ_W_PSEUDO: 1054 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); 1055 case Mips::SZ_D_PSEUDO: 1056 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); 1057 case Mips::SZ_V_PSEUDO: 1058 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); 1059 case Mips::COPY_FW_PSEUDO: 1060 return emitCOPY_FW(MI, BB); 1061 case Mips::COPY_FD_PSEUDO: 1062 return emitCOPY_FD(MI, BB); 1063 case Mips::INSERT_FW_PSEUDO: 1064 return emitINSERT_FW(MI, BB); 1065 case Mips::INSERT_FD_PSEUDO: 1066 return emitINSERT_FD(MI, BB); 1067 case Mips::INSERT_B_VIDX_PSEUDO: 1068 return emitINSERT_DF_VIDX(MI, BB, 1, false); 1069 case Mips::INSERT_H_VIDX_PSEUDO: 1070 return emitINSERT_DF_VIDX(MI, BB, 2, false); 1071 case Mips::INSERT_W_VIDX_PSEUDO: 1072 return emitINSERT_DF_VIDX(MI, BB, 4, false); 1073 case Mips::INSERT_D_VIDX_PSEUDO: 1074 return emitINSERT_DF_VIDX(MI, BB, 8, false); 1075 case Mips::INSERT_FW_VIDX_PSEUDO: 1076 return emitINSERT_DF_VIDX(MI, BB, 4, true); 1077 case Mips::INSERT_FD_VIDX_PSEUDO: 1078 return emitINSERT_DF_VIDX(MI, BB, 8, true); 1079 case Mips::FILL_FW_PSEUDO: 1080 return emitFILL_FW(MI, BB); 1081 case Mips::FILL_FD_PSEUDO: 1082 return emitFILL_FD(MI, BB); 1083 case Mips::FEXP2_W_1_PSEUDO: 1084 return emitFEXP2_W_1(MI, BB); 1085 case Mips::FEXP2_D_1_PSEUDO: 1086 return emitFEXP2_D_1(MI, BB); 1087 } 1088} 1089 1090bool MipsSETargetLowering:: 1091isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, 1092 unsigned NextStackOffset, 1093 const MipsFunctionInfo& FI) const { 1094 if (!EnableMipsTailCalls) 1095 return false; 1096 1097 // Return false if either the callee or caller 
has a byval argument. 1098 if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) 1099 return false; 1100 1101 // Return true if the callee's argument area is no larger than the 1102 // caller's. 1103 return NextStackOffset <= FI.getIncomingArgSize(); 1104} 1105 1106void MipsSETargetLowering:: 1107getOpndList(SmallVectorImpl<SDValue> &Ops, 1108 std::deque< std::pair<unsigned, SDValue> > &RegsToPass, 1109 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1110 CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { 1111 Ops.push_back(Callee); 1112 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1113 InternalLinkage, CLI, Callee, Chain); 1114} 1115 1116SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1117 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1118 1119 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1120 return MipsTargetLowering::lowerLOAD(Op, DAG); 1121 1122 // Replace a double precision load with two i32 loads and a buildpair64. 1123 SDLoc DL(Op); 1124 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1125 EVT PtrVT = Ptr.getValueType(); 1126 1127 // i32 load from lower address. 1128 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, 1129 MachinePointerInfo(), Nd.isVolatile(), 1130 Nd.isNonTemporal(), Nd.isInvariant(), 1131 Nd.getAlignment()); 1132 1133 // i32 load from higher address. 
1134 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); 1135 SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr, 1136 MachinePointerInfo(), Nd.isVolatile(), 1137 Nd.isNonTemporal(), Nd.isInvariant(), 1138 std::min(Nd.getAlignment(), 4U)); 1139 1140 if (!Subtarget->isLittle()) 1141 std::swap(Lo, Hi); 1142 1143 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1144 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1145 return DAG.getMergeValues(Ops, DL); 1146} 1147 1148SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1149 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1150 1151 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1152 return MipsTargetLowering::lowerSTORE(Op, DAG); 1153 1154 // Replace a double precision store with two extractelement64s and i32 stores. 1155 SDLoc DL(Op); 1156 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1157 EVT PtrVT = Ptr.getValueType(); 1158 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1159 Val, DAG.getConstant(0, MVT::i32)); 1160 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1161 Val, DAG.getConstant(1, MVT::i32)); 1162 1163 if (!Subtarget->isLittle()) 1164 std::swap(Lo, Hi); 1165 1166 // i32 store to lower address. 1167 Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), 1168 Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(), 1169 Nd.getTBAAInfo()); 1170 1171 // i32 store to higher address. 
1172 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); 1173 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1174 Nd.isVolatile(), Nd.isNonTemporal(), 1175 std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo()); 1176} 1177 1178SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1179 bool HasLo, bool HasHi, 1180 SelectionDAG &DAG) const { 1181 EVT Ty = Op.getOperand(0).getValueType(); 1182 SDLoc DL(Op); 1183 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1184 Op.getOperand(0), Op.getOperand(1)); 1185 SDValue Lo, Hi; 1186 1187 if (HasLo) 1188 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1189 if (HasHi) 1190 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1191 1192 if (!HasLo || !HasHi) 1193 return HasLo ? Lo : Hi; 1194 1195 SDValue Vals[] = { Lo, Hi }; 1196 return DAG.getMergeValues(Vals, DL); 1197} 1198 1199 1200static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) { 1201 SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1202 DAG.getConstant(0, MVT::i32)); 1203 SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, 1204 DAG.getConstant(1, MVT::i32)); 1205 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1206} 1207 1208static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) { 1209 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1210 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1211 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1212} 1213 1214// This function expands mips intrinsic nodes which have 64-bit input operands 1215// or output values. 
1216// 1217// out64 = intrinsic-node in64 1218// => 1219// lo = copy (extract-element (in64, 0)) 1220// hi = copy (extract-element (in64, 1)) 1221// mips-specific-node 1222// v0 = copy lo 1223// v1 = copy hi 1224// out64 = merge-values (v0, v1) 1225// 1226static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1227 SDLoc DL(Op); 1228 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1229 SmallVector<SDValue, 3> Ops; 1230 unsigned OpNo = 0; 1231 1232 // See if Op has a chain input. 1233 if (HasChainIn) 1234 Ops.push_back(Op->getOperand(OpNo++)); 1235 1236 // The next operand is the intrinsic opcode. 1237 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1238 1239 // See if the next operand has type i64. 1240 SDValue Opnd = Op->getOperand(++OpNo), In64; 1241 1242 if (Opnd.getValueType() == MVT::i64) 1243 In64 = initAccumulator(Opnd, DL, DAG); 1244 else 1245 Ops.push_back(Opnd); 1246 1247 // Push the remaining operands. 1248 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1249 Ops.push_back(Op->getOperand(OpNo)); 1250 1251 // Add In64 to the end of the list. 1252 if (In64.getNode()) 1253 Ops.push_back(In64); 1254 1255 // Scan output. 1256 SmallVector<EVT, 2> ResTys; 1257 1258 for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); 1259 I != E; ++I) 1260 ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); 1261 1262 // Create node. 1263 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1264 SDValue Out = (ResTys[0] == MVT::Untyped) ? 
extractLOHI(Val, DL, DAG) : Val; 1265 1266 if (!HasChainIn) 1267 return Out; 1268 1269 assert(Val->getValueType(1) == MVT::Other); 1270 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1271 return DAG.getMergeValues(Vals, DL); 1272} 1273 1274// Lower an MSA copy intrinsic into the specified SelectionDAG node 1275static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1276 SDLoc DL(Op); 1277 SDValue Vec = Op->getOperand(1); 1278 SDValue Idx = Op->getOperand(2); 1279 EVT ResTy = Op->getValueType(0); 1280 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1281 1282 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1283 DAG.getValueType(EltTy)); 1284 1285 return Result; 1286} 1287 1288static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1289 EVT ResVecTy = Op->getValueType(0); 1290 EVT ViaVecTy = ResVecTy; 1291 SDLoc DL(Op); 1292 1293 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1294 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1295 // lanes. 
1296 SDValue LaneA; 1297 SDValue LaneB = Op->getOperand(2); 1298 1299 if (ResVecTy == MVT::v2i64) { 1300 LaneA = DAG.getConstant(0, MVT::i32); 1301 ViaVecTy = MVT::v4i32; 1302 } else 1303 LaneA = LaneB; 1304 1305 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1306 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1307 1308 SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, 1309 makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1310 1311 if (ViaVecTy != ResVecTy) 1312 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result); 1313 1314 return Result; 1315} 1316 1317static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { 1318 return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0)); 1319} 1320 1321static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1322 bool BigEndian, SelectionDAG &DAG) { 1323 EVT ViaVecTy = VecTy; 1324 SDValue SplatValueA = SplatValue; 1325 SDValue SplatValueB = SplatValue; 1326 SDLoc DL(SplatValue); 1327 1328 if (VecTy == MVT::v2i64) { 1329 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 1330 ViaVecTy = MVT::v4i32; 1331 1332 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1333 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1334 DAG.getConstant(32, MVT::i32)); 1335 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1336 } 1337 1338 // We currently hold the parts in little endian order. Swap them if 1339 // necessary. 
1340 if (BigEndian) 1341 std::swap(SplatValueA, SplatValueB); 1342 1343 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1344 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1345 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1346 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1347 1348 SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, 1349 makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1350 1351 if (VecTy != ViaVecTy) 1352 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1353 1354 return Result; 1355} 1356 1357static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1358 unsigned Opc, SDValue Imm, 1359 bool BigEndian) { 1360 EVT VecTy = Op->getValueType(0); 1361 SDValue Exp2Imm; 1362 SDLoc DL(Op); 1363 1364 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1365 // here for now. 1366 if (VecTy == MVT::v2i64) { 1367 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1368 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1369 1370 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32); 1371 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32); 1372 1373 if (BigEndian) 1374 std::swap(BitImmLoOp, BitImmHiOp); 1375 1376 Exp2Imm = 1377 DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, 1378 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp, 1379 BitImmHiOp, BitImmLoOp, BitImmHiOp)); 1380 } 1381 } 1382 1383 if (!Exp2Imm.getNode()) { 1384 // We couldnt constant fold, do a vector shift instead 1385 1386 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since 1387 // only values 0-63 are valid. 
1388 if (VecTy == MVT::v2i64) 1389 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1390 1391 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1392 1393 Exp2Imm = 1394 DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm); 1395 } 1396 1397 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1398} 1399 1400static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1401 EVT ResTy = Op->getValueType(0); 1402 SDLoc DL(Op); 1403 SDValue One = DAG.getConstant(1, ResTy); 1404 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2)); 1405 1406 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1407 DAG.getNOT(DL, Bit, ResTy)); 1408} 1409 1410static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1411 SDLoc DL(Op); 1412 EVT ResTy = Op->getValueType(0); 1413 APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1) 1414 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); 1415 SDValue BitMask = DAG.getConstant(~BitImm, ResTy); 1416 1417 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1418} 1419 1420SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1421 SelectionDAG &DAG) const { 1422 SDLoc DL(Op); 1423 1424 switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { 1425 default: 1426 return SDValue(); 1427 case Intrinsic::mips_shilo: 1428 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1429 case Intrinsic::mips_dpau_h_qbl: 1430 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1431 case Intrinsic::mips_dpau_h_qbr: 1432 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1433 case Intrinsic::mips_dpsu_h_qbl: 1434 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1435 case Intrinsic::mips_dpsu_h_qbr: 1436 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1437 case Intrinsic::mips_dpa_w_ph: 1438 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1439 case Intrinsic::mips_dps_w_ph: 1440 return lowerDSPIntr(Op, DAG, 
MipsISD::DPS_W_PH); 1441 case Intrinsic::mips_dpax_w_ph: 1442 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1443 case Intrinsic::mips_dpsx_w_ph: 1444 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1445 case Intrinsic::mips_mulsa_w_ph: 1446 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1447 case Intrinsic::mips_mult: 1448 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1449 case Intrinsic::mips_multu: 1450 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1451 case Intrinsic::mips_madd: 1452 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1453 case Intrinsic::mips_maddu: 1454 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1455 case Intrinsic::mips_msub: 1456 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1457 case Intrinsic::mips_msubu: 1458 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1459 case Intrinsic::mips_addv_b: 1460 case Intrinsic::mips_addv_h: 1461 case Intrinsic::mips_addv_w: 1462 case Intrinsic::mips_addv_d: 1463 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1464 Op->getOperand(2)); 1465 case Intrinsic::mips_addvi_b: 1466 case Intrinsic::mips_addvi_h: 1467 case Intrinsic::mips_addvi_w: 1468 case Intrinsic::mips_addvi_d: 1469 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1470 lowerMSASplatImm(Op, 2, DAG)); 1471 case Intrinsic::mips_and_v: 1472 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1473 Op->getOperand(2)); 1474 case Intrinsic::mips_andi_b: 1475 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1476 lowerMSASplatImm(Op, 2, DAG)); 1477 case Intrinsic::mips_bclr_b: 1478 case Intrinsic::mips_bclr_h: 1479 case Intrinsic::mips_bclr_w: 1480 case Intrinsic::mips_bclr_d: 1481 return lowerMSABitClear(Op, DAG); 1482 case Intrinsic::mips_bclri_b: 1483 case Intrinsic::mips_bclri_h: 1484 case Intrinsic::mips_bclri_w: 1485 case Intrinsic::mips_bclri_d: 1486 return lowerMSABitClearImm(Op, DAG); 1487 case Intrinsic::mips_binsli_b: 1488 case 
Intrinsic::mips_binsli_h: 1489 case Intrinsic::mips_binsli_w: 1490 case Intrinsic::mips_binsli_d: { 1491 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1492 EVT VecTy = Op->getValueType(0); 1493 EVT EltTy = VecTy.getVectorElementType(); 1494 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1495 Op->getConstantOperandVal(3)); 1496 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1497 DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), 1498 Op->getOperand(1)); 1499 } 1500 case Intrinsic::mips_binsri_b: 1501 case Intrinsic::mips_binsri_h: 1502 case Intrinsic::mips_binsri_w: 1503 case Intrinsic::mips_binsri_d: { 1504 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1505 EVT VecTy = Op->getValueType(0); 1506 EVT EltTy = VecTy.getVectorElementType(); 1507 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1508 Op->getConstantOperandVal(3)); 1509 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1510 DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), 1511 Op->getOperand(1)); 1512 } 1513 case Intrinsic::mips_bmnz_v: 1514 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1515 Op->getOperand(2), Op->getOperand(1)); 1516 case Intrinsic::mips_bmnzi_b: 1517 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1518 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1519 Op->getOperand(1)); 1520 case Intrinsic::mips_bmz_v: 1521 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1522 Op->getOperand(1), Op->getOperand(2)); 1523 case Intrinsic::mips_bmzi_b: 1524 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1525 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1526 Op->getOperand(2)); 1527 case Intrinsic::mips_bneg_b: 1528 case Intrinsic::mips_bneg_h: 1529 case Intrinsic::mips_bneg_w: 1530 case Intrinsic::mips_bneg_d: { 1531 EVT VecTy = Op->getValueType(0); 1532 SDValue One = DAG.getConstant(1, VecTy); 1533 1534 return DAG.getNode(ISD::XOR, DL, VecTy, 
Op->getOperand(1), 1535 DAG.getNode(ISD::SHL, DL, VecTy, One, 1536 Op->getOperand(2))); 1537 } 1538 case Intrinsic::mips_bnegi_b: 1539 case Intrinsic::mips_bnegi_h: 1540 case Intrinsic::mips_bnegi_w: 1541 case Intrinsic::mips_bnegi_d: 1542 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1543 !Subtarget->isLittle()); 1544 case Intrinsic::mips_bnz_b: 1545 case Intrinsic::mips_bnz_h: 1546 case Intrinsic::mips_bnz_w: 1547 case Intrinsic::mips_bnz_d: 1548 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1549 Op->getOperand(1)); 1550 case Intrinsic::mips_bnz_v: 1551 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1552 Op->getOperand(1)); 1553 case Intrinsic::mips_bsel_v: 1554 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1555 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1556 Op->getOperand(1), Op->getOperand(3), 1557 Op->getOperand(2)); 1558 case Intrinsic::mips_bseli_b: 1559 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1560 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1561 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1562 Op->getOperand(2)); 1563 case Intrinsic::mips_bset_b: 1564 case Intrinsic::mips_bset_h: 1565 case Intrinsic::mips_bset_w: 1566 case Intrinsic::mips_bset_d: { 1567 EVT VecTy = Op->getValueType(0); 1568 SDValue One = DAG.getConstant(1, VecTy); 1569 1570 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1571 DAG.getNode(ISD::SHL, DL, VecTy, One, 1572 Op->getOperand(2))); 1573 } 1574 case Intrinsic::mips_bseti_b: 1575 case Intrinsic::mips_bseti_h: 1576 case Intrinsic::mips_bseti_w: 1577 case Intrinsic::mips_bseti_d: 1578 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1579 !Subtarget->isLittle()); 1580 case Intrinsic::mips_bz_b: 1581 case Intrinsic::mips_bz_h: 1582 case Intrinsic::mips_bz_w: 1583 case Intrinsic::mips_bz_d: 1584 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1585 
Op->getOperand(1)); 1586 case Intrinsic::mips_bz_v: 1587 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1588 Op->getOperand(1)); 1589 case Intrinsic::mips_ceq_b: 1590 case Intrinsic::mips_ceq_h: 1591 case Intrinsic::mips_ceq_w: 1592 case Intrinsic::mips_ceq_d: 1593 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1594 Op->getOperand(2), ISD::SETEQ); 1595 case Intrinsic::mips_ceqi_b: 1596 case Intrinsic::mips_ceqi_h: 1597 case Intrinsic::mips_ceqi_w: 1598 case Intrinsic::mips_ceqi_d: 1599 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1600 lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ); 1601 case Intrinsic::mips_cle_s_b: 1602 case Intrinsic::mips_cle_s_h: 1603 case Intrinsic::mips_cle_s_w: 1604 case Intrinsic::mips_cle_s_d: 1605 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1606 Op->getOperand(2), ISD::SETLE); 1607 case Intrinsic::mips_clei_s_b: 1608 case Intrinsic::mips_clei_s_h: 1609 case Intrinsic::mips_clei_s_w: 1610 case Intrinsic::mips_clei_s_d: 1611 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1612 lowerMSASplatImm(Op, 2, DAG), ISD::SETLE); 1613 case Intrinsic::mips_cle_u_b: 1614 case Intrinsic::mips_cle_u_h: 1615 case Intrinsic::mips_cle_u_w: 1616 case Intrinsic::mips_cle_u_d: 1617 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1618 Op->getOperand(2), ISD::SETULE); 1619 case Intrinsic::mips_clei_u_b: 1620 case Intrinsic::mips_clei_u_h: 1621 case Intrinsic::mips_clei_u_w: 1622 case Intrinsic::mips_clei_u_d: 1623 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1624 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1625 case Intrinsic::mips_clt_s_b: 1626 case Intrinsic::mips_clt_s_h: 1627 case Intrinsic::mips_clt_s_w: 1628 case Intrinsic::mips_clt_s_d: 1629 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1630 Op->getOperand(2), ISD::SETLT); 1631 case Intrinsic::mips_clti_s_b: 1632 case Intrinsic::mips_clti_s_h: 1633 case 
Intrinsic::mips_clti_s_w: 1634 case Intrinsic::mips_clti_s_d: 1635 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1636 lowerMSASplatImm(Op, 2, DAG), ISD::SETLT); 1637 case Intrinsic::mips_clt_u_b: 1638 case Intrinsic::mips_clt_u_h: 1639 case Intrinsic::mips_clt_u_w: 1640 case Intrinsic::mips_clt_u_d: 1641 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1642 Op->getOperand(2), ISD::SETULT); 1643 case Intrinsic::mips_clti_u_b: 1644 case Intrinsic::mips_clti_u_h: 1645 case Intrinsic::mips_clti_u_w: 1646 case Intrinsic::mips_clti_u_d: 1647 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1648 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1649 case Intrinsic::mips_copy_s_b: 1650 case Intrinsic::mips_copy_s_h: 1651 case Intrinsic::mips_copy_s_w: 1652 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1653 case Intrinsic::mips_copy_s_d: 1654 if (hasMips64()) 1655 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 1656 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1657 else { 1658 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1659 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1660 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1661 Op->getValueType(0), Op->getOperand(1), 1662 Op->getOperand(2)); 1663 } 1664 case Intrinsic::mips_copy_u_b: 1665 case Intrinsic::mips_copy_u_h: 1666 case Intrinsic::mips_copy_u_w: 1667 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1668 case Intrinsic::mips_copy_u_d: 1669 if (hasMips64()) 1670 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1671 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1672 else { 1673 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1674 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1675 // Note: When i64 is illegal, this results in copy_s.w instructions 1676 // instead of copy_u.w instructions. 
This makes no difference to the 1677 // behaviour since i64 is only illegal when the register file is 32-bit. 1678 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1679 Op->getValueType(0), Op->getOperand(1), 1680 Op->getOperand(2)); 1681 } 1682 case Intrinsic::mips_div_s_b: 1683 case Intrinsic::mips_div_s_h: 1684 case Intrinsic::mips_div_s_w: 1685 case Intrinsic::mips_div_s_d: 1686 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1687 Op->getOperand(2)); 1688 case Intrinsic::mips_div_u_b: 1689 case Intrinsic::mips_div_u_h: 1690 case Intrinsic::mips_div_u_w: 1691 case Intrinsic::mips_div_u_d: 1692 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1693 Op->getOperand(2)); 1694 case Intrinsic::mips_fadd_w: 1695 case Intrinsic::mips_fadd_d: 1696 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1697 Op->getOperand(2)); 1698 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1699 case Intrinsic::mips_fceq_w: 1700 case Intrinsic::mips_fceq_d: 1701 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1702 Op->getOperand(2), ISD::SETOEQ); 1703 case Intrinsic::mips_fcle_w: 1704 case Intrinsic::mips_fcle_d: 1705 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1706 Op->getOperand(2), ISD::SETOLE); 1707 case Intrinsic::mips_fclt_w: 1708 case Intrinsic::mips_fclt_d: 1709 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1710 Op->getOperand(2), ISD::SETOLT); 1711 case Intrinsic::mips_fcne_w: 1712 case Intrinsic::mips_fcne_d: 1713 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1714 Op->getOperand(2), ISD::SETONE); 1715 case Intrinsic::mips_fcor_w: 1716 case Intrinsic::mips_fcor_d: 1717 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1718 Op->getOperand(2), ISD::SETO); 1719 case Intrinsic::mips_fcueq_w: 1720 case Intrinsic::mips_fcueq_d: 1721 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1722 
Op->getOperand(2), ISD::SETUEQ); 1723 case Intrinsic::mips_fcule_w: 1724 case Intrinsic::mips_fcule_d: 1725 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1726 Op->getOperand(2), ISD::SETULE); 1727 case Intrinsic::mips_fcult_w: 1728 case Intrinsic::mips_fcult_d: 1729 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1730 Op->getOperand(2), ISD::SETULT); 1731 case Intrinsic::mips_fcun_w: 1732 case Intrinsic::mips_fcun_d: 1733 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1734 Op->getOperand(2), ISD::SETUO); 1735 case Intrinsic::mips_fcune_w: 1736 case Intrinsic::mips_fcune_d: 1737 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1738 Op->getOperand(2), ISD::SETUNE); 1739 case Intrinsic::mips_fdiv_w: 1740 case Intrinsic::mips_fdiv_d: 1741 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1742 Op->getOperand(2)); 1743 case Intrinsic::mips_ffint_u_w: 1744 case Intrinsic::mips_ffint_u_d: 1745 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1746 Op->getOperand(1)); 1747 case Intrinsic::mips_ffint_s_w: 1748 case Intrinsic::mips_ffint_s_d: 1749 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1750 Op->getOperand(1)); 1751 case Intrinsic::mips_fill_b: 1752 case Intrinsic::mips_fill_h: 1753 case Intrinsic::mips_fill_w: 1754 case Intrinsic::mips_fill_d: { 1755 SmallVector<SDValue, 16> Ops; 1756 EVT ResTy = Op->getValueType(0); 1757 1758 for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) 1759 Ops.push_back(Op->getOperand(1)); 1760 1761 // If ResTy is v2i64 then the type legalizer will break this node down into 1762 // an equivalent v4i32. 
1763 return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, Ops); 1764 } 1765 case Intrinsic::mips_fexp2_w: 1766 case Intrinsic::mips_fexp2_d: { 1767 EVT ResTy = Op->getValueType(0); 1768 return DAG.getNode( 1769 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1770 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1771 } 1772 case Intrinsic::mips_flog2_w: 1773 case Intrinsic::mips_flog2_d: 1774 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1775 case Intrinsic::mips_fmadd_w: 1776 case Intrinsic::mips_fmadd_d: 1777 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1778 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1779 case Intrinsic::mips_fmul_w: 1780 case Intrinsic::mips_fmul_d: 1781 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1782 Op->getOperand(2)); 1783 case Intrinsic::mips_fmsub_w: 1784 case Intrinsic::mips_fmsub_d: { 1785 EVT ResTy = Op->getValueType(0); 1786 return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), 1787 DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, 1788 Op->getOperand(2), Op->getOperand(3))); 1789 } 1790 case Intrinsic::mips_frint_w: 1791 case Intrinsic::mips_frint_d: 1792 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1793 case Intrinsic::mips_fsqrt_w: 1794 case Intrinsic::mips_fsqrt_d: 1795 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1796 case Intrinsic::mips_fsub_w: 1797 case Intrinsic::mips_fsub_d: 1798 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1799 Op->getOperand(2)); 1800 case Intrinsic::mips_ftrunc_u_w: 1801 case Intrinsic::mips_ftrunc_u_d: 1802 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1803 Op->getOperand(1)); 1804 case Intrinsic::mips_ftrunc_s_w: 1805 case Intrinsic::mips_ftrunc_s_d: 1806 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1807 Op->getOperand(1)); 1808 case Intrinsic::mips_ilvev_b: 1809 case 
Intrinsic::mips_ilvev_h: 1810 case Intrinsic::mips_ilvev_w: 1811 case Intrinsic::mips_ilvev_d: 1812 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1813 Op->getOperand(1), Op->getOperand(2)); 1814 case Intrinsic::mips_ilvl_b: 1815 case Intrinsic::mips_ilvl_h: 1816 case Intrinsic::mips_ilvl_w: 1817 case Intrinsic::mips_ilvl_d: 1818 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1819 Op->getOperand(1), Op->getOperand(2)); 1820 case Intrinsic::mips_ilvod_b: 1821 case Intrinsic::mips_ilvod_h: 1822 case Intrinsic::mips_ilvod_w: 1823 case Intrinsic::mips_ilvod_d: 1824 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1825 Op->getOperand(1), Op->getOperand(2)); 1826 case Intrinsic::mips_ilvr_b: 1827 case Intrinsic::mips_ilvr_h: 1828 case Intrinsic::mips_ilvr_w: 1829 case Intrinsic::mips_ilvr_d: 1830 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1831 Op->getOperand(1), Op->getOperand(2)); 1832 case Intrinsic::mips_insert_b: 1833 case Intrinsic::mips_insert_h: 1834 case Intrinsic::mips_insert_w: 1835 case Intrinsic::mips_insert_d: 1836 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 1837 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 1838 case Intrinsic::mips_insve_b: 1839 case Intrinsic::mips_insve_h: 1840 case Intrinsic::mips_insve_w: 1841 case Intrinsic::mips_insve_d: 1842 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 1843 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 1844 DAG.getConstant(0, MVT::i32)); 1845 case Intrinsic::mips_ldi_b: 1846 case Intrinsic::mips_ldi_h: 1847 case Intrinsic::mips_ldi_w: 1848 case Intrinsic::mips_ldi_d: 1849 return lowerMSASplatImm(Op, 1, DAG); 1850 case Intrinsic::mips_lsa: 1851 case Intrinsic::mips_dlsa: { 1852 EVT ResTy = Op->getValueType(0); 1853 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1854 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 1855 Op->getOperand(2), Op->getOperand(3))); 1856 } 1857 case 
Intrinsic::mips_maddv_b: 1858 case Intrinsic::mips_maddv_h: 1859 case Intrinsic::mips_maddv_w: 1860 case Intrinsic::mips_maddv_d: { 1861 EVT ResTy = Op->getValueType(0); 1862 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 1863 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1864 Op->getOperand(2), Op->getOperand(3))); 1865 } 1866 case Intrinsic::mips_max_s_b: 1867 case Intrinsic::mips_max_s_h: 1868 case Intrinsic::mips_max_s_w: 1869 case Intrinsic::mips_max_s_d: 1870 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1871 Op->getOperand(1), Op->getOperand(2)); 1872 case Intrinsic::mips_max_u_b: 1873 case Intrinsic::mips_max_u_h: 1874 case Intrinsic::mips_max_u_w: 1875 case Intrinsic::mips_max_u_d: 1876 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1877 Op->getOperand(1), Op->getOperand(2)); 1878 case Intrinsic::mips_maxi_s_b: 1879 case Intrinsic::mips_maxi_s_h: 1880 case Intrinsic::mips_maxi_s_w: 1881 case Intrinsic::mips_maxi_s_d: 1882 return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), 1883 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1884 case Intrinsic::mips_maxi_u_b: 1885 case Intrinsic::mips_maxi_u_h: 1886 case Intrinsic::mips_maxi_u_w: 1887 case Intrinsic::mips_maxi_u_d: 1888 return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), 1889 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1890 case Intrinsic::mips_min_s_b: 1891 case Intrinsic::mips_min_s_h: 1892 case Intrinsic::mips_min_s_w: 1893 case Intrinsic::mips_min_s_d: 1894 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1895 Op->getOperand(1), Op->getOperand(2)); 1896 case Intrinsic::mips_min_u_b: 1897 case Intrinsic::mips_min_u_h: 1898 case Intrinsic::mips_min_u_w: 1899 case Intrinsic::mips_min_u_d: 1900 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1901 Op->getOperand(1), Op->getOperand(2)); 1902 case Intrinsic::mips_mini_s_b: 1903 case Intrinsic::mips_mini_s_h: 1904 case Intrinsic::mips_mini_s_w: 1905 case 
Intrinsic::mips_mini_s_d: 1906 return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), 1907 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1908 case Intrinsic::mips_mini_u_b: 1909 case Intrinsic::mips_mini_u_h: 1910 case Intrinsic::mips_mini_u_w: 1911 case Intrinsic::mips_mini_u_d: 1912 return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), 1913 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1914 case Intrinsic::mips_mod_s_b: 1915 case Intrinsic::mips_mod_s_h: 1916 case Intrinsic::mips_mod_s_w: 1917 case Intrinsic::mips_mod_s_d: 1918 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 1919 Op->getOperand(2)); 1920 case Intrinsic::mips_mod_u_b: 1921 case Intrinsic::mips_mod_u_h: 1922 case Intrinsic::mips_mod_u_w: 1923 case Intrinsic::mips_mod_u_d: 1924 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 1925 Op->getOperand(2)); 1926 case Intrinsic::mips_mulv_b: 1927 case Intrinsic::mips_mulv_h: 1928 case Intrinsic::mips_mulv_w: 1929 case Intrinsic::mips_mulv_d: 1930 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 1931 Op->getOperand(2)); 1932 case Intrinsic::mips_msubv_b: 1933 case Intrinsic::mips_msubv_h: 1934 case Intrinsic::mips_msubv_w: 1935 case Intrinsic::mips_msubv_d: { 1936 EVT ResTy = Op->getValueType(0); 1937 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 1938 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 1939 Op->getOperand(2), Op->getOperand(3))); 1940 } 1941 case Intrinsic::mips_nlzc_b: 1942 case Intrinsic::mips_nlzc_h: 1943 case Intrinsic::mips_nlzc_w: 1944 case Intrinsic::mips_nlzc_d: 1945 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 1946 case Intrinsic::mips_nor_v: { 1947 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1948 Op->getOperand(1), Op->getOperand(2)); 1949 return DAG.getNOT(DL, Res, Res->getValueType(0)); 1950 } 1951 case Intrinsic::mips_nori_b: { 1952 SDValue Res = DAG.getNode(ISD::OR, DL, 
Op->getValueType(0), 1953 Op->getOperand(1), 1954 lowerMSASplatImm(Op, 2, DAG)); 1955 return DAG.getNOT(DL, Res, Res->getValueType(0)); 1956 } 1957 case Intrinsic::mips_or_v: 1958 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), 1959 Op->getOperand(2)); 1960 case Intrinsic::mips_ori_b: 1961 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 1962 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1963 case Intrinsic::mips_pckev_b: 1964 case Intrinsic::mips_pckev_h: 1965 case Intrinsic::mips_pckev_w: 1966 case Intrinsic::mips_pckev_d: 1967 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 1968 Op->getOperand(1), Op->getOperand(2)); 1969 case Intrinsic::mips_pckod_b: 1970 case Intrinsic::mips_pckod_h: 1971 case Intrinsic::mips_pckod_w: 1972 case Intrinsic::mips_pckod_d: 1973 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 1974 Op->getOperand(1), Op->getOperand(2)); 1975 case Intrinsic::mips_pcnt_b: 1976 case Intrinsic::mips_pcnt_h: 1977 case Intrinsic::mips_pcnt_w: 1978 case Intrinsic::mips_pcnt_d: 1979 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 1980 case Intrinsic::mips_shf_b: 1981 case Intrinsic::mips_shf_h: 1982 case Intrinsic::mips_shf_w: 1983 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 1984 Op->getOperand(2), Op->getOperand(1)); 1985 case Intrinsic::mips_sll_b: 1986 case Intrinsic::mips_sll_h: 1987 case Intrinsic::mips_sll_w: 1988 case Intrinsic::mips_sll_d: 1989 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 1990 Op->getOperand(2)); 1991 case Intrinsic::mips_slli_b: 1992 case Intrinsic::mips_slli_h: 1993 case Intrinsic::mips_slli_w: 1994 case Intrinsic::mips_slli_d: 1995 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 1996 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 1997 case Intrinsic::mips_splat_b: 1998 case Intrinsic::mips_splat_h: 1999 case Intrinsic::mips_splat_w: 2000 case Intrinsic::mips_splat_d: 2001 // We can't lower via 
VECTOR_SHUFFLE because it requires constant shuffle 2002 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2003 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2004 // Instead we lower to MipsISD::VSHF and match from there. 2005 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2006 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2007 Op->getOperand(1)); 2008 case Intrinsic::mips_splati_b: 2009 case Intrinsic::mips_splati_h: 2010 case Intrinsic::mips_splati_w: 2011 case Intrinsic::mips_splati_d: 2012 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2013 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2014 Op->getOperand(1)); 2015 case Intrinsic::mips_sra_b: 2016 case Intrinsic::mips_sra_h: 2017 case Intrinsic::mips_sra_w: 2018 case Intrinsic::mips_sra_d: 2019 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2020 Op->getOperand(2)); 2021 case Intrinsic::mips_srai_b: 2022 case Intrinsic::mips_srai_h: 2023 case Intrinsic::mips_srai_w: 2024 case Intrinsic::mips_srai_d: 2025 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2026 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2027 case Intrinsic::mips_srl_b: 2028 case Intrinsic::mips_srl_h: 2029 case Intrinsic::mips_srl_w: 2030 case Intrinsic::mips_srl_d: 2031 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2032 Op->getOperand(2)); 2033 case Intrinsic::mips_srli_b: 2034 case Intrinsic::mips_srli_h: 2035 case Intrinsic::mips_srli_w: 2036 case Intrinsic::mips_srli_d: 2037 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2038 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2039 case Intrinsic::mips_subv_b: 2040 case Intrinsic::mips_subv_h: 2041 case Intrinsic::mips_subv_w: 2042 case Intrinsic::mips_subv_d: 2043 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2044 Op->getOperand(2)); 2045 case Intrinsic::mips_subvi_b: 2046 case Intrinsic::mips_subvi_h: 2047 case Intrinsic::mips_subvi_w: 
  case Intrinsic::mips_subvi_d:
    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_vshf_b:
  case Intrinsic::mips_vshf_h:
  case Intrinsic::mips_vshf_w:
  case Intrinsic::mips_vshf_d:
    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_xor_v:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_xori_b:
    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
  }
}

// Lower an MSA ld.[bhwd] intrinsic to a plain ISD::LOAD of the intrinsic's
// vector result type. The node's operands are (chain, intrinsic-id, pointer,
// offset); the offset is folded into the address with an ADD before the load
// is created.
//
// NOTE(review): \p Intr is currently unused -- all four ld.* intrinsics lower
// identically because the element size is implied by the result type.
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Address = Op->getOperand(2);
  SDValue Offset = Op->getOperand(3);
  EVT ResTy = Op->getValueType(0);
  EVT PtrTy = Address->getValueType(0);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  // Not volatile, not non-temporal, not invariant; alignment 16 -- presumably
  // the natural alignment of a 128-bit MSA vector in memory (TODO confirm).
  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false,
                     false, false, 16);
}

// Lower ISD::INTRINSIC_W_CHAIN (chained intrinsics) into either DSP
// target-specific nodes via lowerDSPIntr, or, for the MSA ld.* family,
// ordinary vector loads. Returns SDValue() for unrecognised intrinsics so the
// default lowering applies.
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // Operand 0 is the chain; operand 1 holds the intrinsic ID.
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_extp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
  case Intrinsic::mips_extpdp:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
  case Intrinsic::mips_extr_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
  case Intrinsic::mips_extr_r_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
  case Intrinsic::mips_extr_rs_w:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
  case Intrinsic::mips_extr_s_h:
    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
  case Intrinsic::mips_mthlip:
    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
  case Intrinsic::mips_mulsaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
  case Intrinsic::mips_maq_s_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
  case Intrinsic::mips_maq_s_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
  case Intrinsic::mips_maq_sa_w_phl:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
  case Intrinsic::mips_maq_sa_w_phr:
    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
  case Intrinsic::mips_dpaq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
  case Intrinsic::mips_dpsq_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
  case Intrinsic::mips_dpaq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
  case Intrinsic::mips_dpsq_sa_l_w:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
  case Intrinsic::mips_dpaqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
  case Intrinsic::mips_dpaqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
  case Intrinsic::mips_dpsqx_s_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
  case Intrinsic::mips_dpsqx_sa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
  case Intrinsic::mips_ld_b:
  case Intrinsic::mips_ld_h:
  case Intrinsic::mips_ld_w:
  case Intrinsic::mips_ld_d:
    return lowerMSALoadIntr(Op, DAG, Intr);
  }
}

// Lower an MSA st.[bhwd] intrinsic to a plain ISD::STORE. The node's operands
// are (chain, intrinsic-id, value, pointer, offset); the offset is folded into
// the address with an ADD before the store is created.
//
// NOTE(review): \p Intr is currently unused -- all four st.* intrinsics lower
// identically, mirroring lowerMSALoadIntr above.
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
  SDLoc DL(Op);
  SDValue ChainIn = Op->getOperand(0);
  SDValue Value = Op->getOperand(2);
  SDValue Address = Op->getOperand(3);
  SDValue Offset = Op->getOperand(4);
  EVT PtrTy = Address->getValueType(0);

  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);

  // Not volatile, not non-temporal; alignment 16 as in lowerMSALoadIntr.
  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false,
                      false, 16);
}

// Lower ISD::INTRINSIC_VOID (chained, no-result intrinsics). Only the MSA
// st.* family is handled; everything else falls back to default lowering.
SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Operand 0 is the chain; operand 1 holds the intrinsic ID.
  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  switch (Intr) {
  default:
    return SDValue();
  case Intrinsic::mips_st_b:
  case Intrinsic::mips_st_h:
  case Intrinsic::mips_st_w:
  case Intrinsic::mips_st_d:
    return lowerMSAStoreIntr(Op, DAG, Intr);
  }
}

/// \brief Check if the given BuildVectorSDNode is a splat.
/// This method currently relies on DAG nodes being reused when equivalent,
/// so it's possible for this to return false even when isConstantSplat returns
/// true.
static bool isSplatVector(const BuildVectorSDNode *N) {
  unsigned int nOps = N->getNumOperands();
  assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");

  // All operands must be the identical SDValue for the node to be a splat.
  SDValue Operand0 = N->getOperand(0);

  for (unsigned int i = 1; i < nOps; ++i) {
    if (N->getOperand(i) != Operand0)
      return false;
  }

  return true;
}

// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
// choose to sign-extend but we could have equally chosen zero-extend. The
// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
// result into this node later (possibly changing it to a zero-extend in the
// process).
SDValue MipsSETargetLowering::
lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Op0 = Op->getOperand(0);
  EVT VecTy = Op0->getValueType(0);

  // Only 128-bit (MSA-sized) vectors are handled here; everything else keeps
  // the default lowering.
  if (!VecTy.is128BitVector())
    return SDValue();

  if (ResTy.isInteger()) {
    // Integer results go via the sign-extending extract node; the trailing
    // VTSDNode records the source element type so the extension width is
    // known after legalization.
    SDValue Op1 = Op->getOperand(1);
    EVT EltTy = VecTy.getVectorElementType();
    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
                       DAG.getValueType(EltTy));
  }

  // Floating-point extracts are already legal as-is.
  return Op;
}

// Returns true if \p Op is an UNDEF, an integer constant, or an FP constant.
static bool isConstantOrUndef(const SDValue Op) {
  if (Op->getOpcode() == ISD::UNDEF)
    return true;
  if (dyn_cast<ConstantSDNode>(Op))
    return true;
  if (dyn_cast<ConstantFPSDNode>(Op))
    return true;
  return false;
}

// Returns true if AT LEAST ONE operand of \p Op is a constant or undef.
// NOTE(review): despite the name suggesting "all operands", this is "any"
// semantics -- the sole caller (lowerBUILD_VECTOR) negates the result to take
// the INSERT_VECTOR_ELT path only when NO operand is constant/undef.
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
// backend.
//
// Lowers according to the following rules:
// - Constant splats are legal as-is as long as the SplatBitSize is a power of
//   2 less than or equal to 64 and the value fits into a signed 10-bit
//   immediate
// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
//   is a power of 2 less than or equal to 64 and the value does not fit into a
//   signed 10-bit immediate
// - Non-constant splats are legal as-is.
// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
// - All others are illegal and must be expanded.
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // These lowerings only apply to 128-bit MSA vectors.
  if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  // MinSplatBits = 8 so single-byte splats are recognised; the endianness
  // flag makes isConstantSplat read the elements in memory order.
  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, 8,
                            !Subtarget->isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value fits into a simm10 then we can use ldi.[bhwd]
    // However, if it isn't an integer type we will have to bitcast from an
    // integer type first. Also, if there are any undefs, we must lower them
    // to defined values first.
    if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10))
      return Op;

    EVT ViaVecTy;

    // Pick the integer vector type matching the splat element size; the
    // constant is materialised in that type and bitcast back afterwards.
    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);

    // Bitcast to the type we originally wanted
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  } else if (isSplatVector(Node))
    // Non-constant splats are matched directly (e.g. by fill/splat patterns).
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, MVT::i32));
    }
    return Vector;
  }

  // Constant non-splat BUILD_VECTORs must be expanded by the caller.
  return SDValue();
}

// Lower VECTOR_SHUFFLE into SHF (if possible).
//
// SHF splits the vector into blocks of four elements, then shuffles these
// elements according to a <4 x i2> constant (encoded as an integer immediate).
//
// It is therefore possible to lower into SHF when the mask takes the form:
//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
// When undef's appear they are treated as if they were whatever value is
// necessary in order to fit the above form.
2323// 2324// For example: 2325// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2326// <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2327// i32 7, i32 6, i32 5, i32 4> 2328// is lowered to: 2329// (SHF_H $w0, $w1, 27) 2330// where the 27 comes from: 2331// 3 + (2 << 2) + (1 << 4) + (0 << 6) 2332static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2333 SmallVector<int, 16> Indices, 2334 SelectionDAG &DAG) { 2335 int SHFIndices[4] = { -1, -1, -1, -1 }; 2336 2337 if (Indices.size() < 4) 2338 return SDValue(); 2339 2340 for (unsigned i = 0; i < 4; ++i) { 2341 for (unsigned j = i; j < Indices.size(); j += 4) { 2342 int Idx = Indices[j]; 2343 2344 // Convert from vector index to 4-element subvector index 2345 // If an index refers to an element outside of the subvector then give up 2346 if (Idx != -1) { 2347 Idx -= 4 * (j / 4); 2348 if (Idx < 0 || Idx >= 4) 2349 return SDValue(); 2350 } 2351 2352 // If the mask has an undef, replace it with the current index. 2353 // Note that it might still be undef if the current index is also undef 2354 if (SHFIndices[i] == -1) 2355 SHFIndices[i] = Idx; 2356 2357 // Check that non-undef values are the same as in the mask. If they 2358 // aren't then give up 2359 if (!(Idx == -1 || Idx == SHFIndices[i])) 2360 return SDValue(); 2361 } 2362 } 2363 2364 // Calculate the immediate. Replace any remaining undefs with zero 2365 APInt Imm(32, 0); 2366 for (int i = 3; i >= 0; --i) { 2367 int Idx = SHFIndices[i]; 2368 2369 if (Idx == -1) 2370 Idx = 0; 2371 2372 Imm <<= 2; 2373 Imm |= Idx & 0x3; 2374 } 2375 2376 return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy, 2377 DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); 2378} 2379 2380// Lower VECTOR_SHUFFLE into ILVEV (if possible). 2381// 2382// ILVEV interleaves the even elements from each vector. 2383// 2384// It is possible to lower into ILVEV when the mask takes the form: 2385// <0, n, 2, n+2, 4, n+4, ...> 2386// where n is the number of elements in the vector. 
2387// 2388// When undef's appear in the mask they are treated as if they were whatever 2389// value is necessary in order to fit the above form. 2390static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2391 SmallVector<int, 16> Indices, 2392 SelectionDAG &DAG) { 2393 assert ((Indices.size() % 2) == 0); 2394 int WsIdx = 0; 2395 int WtIdx = ResTy.getVectorNumElements(); 2396 2397 for (unsigned i = 0; i < Indices.size(); i += 2) { 2398 if (Indices[i] != -1 && Indices[i] != WsIdx) 2399 return SDValue(); 2400 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2401 return SDValue(); 2402 WsIdx += 2; 2403 WtIdx += 2; 2404 } 2405 2406 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0), 2407 Op->getOperand(1)); 2408} 2409 2410// Lower VECTOR_SHUFFLE into ILVOD (if possible). 2411// 2412// ILVOD interleaves the odd elements from each vector. 2413// 2414// It is possible to lower into ILVOD when the mask takes the form: 2415// <1, n+1, 3, n+3, 5, n+5, ...> 2416// where n is the number of elements in the vector. 2417// 2418// When undef's appear in the mask they are treated as if they were whatever 2419// value is necessary in order to fit the above form. 2420static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2421 SmallVector<int, 16> Indices, 2422 SelectionDAG &DAG) { 2423 assert ((Indices.size() % 2) == 0); 2424 int WsIdx = 1; 2425 int WtIdx = ResTy.getVectorNumElements() + 1; 2426 2427 for (unsigned i = 0; i < Indices.size(); i += 2) { 2428 if (Indices[i] != -1 && Indices[i] != WsIdx) 2429 return SDValue(); 2430 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2431 return SDValue(); 2432 WsIdx += 2; 2433 WtIdx += 2; 2434 } 2435 2436 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0), 2437 Op->getOperand(1)); 2438} 2439 2440// Lower VECTOR_SHUFFLE into ILVL (if possible). 2441// 2442// ILVL interleaves consecutive elements from the left half of each vector. 
2443// 2444// It is possible to lower into ILVL when the mask takes the form: 2445// <0, n, 1, n+1, 2, n+2, ...> 2446// where n is the number of elements in the vector. 2447// 2448// When undef's appear in the mask they are treated as if they were whatever 2449// value is necessary in order to fit the above form. 2450static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2451 SmallVector<int, 16> Indices, 2452 SelectionDAG &DAG) { 2453 assert ((Indices.size() % 2) == 0); 2454 int WsIdx = 0; 2455 int WtIdx = ResTy.getVectorNumElements(); 2456 2457 for (unsigned i = 0; i < Indices.size(); i += 2) { 2458 if (Indices[i] != -1 && Indices[i] != WsIdx) 2459 return SDValue(); 2460 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2461 return SDValue(); 2462 WsIdx ++; 2463 WtIdx ++; 2464 } 2465 2466 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0), 2467 Op->getOperand(1)); 2468} 2469 2470// Lower VECTOR_SHUFFLE into ILVR (if possible). 2471// 2472// ILVR interleaves consecutive elements from the right half of each vector. 2473// 2474// It is possible to lower into ILVR when the mask takes the form: 2475// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2476// where n is the number of elements in the vector and x is half n. 2477// 2478// When undef's appear in the mask they are treated as if they were whatever 2479// value is necessary in order to fit the above form. 
2480static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2481 SmallVector<int, 16> Indices, 2482 SelectionDAG &DAG) { 2483 assert ((Indices.size() % 2) == 0); 2484 unsigned NumElts = ResTy.getVectorNumElements(); 2485 int WsIdx = NumElts / 2; 2486 int WtIdx = NumElts + NumElts / 2; 2487 2488 for (unsigned i = 0; i < Indices.size(); i += 2) { 2489 if (Indices[i] != -1 && Indices[i] != WsIdx) 2490 return SDValue(); 2491 if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) 2492 return SDValue(); 2493 WsIdx ++; 2494 WtIdx ++; 2495 } 2496 2497 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0), 2498 Op->getOperand(1)); 2499} 2500 2501// Lower VECTOR_SHUFFLE into PCKEV (if possible). 2502// 2503// PCKEV copies the even elements of each vector into the result vector. 2504// 2505// It is possible to lower into PCKEV when the mask takes the form: 2506// <0, 2, 4, ..., n, n+2, n+4, ...> 2507// where n is the number of elements in the vector. 2508// 2509// When undef's appear in the mask they are treated as if they were whatever 2510// value is necessary in order to fit the above form. 2511static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2512 SmallVector<int, 16> Indices, 2513 SelectionDAG &DAG) { 2514 assert ((Indices.size() % 2) == 0); 2515 int Idx = 0; 2516 2517 for (unsigned i = 0; i < Indices.size(); ++i) { 2518 if (Indices[i] != -1 && Indices[i] != Idx) 2519 return SDValue(); 2520 Idx += 2; 2521 } 2522 2523 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0), 2524 Op->getOperand(1)); 2525} 2526 2527// Lower VECTOR_SHUFFLE into PCKOD (if possible). 2528// 2529// PCKOD copies the odd elements of each vector into the result vector. 2530// 2531// It is possible to lower into PCKOD when the mask takes the form: 2532// <1, 3, 5, ..., n+1, n+3, n+5, ...> 2533// where n is the number of elements in the vector. 
2534// 2535// When undef's appear in the mask they are treated as if they were whatever 2536// value is necessary in order to fit the above form. 2537static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2538 SmallVector<int, 16> Indices, 2539 SelectionDAG &DAG) { 2540 assert ((Indices.size() % 2) == 0); 2541 int Idx = 1; 2542 2543 for (unsigned i = 0; i < Indices.size(); ++i) { 2544 if (Indices[i] != -1 && Indices[i] != Idx) 2545 return SDValue(); 2546 Idx += 2; 2547 } 2548 2549 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), 2550 Op->getOperand(1)); 2551} 2552 2553// Lower VECTOR_SHUFFLE into VSHF. 2554// 2555// This mostly consists of converting the shuffle indices in Indices into a 2556// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2557// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2558// if the type is v8i16 and all the indices are less than 8 then the second 2559// operand is unused and can be replaced with anything. We choose to replace it 2560// with the used operand since this reduces the number of instructions overall. 
2561static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, 2562 SmallVector<int, 16> Indices, 2563 SelectionDAG &DAG) { 2564 SmallVector<SDValue, 16> Ops; 2565 SDValue Op0; 2566 SDValue Op1; 2567 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); 2568 EVT MaskEltTy = MaskVecTy.getVectorElementType(); 2569 bool Using1stVec = false; 2570 bool Using2ndVec = false; 2571 SDLoc DL(Op); 2572 int ResTyNumElts = ResTy.getVectorNumElements(); 2573 2574 for (int i = 0; i < ResTyNumElts; ++i) { 2575 // Idx == -1 means UNDEF 2576 int Idx = Indices[i]; 2577 2578 if (0 <= Idx && Idx < ResTyNumElts) 2579 Using1stVec = true; 2580 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) 2581 Using2ndVec = true; 2582 } 2583 2584 for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end(); 2585 ++I) 2586 Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); 2587 2588 SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, Ops); 2589 2590 if (Using1stVec && Using2ndVec) { 2591 Op0 = Op->getOperand(0); 2592 Op1 = Op->getOperand(1); 2593 } else if (Using1stVec) 2594 Op0 = Op1 = Op->getOperand(0); 2595 else if (Using2ndVec) 2596 Op0 = Op1 = Op->getOperand(1); 2597 else 2598 llvm_unreachable("shuffle vector mask references neither vector operand?"); 2599 2600 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. 2601 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> 2602 // VSHF concatenates the vectors in a bitwise fashion: 2603 // <0b00, 0b01> + <0b10, 0b11> -> 2604 // 0b0100 + 0b1110 -> 0b01001110 2605 // <0b10, 0b11, 0b00, 0b01> 2606 // We must therefore swap the operands to get the correct result. 2607 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); 2608} 2609 2610// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the 2611// indices in the shuffle. 
// Tries each specialised MSA shuffle form in turn (SHF, the ILV* family,
// PCK*), falling back to the fully general VSHF when none of them matches
// the mask.
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  // Only 128-bit (MSA-sized) vectors are handled here.
  if (!ResTy.is128BitVector())
    return SDValue();

  int ResTyNumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;

  // Collect the mask; getMaskElt returns -1 for undef lanes.
  for (int i = 0; i < ResTyNumElts; ++i)
    Indices.push_back(Node->getMaskElt(i));

  SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG);
  if (Result.getNode())
    return Result;
  // VSHF handles any remaining mask.
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}

// Expands the BPOSGE32 pseudo into a diamond of real machine basic blocks:
// a conditional DSP branch, two blocks materialising 0/1, and a sink block
// joining them with a PHI that defines the pseudo's result register.
MachineBasicBlock * MipsSETargetLowering::
emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  // $bb:
  //  bposge32 $tbb
  // $fbb:
  //  li $vr2, 0
  //  b $sink
  // $tbb:
  //  li $vr1, 1
  // $sink:
  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI->getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // Insert the new blocks immediately after BB so fall-through layout holds.
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bposge32 instruction to $BB.
  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);

  // Fill $FBB.
  unsigned VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  unsigned VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI->getOperand(0).getReg())
    .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);

  MI->eraseFromParent(); // The pseudo instruction is gone now.
2710 return Sink; 2711} 2712 2713MachineBasicBlock * MipsSETargetLowering:: 2714emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, 2715 unsigned BranchOp) const{ 2716 // $bb: 2717 // vany_nonzero $rd, $ws 2718 // => 2719 // $bb: 2720 // bnz.b $ws, $tbb 2721 // b $fbb 2722 // $fbb: 2723 // li $rd1, 0 2724 // b $sink 2725 // $tbb: 2726 // li $rd2, 1 2727 // $sink: 2728 // $rd = phi($rd1, $fbb, $rd2, $tbb) 2729 2730 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2731 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2732 const TargetRegisterClass *RC = &Mips::GPR32RegClass; 2733 DebugLoc DL = MI->getDebugLoc(); 2734 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2735 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); 2736 MachineFunction *F = BB->getParent(); 2737 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); 2738 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); 2739 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); 2740 F->insert(It, FBB); 2741 F->insert(It, TBB); 2742 F->insert(It, Sink); 2743 2744 // Transfer the remainder of BB and its successor edges to Sink. 2745 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), 2746 BB->end()); 2747 Sink->transferSuccessorsAndUpdatePHIs(BB); 2748 2749 // Add successors. 2750 BB->addSuccessor(FBB); 2751 BB->addSuccessor(TBB); 2752 FBB->addSuccessor(Sink); 2753 TBB->addSuccessor(Sink); 2754 2755 // Insert the real bnz.b instruction to $BB. 2756 BuildMI(BB, DL, TII->get(BranchOp)) 2757 .addReg(MI->getOperand(1).getReg()) 2758 .addMBB(TBB); 2759 2760 // Fill $FBB. 2761 unsigned RD1 = RegInfo.createVirtualRegister(RC); 2762 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) 2763 .addReg(Mips::ZERO).addImm(0); 2764 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); 2765 2766 // Fill $TBB. 
2767 unsigned RD2 = RegInfo.createVirtualRegister(RC); 2768 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) 2769 .addReg(Mips::ZERO).addImm(1); 2770 2771 // Insert phi function to $Sink. 2772 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), 2773 MI->getOperand(0).getReg()) 2774 .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB); 2775 2776 MI->eraseFromParent(); // The pseudo instruction is gone now. 2777 return Sink; 2778} 2779 2780// Emit the COPY_FW pseudo instruction. 2781// 2782// copy_fw_pseudo $fd, $ws, n 2783// => 2784// copy_u_w $rt, $ws, $n 2785// mtc1 $rt, $fd 2786// 2787// When n is zero, the equivalent operation can be performed with (potentially) 2788// zero instructions due to register overlaps. This optimization is never valid 2789// for lane 1 because it would require FR=0 mode which isn't supported by MSA. 2790MachineBasicBlock * MipsSETargetLowering:: 2791emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ 2792 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2793 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2794 DebugLoc DL = MI->getDebugLoc(); 2795 unsigned Fd = MI->getOperand(0).getReg(); 2796 unsigned Ws = MI->getOperand(1).getReg(); 2797 unsigned Lane = MI->getOperand(2).getImm(); 2798 2799 if (Lane == 0) 2800 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo); 2801 else { 2802 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 2803 2804 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); 2805 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); 2806 } 2807 2808 MI->eraseFromParent(); // The pseudo instruction is gone now. 2809 return BB; 2810} 2811 2812// Emit the COPY_FD pseudo instruction. 
2813// 2814// copy_fd_pseudo $fd, $ws, n 2815// => 2816// splati.d $wt, $ws, $n 2817// copy $fd, $wt:sub_64 2818// 2819// When n is zero, the equivalent operation can be performed with (potentially) 2820// zero instructions due to register overlaps. This optimization is always 2821// valid because FR=1 mode which is the only supported mode in MSA. 2822MachineBasicBlock * MipsSETargetLowering:: 2823emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{ 2824 assert(Subtarget->isFP64bit()); 2825 2826 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2827 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2828 unsigned Fd = MI->getOperand(0).getReg(); 2829 unsigned Ws = MI->getOperand(1).getReg(); 2830 unsigned Lane = MI->getOperand(2).getImm() * 2; 2831 DebugLoc DL = MI->getDebugLoc(); 2832 2833 if (Lane == 0) 2834 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); 2835 else { 2836 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 2837 2838 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); 2839 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); 2840 } 2841 2842 MI->eraseFromParent(); // The pseudo instruction is gone now. 2843 return BB; 2844} 2845 2846// Emit the INSERT_FW pseudo instruction. 
2847// 2848// insert_fw_pseudo $wd, $wd_in, $n, $fs 2849// => 2850// subreg_to_reg $wt:sub_lo, $fs 2851// insve_w $wd[$n], $wd_in, $wt[0] 2852MachineBasicBlock * 2853MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, 2854 MachineBasicBlock *BB) const { 2855 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2856 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2857 DebugLoc DL = MI->getDebugLoc(); 2858 unsigned Wd = MI->getOperand(0).getReg(); 2859 unsigned Wd_in = MI->getOperand(1).getReg(); 2860 unsigned Lane = MI->getOperand(2).getImm(); 2861 unsigned Fs = MI->getOperand(3).getReg(); 2862 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 2863 2864 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 2865 .addImm(0) 2866 .addReg(Fs) 2867 .addImm(Mips::sub_lo); 2868 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 2869 .addReg(Wd_in) 2870 .addImm(Lane) 2871 .addReg(Wt) 2872 .addImm(0); 2873 2874 MI->eraseFromParent(); // The pseudo instruction is gone now. 2875 return BB; 2876} 2877 2878// Emit the INSERT_FD pseudo instruction. 
2879// 2880// insert_fd_pseudo $wd, $fs, n 2881// => 2882// subreg_to_reg $wt:sub_64, $fs 2883// insve_d $wd[$n], $wd_in, $wt[0] 2884MachineBasicBlock * 2885MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, 2886 MachineBasicBlock *BB) const { 2887 assert(Subtarget->isFP64bit()); 2888 2889 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 2890 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 2891 DebugLoc DL = MI->getDebugLoc(); 2892 unsigned Wd = MI->getOperand(0).getReg(); 2893 unsigned Wd_in = MI->getOperand(1).getReg(); 2894 unsigned Lane = MI->getOperand(2).getImm(); 2895 unsigned Fs = MI->getOperand(3).getReg(); 2896 unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 2897 2898 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 2899 .addImm(0) 2900 .addReg(Fs) 2901 .addImm(Mips::sub_64); 2902 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 2903 .addReg(Wd_in) 2904 .addImm(Lane) 2905 .addReg(Wt) 2906 .addImm(0); 2907 2908 MI->eraseFromParent(); // The pseudo instruction is gone now. 2909 return BB; 2910} 2911 2912// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
                                         MachineBasicBlock *BB,
                                         unsigned EltSizeInBytes,
                                         bool IsFP) const {
  // Insert a value into a lane whose index lives in a register: rotate the
  // vector so the target lane becomes element zero, insert at constant index
  // zero, then rotate back by the negated amount.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned SrcVecReg = MI->getOperand(1).getReg();
  unsigned LaneReg = MI->getOperand(2).getReg();
  unsigned SrcValReg = MI->getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  const TargetRegisterClass *GPRRC = isGP64bit() ? &Mips::GPR64RegClass
                                                 : &Mips::GPR32RegClass;
  // Pick the element-size-specific opcodes and register class.
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    // Widen the FPR into a 128-bit vector register so insve below can use it.
    unsigned Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index
  if (EltSizeInBytes != 1) {
    unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SLL), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg);

  unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this (the negation is materialized as
  // sub $lanetmp2, $zero, $lanetmp1).
  unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SUB), LaneTmp2)
      .addReg(Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2);

  MI->eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
3038// 3039// fill_fw_pseudo $wd, $fs 3040// => 3041// implicit_def $wt1 3042// insert_subreg $wt2:subreg_lo, $wt1, $fs 3043// splati.w $wd, $wt2[0] 3044MachineBasicBlock * 3045MipsSETargetLowering::emitFILL_FW(MachineInstr *MI, 3046 MachineBasicBlock *BB) const { 3047 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3048 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3049 DebugLoc DL = MI->getDebugLoc(); 3050 unsigned Wd = MI->getOperand(0).getReg(); 3051 unsigned Fs = MI->getOperand(1).getReg(); 3052 unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3053 unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3054 3055 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3056 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3057 .addReg(Wt1) 3058 .addReg(Fs) 3059 .addImm(Mips::sub_lo); 3060 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0); 3061 3062 MI->eraseFromParent(); // The pseudo instruction is gone now. 3063 return BB; 3064} 3065 3066// Emit the FILL_FD pseudo instruction. 
3067// 3068// fill_fd_pseudo $wd, $fs 3069// => 3070// implicit_def $wt1 3071// insert_subreg $wt2:subreg_64, $wt1, $fs 3072// splati.d $wd, $wt2[0] 3073MachineBasicBlock * 3074MipsSETargetLowering::emitFILL_FD(MachineInstr *MI, 3075 MachineBasicBlock *BB) const { 3076 assert(Subtarget->isFP64bit()); 3077 3078 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3079 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3080 DebugLoc DL = MI->getDebugLoc(); 3081 unsigned Wd = MI->getOperand(0).getReg(); 3082 unsigned Fs = MI->getOperand(1).getReg(); 3083 unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3084 unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3085 3086 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); 3087 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) 3088 .addReg(Wt1) 3089 .addReg(Fs) 3090 .addImm(Mips::sub_64); 3091 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0); 3092 3093 MI->eraseFromParent(); // The pseudo instruction is gone now. 3094 return BB; 3095} 3096 3097// Emit the FEXP2_W_1 pseudo instructions. 
3098// 3099// fexp2_w_1_pseudo $wd, $wt 3100// => 3101// ldi.w $ws, 1 3102// fexp2.w $wd, $ws, $wt 3103MachineBasicBlock * 3104MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, 3105 MachineBasicBlock *BB) const { 3106 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3107 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3108 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3109 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3110 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3111 DebugLoc DL = MI->getDebugLoc(); 3112 3113 // Splat 1.0 into a vector 3114 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3115 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3116 3117 // Emit 1.0 * fexp2(Wt) 3118 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI->getOperand(0).getReg()) 3119 .addReg(Ws2) 3120 .addReg(MI->getOperand(1).getReg()); 3121 3122 MI->eraseFromParent(); // The pseudo instruction is gone now. 3123 return BB; 3124} 3125 3126// Emit the FEXP2_D_1 pseudo instructions. 
3127// 3128// fexp2_d_1_pseudo $wd, $wt 3129// => 3130// ldi.d $ws, 1 3131// fexp2.d $wd, $ws, $wt 3132MachineBasicBlock * 3133MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI, 3134 MachineBasicBlock *BB) const { 3135 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3136 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3137 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3138 unsigned Ws1 = RegInfo.createVirtualRegister(RC); 3139 unsigned Ws2 = RegInfo.createVirtualRegister(RC); 3140 DebugLoc DL = MI->getDebugLoc(); 3141 3142 // Splat 1.0 into a vector 3143 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3144 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3145 3146 // Emit 1.0 * fexp2(Wt) 3147 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI->getOperand(0).getReg()) 3148 .addReg(Ws2) 3149 .addReg(MI->getOperand(1).getReg()); 3150 3151 MI->eraseFromParent(); // The pseudo instruction is gone now. 3152 return BB; 3153} 3154