MipsSEISelLowering.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Subclass of MipsTargetLowering specialized for mips32/64.
11//
12//===----------------------------------------------------------------------===//
13#define DEBUG_TYPE "mips-isel"
14#include "MipsSEISelLowering.h"
15#include "MipsRegisterInfo.h"
16#include "MipsTargetMachine.h"
17#include "llvm/CodeGen/MachineInstrBuilder.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"
19#include "llvm/IR/Intrinsics.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/raw_ostream.h"
23#include "llvm/Target/TargetInstrInfo.h"
24
25using namespace llvm;
26
27static cl::opt<bool>
28EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
29                    cl::desc("MIPS: Enable tail calls."), cl::init(false));
30
31static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
32                                   cl::desc("Expand double precision loads and "
33                                            "stores to their single precision "
34                                            "counterparts"));
35
36MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
37  : MipsTargetLowering(TM) {
38  // Set up the register classes
39  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
40
41  if (isGP64bit())
42    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
43
44  if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
45    // Expand all truncating stores and extending loads.
46    unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
47    unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
48
49    for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
50      for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
51        setTruncStoreAction((MVT::SimpleValueType)VT0,
52                            (MVT::SimpleValueType)VT1, Expand);
53
54      setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
55      setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
56      setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
57    }
58  }
59
60  if (Subtarget->hasDSP()) {
61    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
62
63    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
64      addRegisterClass(VecTys[i], &Mips::DSPRRegClass);
65
66      // Expand all builtin opcodes.
67      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
68        setOperationAction(Opc, VecTys[i], Expand);
69
70      setOperationAction(ISD::ADD, VecTys[i], Legal);
71      setOperationAction(ISD::SUB, VecTys[i], Legal);
72      setOperationAction(ISD::LOAD, VecTys[i], Legal);
73      setOperationAction(ISD::STORE, VecTys[i], Legal);
74      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
75    }
76
77    setTargetDAGCombine(ISD::SHL);
78    setTargetDAGCombine(ISD::SRA);
79    setTargetDAGCombine(ISD::SRL);
80    setTargetDAGCombine(ISD::SETCC);
81    setTargetDAGCombine(ISD::VSELECT);
82  }
83
84  if (Subtarget->hasDSPR2())
85    setOperationAction(ISD::MUL, MVT::v2i16, Legal);
86
87  if (Subtarget->hasMSA()) {
88    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
89    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
90    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
91    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
92    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
93    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
94    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
95
96    setTargetDAGCombine(ISD::AND);
97    setTargetDAGCombine(ISD::OR);
98    setTargetDAGCombine(ISD::SRA);
99    setTargetDAGCombine(ISD::VSELECT);
100    setTargetDAGCombine(ISD::XOR);
101  }
102
103  if (!Subtarget->mipsSEUsesSoftFloat()) {
104    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
105
106    // When dealing with single precision only, use libcalls
107    if (!Subtarget->isSingleFloat()) {
108      if (Subtarget->isFP64bit())
109        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
110      else
111        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
112    }
113  }
114
115  setOperationAction(ISD::SMUL_LOHI,          MVT::i32, Custom);
116  setOperationAction(ISD::UMUL_LOHI,          MVT::i32, Custom);
117  setOperationAction(ISD::MULHS,              MVT::i32, Custom);
118  setOperationAction(ISD::MULHU,              MVT::i32, Custom);
119
120  if (Subtarget->hasCnMips())
121    setOperationAction(ISD::MUL,              MVT::i64, Legal);
122  else if (hasMips64())
123    setOperationAction(ISD::MUL,              MVT::i64, Custom);
124
125  if (hasMips64()) {
126    setOperationAction(ISD::MULHS,            MVT::i64, Custom);
127    setOperationAction(ISD::MULHU,            MVT::i64, Custom);
128  }
129
130  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
131  setOperationAction(ISD::INTRINSIC_W_CHAIN,  MVT::i64, Custom);
132
133  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
134  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
135  setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
136  setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
137  setOperationAction(ISD::ATOMIC_FENCE,       MVT::Other, Custom);
138  setOperationAction(ISD::LOAD,               MVT::i32, Custom);
139  setOperationAction(ISD::STORE,              MVT::i32, Custom);
140
141  setTargetDAGCombine(ISD::ADDE);
142  setTargetDAGCombine(ISD::SUBE);
143  setTargetDAGCombine(ISD::MUL);
144
145  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
146  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
147  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
148
149  if (NoDPLoadStore) {
150    setOperationAction(ISD::LOAD, MVT::f64, Custom);
151    setOperationAction(ISD::STORE, MVT::f64, Custom);
152  }
153
154  computeRegisterProperties();
155}
156
157const MipsTargetLowering *
158llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
159  return new MipsSETargetLowering(TM);
160}
161
162// Enable MSA support for the given integer type and Register class.
163void MipsSETargetLowering::
164addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
165  addRegisterClass(Ty, RC);
166
167  // Expand all builtin opcodes.
168  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
169    setOperationAction(Opc, Ty, Expand);
170
171  setOperationAction(ISD::BITCAST, Ty, Legal);
172  setOperationAction(ISD::LOAD, Ty, Legal);
173  setOperationAction(ISD::STORE, Ty, Legal);
174  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
175  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
176  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
177
178  setOperationAction(ISD::ADD, Ty, Legal);
179  setOperationAction(ISD::AND, Ty, Legal);
180  setOperationAction(ISD::CTLZ, Ty, Legal);
181  setOperationAction(ISD::CTPOP, Ty, Legal);
182  setOperationAction(ISD::MUL, Ty, Legal);
183  setOperationAction(ISD::OR, Ty, Legal);
184  setOperationAction(ISD::SDIV, Ty, Legal);
185  setOperationAction(ISD::SREM, Ty, Legal);
186  setOperationAction(ISD::SHL, Ty, Legal);
187  setOperationAction(ISD::SRA, Ty, Legal);
188  setOperationAction(ISD::SRL, Ty, Legal);
189  setOperationAction(ISD::SUB, Ty, Legal);
190  setOperationAction(ISD::UDIV, Ty, Legal);
191  setOperationAction(ISD::UREM, Ty, Legal);
192  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
193  setOperationAction(ISD::VSELECT, Ty, Legal);
194  setOperationAction(ISD::XOR, Ty, Legal);
195
196  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
197    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
198    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
199    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
200    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
201  }
202
203  setOperationAction(ISD::SETCC, Ty, Legal);
204  setCondCodeAction(ISD::SETNE, Ty, Expand);
205  setCondCodeAction(ISD::SETGE, Ty, Expand);
206  setCondCodeAction(ISD::SETGT, Ty, Expand);
207  setCondCodeAction(ISD::SETUGE, Ty, Expand);
208  setCondCodeAction(ISD::SETUGT, Ty, Expand);
209}
210
211// Enable MSA support for the given floating-point type and Register class.
212void MipsSETargetLowering::
213addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
214  addRegisterClass(Ty, RC);
215
216  // Expand all builtin opcodes.
217  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
218    setOperationAction(Opc, Ty, Expand);
219
220  setOperationAction(ISD::LOAD, Ty, Legal);
221  setOperationAction(ISD::STORE, Ty, Legal);
222  setOperationAction(ISD::BITCAST, Ty, Legal);
223  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
224  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
225  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
226
227  if (Ty != MVT::v8f16) {
228    setOperationAction(ISD::FABS,  Ty, Legal);
229    setOperationAction(ISD::FADD,  Ty, Legal);
230    setOperationAction(ISD::FDIV,  Ty, Legal);
231    setOperationAction(ISD::FEXP2, Ty, Legal);
232    setOperationAction(ISD::FLOG2, Ty, Legal);
233    setOperationAction(ISD::FMA,   Ty, Legal);
234    setOperationAction(ISD::FMUL,  Ty, Legal);
235    setOperationAction(ISD::FRINT, Ty, Legal);
236    setOperationAction(ISD::FSQRT, Ty, Legal);
237    setOperationAction(ISD::FSUB,  Ty, Legal);
238    setOperationAction(ISD::VSELECT, Ty, Legal);
239
240    setOperationAction(ISD::SETCC, Ty, Legal);
241    setCondCodeAction(ISD::SETOGE, Ty, Expand);
242    setCondCodeAction(ISD::SETOGT, Ty, Expand);
243    setCondCodeAction(ISD::SETUGE, Ty, Expand);
244    setCondCodeAction(ISD::SETUGT, Ty, Expand);
245    setCondCodeAction(ISD::SETGE,  Ty, Expand);
246    setCondCodeAction(ISD::SETGT,  Ty, Expand);
247  }
248}
249
250bool
251MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
252                                                    unsigned,
253                                                    bool *Fast) const {
254  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
255
256  switch (SVT) {
257  case MVT::i64:
258  case MVT::i32:
259    if (Fast)
260      *Fast = true;
261    return true;
262  default:
263    return false;
264  }
265}
266
267SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
268                                             SelectionDAG &DAG) const {
269  switch(Op.getOpcode()) {
270  case ISD::LOAD:  return lowerLOAD(Op, DAG);
271  case ISD::STORE: return lowerSTORE(Op, DAG);
272  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
273  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
274  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
275  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
276  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
277  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
278  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
279                                          DAG);
280  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
281  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
282  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
283  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
284  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
285  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
286  }
287
288  return MipsTargetLowering::LowerOperation(Op, DAG);
289}
290
291// selectMADD -
292// Transforms a subgraph in CurDAG if the following pattern is found:
293//  (addc multLo, Lo0), (adde multHi, Hi0),
294// where,
295//  multHi/Lo: product of multiplication
296//  Lo0: initial value of Lo register
297//  Hi0: initial value of Hi register
298// Return true if pattern matching was successful.
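// For example (sketch), given MultNode = (smul_lohi $a, $b):
//   (addc (smul_lohi $a, $b):0, $lo0), (adde (smul_lohi $a, $b):1, $hi0)
// is rewritten so that the addc/adde results are read back as
//   (MipsMFLO (MipsMAdd $a, $b, (MipsMTLOHI $lo0, $hi0))) and
//   (MipsMFHI (MipsMAdd $a, $b, (MipsMTLOHI $lo0, $hi0))) respectively.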
299static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
300  // ADDENode's second operand must be a flag output of an ADDC node in order
301  // for the matching to be successful.
302  SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
303
304  if (ADDCNode->getOpcode() != ISD::ADDC)
305    return false;
306
307  SDValue MultHi = ADDENode->getOperand(0);
308  SDValue MultLo = ADDCNode->getOperand(0);
309  SDNode *MultNode = MultHi.getNode();
310  unsigned MultOpc = MultHi.getOpcode();
311
312  // MultHi and MultLo must be generated by the same node,
313  if (MultLo.getNode() != MultNode)
314    return false;
315
316  // and it must be a multiplication.
317  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
318    return false;
319
320  // MultLo and MultHi must be the first and second output of MultNode
321  // respectively.
322  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
323    return false;
324
325  // Transform this to a MADD only if ADDENode and ADDCNode are the only users
326  // of the values of MultNode, in which case MultNode will be removed in later
327  // phases.
328  // If there exist users other than ADDENode or ADDCNode, this function returns
329  // here, which will result in MultNode being mapped to a single MULT
330  // instruction node rather than a pair of MULT and MADD instructions being
331  // produced.
332  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
333    return false;
334
335  SDLoc DL(ADDENode);
336
337  // Initialize accumulator.
338  SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
339                                  ADDCNode->getOperand(1),
340                                  ADDENode->getOperand(1));
341
342  // create MipsMAdd(u) node
343  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
344
345  SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
346                                 MultNode->getOperand(0),// Factor 0
347                                 MultNode->getOperand(1),// Factor 1
348                                 ACCIn);
349
350  // replace uses of adde and addc here
351  if (!SDValue(ADDCNode, 0).use_empty()) {
352    SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
353    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
354  }
355  if (!SDValue(ADDENode, 0).use_empty()) {
356    SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
357    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
358  }
359
360  return true;
361}
362
363// selectMSUB -
364// Transforms a subgraph in CurDAG if the following pattern is found:
365//  (subc Lo0, multLo), (sube Hi0, multHi),
366// where,
367//  multHi/Lo: product of multiplication
368//  Lo0: initial value of Lo register
369//  Hi0: initial value of Hi register
370// Return true if pattern matching was successful.
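// For example (sketch), given MultNode = (smul_lohi $a, $b):
//   (subc $lo0, (smul_lohi $a, $b):0), (sube $hi0, (smul_lohi $a, $b):1)
// is rewritten so that the subc/sube results are read back as
//   (MipsMFLO (MipsMSub $a, $b, (MipsMTLOHI $lo0, $hi0))) and
//   (MipsMFHI (MipsMSub $a, $b, (MipsMTLOHI $lo0, $hi0))) respectively.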
371static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
372  // SUBENode's second operand must be a flag output of an SUBC node in order
373  // for the matching to be successful.
374  SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
375
376  if (SUBCNode->getOpcode() != ISD::SUBC)
377    return false;
378
379  SDValue MultHi = SUBENode->getOperand(1);
380  SDValue MultLo = SUBCNode->getOperand(1);
381  SDNode *MultNode = MultHi.getNode();
382  unsigned MultOpc = MultHi.getOpcode();
383
384  // MultHi and MultLo must be generated by the same node,
385  if (MultLo.getNode() != MultNode)
386    return false;
387
388  // and it must be a multiplication.
389  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
390    return false;
391
392  // MultLo and MultHi must be the first and second output of MultNode
393  // respectively.
394  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
395    return false;
396
397  // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
398  // of the values of MultNode, in which case MultNode will be removed in later
399  // phases.
400  // If there exist users other than SUBENode or SUBCNode, this function returns
401  // here, which will result in MultNode being mapped to a single MULT
402  // instruction node rather than a pair of MULT and MSUB instructions being
403  // produced.
404  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
405    return false;
406
407  SDLoc DL(SUBENode);
408
409  // Initialize accumulator.
410  SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
411                                  SUBCNode->getOperand(0),
412                                  SUBENode->getOperand(0));
413
414  // create MipsMSub(u) node
415  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
416
417  SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
418                                 MultNode->getOperand(0),// Factor 0
419                                 MultNode->getOperand(1),// Factor 1
420                                 ACCIn);
421
422  // replace uses of sube and subc here
423  if (!SDValue(SUBCNode, 0).use_empty()) {
424    SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
425    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
426  }
427  if (!SDValue(SUBENode, 0).use_empty()) {
428    SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
429    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
430  }
431
432  return true;
433}
434
435static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
436                                  TargetLowering::DAGCombinerInfo &DCI,
437                                  const MipsSubtarget *Subtarget) {
438  if (DCI.isBeforeLegalize())
439    return SDValue();
440
441  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
442      selectMADD(N, &DAG))
443    return SDValue(N, 0);
444
445  return SDValue();
446}
447
448// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
449//
450// Performs the following transformations:
451// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
452//   sign/zero-extension is completely overwritten by the new one performed by
453//   the ISD::AND.
454// - Removes redundant zero extensions performed by an ISD::AND.
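// For example, assuming a 16-bit extracted element:
//   (and (MipsVExtractSExt $w0, $idx, i16), 65535)
// becomes
//   (MipsVExtractZExt $w0, $idx, i16)
// because the mask (0xffff) makes the original sign-extension irrelevant.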
455static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
456                                 TargetLowering::DAGCombinerInfo &DCI,
457                                 const MipsSubtarget *Subtarget) {
458  if (!Subtarget->hasMSA())
459    return SDValue();
460
461  SDValue Op0 = N->getOperand(0);
462  SDValue Op1 = N->getOperand(1);
463  unsigned Op0Opcode = Op0->getOpcode();
464
465  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
466  // where $d + 1 == 2^n and n == 32
467  // or    $d + 1 == 2^n and n <= 32 and ZExt
468  // -> (MipsVExtractZExt $a, $b, $c)
469  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
470      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
471    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);
472
473    if (!Mask)
474      return SDValue();
475
476    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
477
478    if (Log2IfPositive <= 0)
479      return SDValue(); // Mask+1 is not a power of 2
480
481    SDValue Op0Op2 = Op0->getOperand(2);
482    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
483    unsigned ExtendTySize = ExtendTy.getSizeInBits();
484    unsigned Log2 = Log2IfPositive;
485
486    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
487        Log2 == ExtendTySize) {
488      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
489      DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
490                      Op0->getVTList(), Ops, Op0->getNumOperands());
491      return Op0;
492    }
493  }
494
495  return SDValue();
496}
497
498// Determine if the specified node is a constant vector splat.
499//
500// Returns true and sets Imm if:
501// * N is an ISD::BUILD_VECTOR representing a constant splat
502//
503// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
504// differences are that it assumes MSA support has already been checked and
505// that the arbitrary restriction to splats of at most 32 bits isn't applied
506// (and must not be, in order for binsri.d to be selectable).
507static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
508  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());
509
510  if (Node == NULL)
511    return false;
512
513  APInt SplatValue, SplatUndef;
514  unsigned SplatBitSize;
515  bool HasAnyUndefs;
516
517  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
518                             8, !IsLittleEndian))
519    return false;
520
521  Imm = SplatValue;
522
523  return true;
524}
525
526// Test whether the given node is an all-ones build_vector.
527static bool isVectorAllOnes(SDValue N) {
528  // Look through bitcasts. Endianness doesn't matter because we are looking
529  // for an all-ones value.
530  if (N->getOpcode() == ISD::BITCAST)
531    N = N->getOperand(0);
532
533  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
534
535  if (!BVN)
536    return false;
537
538  APInt SplatValue, SplatUndef;
539  unsigned SplatBitSize;
540  bool HasAnyUndefs;
541
542  // Endianness doesn't matter in this context because we are looking for
543  // an all-ones value.
544  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
545    return SplatValue.isAllOnesValue();
546
547  return false;
548}
549
550// Test whether N is the bitwise inverse of OfNode.
551static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
552  if (N->getOpcode() != ISD::XOR)
553    return false;
554
555  if (isVectorAllOnes(N->getOperand(0)))
556    return N->getOperand(1) == OfNode;
557
558  if (isVectorAllOnes(N->getOperand(1)))
559    return N->getOperand(0) == OfNode;
560
561  return false;
562}
563
564// Perform combines where ISD::OR is the root node.
565//
566// Performs the following transformations:
567// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
568//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
569//   vector type.
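// For example, with a constant splat mask:
//   (or (and $a, (build_vector 0x0f0f...)), (and $b, (build_vector 0xf0f0...)))
// becomes
//   (vselect (build_vector 0x0f0f...), $a, $b)
// which can then be matched by the MSA bitwise select instructions.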
570static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
571                                TargetLowering::DAGCombinerInfo &DCI,
572                                const MipsSubtarget *Subtarget) {
573  if (!Subtarget->hasMSA())
574    return SDValue();
575
576  EVT Ty = N->getValueType(0);
577
578  if (!Ty.is128BitVector())
579    return SDValue();
580
581  SDValue Op0 = N->getOperand(0);
582  SDValue Op1 = N->getOperand(1);
583
584  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
585    SDValue Op0Op0 = Op0->getOperand(0);
586    SDValue Op0Op1 = Op0->getOperand(1);
587    SDValue Op1Op0 = Op1->getOperand(0);
588    SDValue Op1Op1 = Op1->getOperand(1);
589    bool IsLittleEndian = !Subtarget->isLittle();
590
591    SDValue IfSet, IfClr, Cond;
592    bool IsConstantMask = false;
593    APInt Mask, InvMask;
594
595    // If Op0Op0 is an appropriate mask, try to find its inverse in either
596    // Op1Op0 or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes while
597    // looking.
598    // IfClr will be set if we find a valid match.
599    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
600      Cond = Op0Op0;
601      IfSet = Op0Op1;
602
603      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
604          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
605        IfClr = Op1Op1;
606      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
607               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
608        IfClr = Op1Op0;
609
610      IsConstantMask = true;
611    }
612
613    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
614    // thing again using this mask.
615    // IfClr will be set if we find a valid match.
616    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
617      Cond = Op0Op1;
618      IfSet = Op0Op0;
619
620      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
621          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
622        IfClr = Op1Op1;
623      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
624               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
625        IfClr = Op1Op0;
626
627      IsConstantMask = true;
628    }
629
630    // If IfClr is not yet set, try looking for a non-constant match.
631    // IfClr will be set if we find a valid match amongst the eight
632    // possibilities.
633    if (!IfClr.getNode()) {
634      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
635        Cond = Op1Op0;
636        IfSet = Op1Op1;
637        IfClr = Op0Op1;
638      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
639        Cond = Op1Op0;
640        IfSet = Op1Op1;
641        IfClr = Op0Op0;
642      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
643        Cond = Op1Op1;
644        IfSet = Op1Op0;
645        IfClr = Op0Op1;
646      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
647        Cond = Op1Op1;
648        IfSet = Op1Op0;
649        IfClr = Op0Op0;
650      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
651        Cond = Op0Op0;
652        IfSet = Op0Op1;
653        IfClr = Op1Op1;
654      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
655        Cond = Op0Op0;
656        IfSet = Op0Op1;
657        IfClr = Op1Op0;
658      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
659        Cond = Op0Op1;
660        IfSet = Op0Op0;
661        IfClr = Op1Op1;
662      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
663        Cond = Op0Op1;
664        IfSet = Op0Op0;
665        IfClr = Op1Op0;
666      }
667    }
668
669    // At this point, IfClr will be set if we have a valid match.
670    if (!IfClr.getNode())
671      return SDValue();
672
673    assert(Cond.getNode() && IfSet.getNode());
674
675    // Fold degenerate cases.
676    if (IsConstantMask) {
677      if (Mask.isAllOnesValue())
678        return IfSet;
679      else if (Mask == 0)
680        return IfClr;
681    }
682
683    // Transform the DAG into an equivalent VSELECT.
684    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
685  }
686
687  return SDValue();
688}
689
690static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
691                                  TargetLowering::DAGCombinerInfo &DCI,
692                                  const MipsSubtarget *Subtarget) {
693  if (DCI.isBeforeLegalize())
694    return SDValue();
695
696  if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
697      selectMSUB(N, &DAG))
698    return SDValue(N, 0);
699
700  return SDValue();
701}
702
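// Generate a multiplication of X by the constant C using only shifts, adds and
// subs. For example, C == 17 yields (add (shl X, 4), X) and C == 15 yields
// (sub (shl X, 4), X).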
703static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT,
704                            EVT ShiftTy, SelectionDAG &DAG) {
705  // Clear the upper (64 - VT.sizeInBits) bits.
706  C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits());
707
708  // Return 0.
709  if (C == 0)
710    return DAG.getConstant(0, VT);
711
712  // Return x.
713  if (C == 1)
714    return X;
715
716  // If c is power of 2, return (shl x, log2(c)).
717  if (isPowerOf2_64(C))
718    return DAG.getNode(ISD::SHL, DL, VT, X,
719                       DAG.getConstant(Log2_64(C), ShiftTy));
720
721  unsigned Log2Ceil = Log2_64_Ceil(C);
722  uint64_t Floor = 1LL << Log2_64(C);
723  uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil;
724
725  // If |c - floor_c| <= |c - ceil_c|,
726  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
727  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
728  if (C - Floor <= Ceil - C) {
729    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
730    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
731    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
732  }
733
734  // If |c - floor_c| > |c - ceil_c|,
735  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
736  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
737  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
738  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
739}
740
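// Replace (mul $x, imm) with the shift/add/sub sequence produced by
// genConstMult when the result type is scalar.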
741static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
742                                 const TargetLowering::DAGCombinerInfo &DCI,
743                                 const MipsSETargetLowering *TL) {
744  EVT VT = N->getValueType(0);
745
746  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
747    if (!VT.isVector())
748      return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N),
749                          VT, TL->getScalarShiftAmountTy(VT), DAG);
750
751  return SDValue(N, 0);
752}
753
754static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
755                                      SelectionDAG &DAG,
756                                      const MipsSubtarget *Subtarget) {
757  // See if this is a vector splat immediate node.
758  APInt SplatValue, SplatUndef;
759  unsigned SplatBitSize;
760  bool HasAnyUndefs;
761  unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
762  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
763
764  if (!Subtarget->hasDSP())
765    return SDValue();
766
767  if (!BV ||
768      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
769                           EltSize, !Subtarget->isLittle()) ||
770      (SplatBitSize != EltSize) ||
771      (SplatValue.getZExtValue() >= EltSize))
772    return SDValue();
773
774  return DAG.getNode(Opc, SDLoc(N), Ty, N->getOperand(0),
775                     DAG.getConstant(SplatValue.getZExtValue(), MVT::i32));
776}
777
778static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
779                                 TargetLowering::DAGCombinerInfo &DCI,
780                                 const MipsSubtarget *Subtarget) {
781  EVT Ty = N->getValueType(0);
782
783  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
784    return SDValue();
785
786  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
787}
788
789// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
790// constant splats into MipsISD::SHRA_DSP for DSPr2.
791//
792// Performs the following transformations:
793// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
794//   sign/zero-extension is completely overwritten by the new one performed by
795//   the ISD::SRA and ISD::SHL nodes.
796// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
797//   sequence.
798//
799// See performDSPShiftCombine for more information about the transformation
800// used for DSPr2.
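// For example, assuming a 16-bit extracted element:
//   (sra (shl (MipsVExtractZExt $w0, $idx, i16), 16), 16)
// becomes
//   (MipsVExtractSExt $w0, $idx, i16)
// because the shl/sra pair re-extends from bit 15, overriding the original
// zero-extension.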
801static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
802                                 TargetLowering::DAGCombinerInfo &DCI,
803                                 const MipsSubtarget *Subtarget) {
804  EVT Ty = N->getValueType(0);
805
806  if (Subtarget->hasMSA()) {
807    SDValue Op0 = N->getOperand(0);
808    SDValue Op1 = N->getOperand(1);
809
810    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
811    // where $d + sizeof($c) == 32
812    // or    $d + sizeof($c) <= 32 and SExt
813    // -> (MipsVExtractSExt $a, $b, $c)
814    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
815      SDValue Op0Op0 = Op0->getOperand(0);
816      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);
817
818      if (!ShAmount)
819        return SDValue();
820
821      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
822          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
823        return SDValue();
824
825      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
826      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
827
828      if (TotalBits == 32 ||
829          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
830           TotalBits <= 32)) {
831        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
832                          Op0Op0->getOperand(2) };
833        DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
834                        Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands());
835        return Op0Op0;
836      }
837    }
838  }
839
840  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
841    return SDValue();
842
843  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
844}
845
846
847static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
848                                 TargetLowering::DAGCombinerInfo &DCI,
849                                 const MipsSubtarget *Subtarget) {
850  EVT Ty = N->getValueType(0);
851
852  if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8))
853    return SDValue();
854
855  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
856}
857
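// Return true if the given condition code can be matched directly by the DSP
// compare instructions for the given vector type. Roughly, the DSP ASE
// provides signed compares (cmp.*.ph) for v2i16, unsigned compares
// (cmpu.*.qb) for v4i8, and equality compares for both.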
858static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
859  bool IsV216 = (Ty == MVT::v2i16);
860
861  switch (CC) {
862  case ISD::SETEQ:
863  case ISD::SETNE:  return true;
864  case ISD::SETLT:
865  case ISD::SETLE:
866  case ISD::SETGT:
867  case ISD::SETGE:  return IsV216;
868  case ISD::SETULT:
869  case ISD::SETULE:
870  case ISD::SETUGT:
871  case ISD::SETUGE: return !IsV216;
872  default:          return false;
873  }
874}
875
876static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
877  EVT Ty = N->getValueType(0);
878
879  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
880    return SDValue();
881
882  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
883    return SDValue();
884
885  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
886                     N->getOperand(1), N->getOperand(2));
887}
888
889static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
890  EVT Ty = N->getValueType(0);
891
892  if (Ty.is128BitVector() && Ty.isInteger()) {
893    // Try the following combines:
894    //   (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b)
895    //   (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b)
896    //   (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b)
897    //   (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b)
898    //   (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b)
899    //   (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b)
900    //   (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b)
901    //   (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b)
902    // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but
903    // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the
904    // legalizer.
905    SDValue Op0 = N->getOperand(0);
906
907    if (Op0->getOpcode() != ISD::SETCC)
908      return SDValue();
909
910    ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get();
911    bool Signed;
912
913    if (CondCode == ISD::SETLT  || CondCode == ISD::SETLE)
914      Signed = true;
915    else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE)
916      Signed = false;
917    else
918      return SDValue();
919
920    SDValue Op1 = N->getOperand(1);
921    SDValue Op2 = N->getOperand(2);
922    SDValue Op0Op0 = Op0->getOperand(0);
923    SDValue Op0Op1 = Op0->getOperand(1);
924
925    if (Op1 == Op0Op0 && Op2 == Op0Op1)
926      return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N),
927                         Ty, Op1, Op2);
928    else if (Op1 == Op0Op1 && Op2 == Op0Op0)
929      return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N),
930                         Ty, Op1, Op2);
931  } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) {
932    SDValue SetCC = N->getOperand(0);
933
934    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
935      return SDValue();
936
937    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
938                       SetCC.getOperand(0), SetCC.getOperand(1),
939                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
940  }
941
942  return SDValue();
943}
944
945static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
946                                 const MipsSubtarget *Subtarget) {
947  EVT Ty = N->getValueType(0);
948
949  if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
950    // Try the following combines:
951    //   (xor (or $a, $b), (build_vector allones))
952    //   (xor (or $a, $b), (bitcast (build_vector allones)))
953    SDValue Op0 = N->getOperand(0);
954    SDValue Op1 = N->getOperand(1);
955    SDValue NotOp;
956
957    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
958      NotOp = Op1;
959    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
960      NotOp = Op0;
961    else
962      return SDValue();
963
964    if (NotOp->getOpcode() == ISD::OR)
965      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
966                         NotOp->getOperand(1));
967  }
968
969  return SDValue();
970}
971
972SDValue
973MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
974  SelectionDAG &DAG = DCI.DAG;
975  SDValue Val;
976
977  switch (N->getOpcode()) {
978  case ISD::ADDE:
979    return performADDECombine(N, DAG, DCI, Subtarget);
980  case ISD::AND:
981    Val = performANDCombine(N, DAG, DCI, Subtarget);
982    break;
983  case ISD::OR:
984    Val = performORCombine(N, DAG, DCI, Subtarget);
985    break;
986  case ISD::SUBE:
987    return performSUBECombine(N, DAG, DCI, Subtarget);
988  case ISD::MUL:
989    return performMULCombine(N, DAG, DCI, this);
990  case ISD::SHL:
991    return performSHLCombine(N, DAG, DCI, Subtarget);
992  case ISD::SRA:
993    return performSRACombine(N, DAG, DCI, Subtarget);
994  case ISD::SRL:
995    return performSRLCombine(N, DAG, DCI, Subtarget);
996  case ISD::VSELECT:
997    return performVSELECTCombine(N, DAG);
998  case ISD::XOR:
999    Val = performXORCombine(N, DAG, Subtarget);
1000    break;
1001  case ISD::SETCC:
1002    Val = performSETCCCombine(N, DAG);
1003    break;
1004  }
1005
1006  if (Val.getNode()) {
1007    DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
1008          N->printrWithDepth(dbgs(), &DAG);
1009          dbgs() << "\n=> \n";
1010          Val.getNode()->printrWithDepth(dbgs(), &DAG);
1011          dbgs() << "\n");
1012    return Val;
1013  }
1014
1015  return MipsTargetLowering::PerformDAGCombine(N, DCI);
1016}
1017
1018MachineBasicBlock *
1019MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
1020                                                  MachineBasicBlock *BB) const {
1021  switch (MI->getOpcode()) {
1022  default:
1023    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
1024  case Mips::BPOSGE32_PSEUDO:
1025    return emitBPOSGE32(MI, BB);
1026  case Mips::SNZ_B_PSEUDO:
1027    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
1028  case Mips::SNZ_H_PSEUDO:
1029    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
1030  case Mips::SNZ_W_PSEUDO:
1031    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
1032  case Mips::SNZ_D_PSEUDO:
1033    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
1034  case Mips::SNZ_V_PSEUDO:
1035    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
1036  case Mips::SZ_B_PSEUDO:
1037    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
1038  case Mips::SZ_H_PSEUDO:
1039    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
1040  case Mips::SZ_W_PSEUDO:
1041    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
1042  case Mips::SZ_D_PSEUDO:
1043    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
1044  case Mips::SZ_V_PSEUDO:
1045    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
1046  case Mips::COPY_FW_PSEUDO:
1047    return emitCOPY_FW(MI, BB);
1048  case Mips::COPY_FD_PSEUDO:
1049    return emitCOPY_FD(MI, BB);
1050  case Mips::INSERT_FW_PSEUDO:
1051    return emitINSERT_FW(MI, BB);
1052  case Mips::INSERT_FD_PSEUDO:
1053    return emitINSERT_FD(MI, BB);
1054  case Mips::FILL_FW_PSEUDO:
1055    return emitFILL_FW(MI, BB);
1056  case Mips::FILL_FD_PSEUDO:
1057    return emitFILL_FD(MI, BB);
1058  case Mips::FEXP2_W_1_PSEUDO:
1059    return emitFEXP2_W_1(MI, BB);
1060  case Mips::FEXP2_D_1_PSEUDO:
1061    return emitFEXP2_D_1(MI, BB);
1062  }
1063}
1064
1065bool MipsSETargetLowering::
1066isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
1067                                  unsigned NextStackOffset,
1068                                  const MipsFunctionInfo& FI) const {
1069  if (!EnableMipsTailCalls)
1070    return false;
1071
1072  // Return false if either the callee or caller has a byval argument.
1073  if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
1074    return false;
1075
1076  // Return true if the callee's argument area is no larger than the
1077  // caller's.
1078  return NextStackOffset <= FI.getIncomingArgSize();
1079}
1080
1081void MipsSETargetLowering::
1082getOpndList(SmallVectorImpl<SDValue> &Ops,
1083            std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
1084            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
1085            CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
1086  Ops.push_back(Callee);
1087  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
1088                                  InternalLinkage, CLI, Callee, Chain);
1089}
1090
1091SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1092  LoadSDNode &Nd = *cast<LoadSDNode>(Op);
1093
1094  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1095    return MipsTargetLowering::lowerLOAD(Op, DAG);
1096
1097  // Replace a double precision load with two i32 loads and a BuildPairF64.
1098  SDLoc DL(Op);
1099  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1100  EVT PtrVT = Ptr.getValueType();
1101
1102  // i32 load from lower address.
1103  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
1104                           MachinePointerInfo(), Nd.isVolatile(),
1105                           Nd.isNonTemporal(), Nd.isInvariant(),
1106                           Nd.getAlignment());
1107
1108  // i32 load from higher address.
1109  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
1110  SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
1111                           MachinePointerInfo(), Nd.isVolatile(),
1112                           Nd.isNonTemporal(), Nd.isInvariant(),
1113                           std::min(Nd.getAlignment(), 4U));
1114
1115  if (!Subtarget->isLittle())
1116    std::swap(Lo, Hi);
1117
1118  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1119  SDValue Ops[2] = {BP, Hi.getValue(1)};
1120  return DAG.getMergeValues(Ops, 2, DL);
1121}
1122
1123SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1124  StoreSDNode &Nd = *cast<StoreSDNode>(Op);
1125
1126  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1127    return MipsTargetLowering::lowerSTORE(Op, DAG);
1128
1129  // Replace a double precision store with two ExtractElementF64s and i32 stores.
1130  SDLoc DL(Op);
1131  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1132  EVT PtrVT = Ptr.getValueType();
1133  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
1134                           Val, DAG.getConstant(0, MVT::i32));
1135  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
1136                           Val, DAG.getConstant(1, MVT::i32));
1137
1138  if (!Subtarget->isLittle())
1139    std::swap(Lo, Hi);
1140
1141  // i32 store to lower address.
1142  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
1143                       Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
1144                       Nd.getTBAAInfo());
1145
1146  // i32 store to higher address.
1147  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
1148  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
1149                      Nd.isVolatile(), Nd.isNonTemporal(),
1150                      std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
1151}
1152
1153SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
1154                                          bool HasLo, bool HasHi,
1155                                          SelectionDAG &DAG) const {
1156  EVT Ty = Op.getOperand(0).getValueType();
1157  SDLoc DL(Op);
1158  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
1159                             Op.getOperand(0), Op.getOperand(1));
1160  SDValue Lo, Hi;
1161
1162  if (HasLo)
1163    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
1164  if (HasHi)
1165    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);
1166
1167  if (!HasLo || !HasHi)
1168    return HasLo ? Lo : Hi;
1169
1170  SDValue Vals[] = { Lo, Hi };
1171  return DAG.getMergeValues(Vals, 2, DL);
1172}
1173
1174
1175static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) {
1176  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
1177                             DAG.getConstant(0, MVT::i32));
1178  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
1179                             DAG.getConstant(1, MVT::i32));
1180  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
1181}
1182
1183static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
1184  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
1185  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
1186  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
1187}
1188
1189// This function expands mips intrinsic nodes which have 64-bit input operands
1190// or output values.
1191//
1192// out64 = intrinsic-node in64
1193// =>
1194// lo = copy (extract-element (in64, 0))
1195// hi = copy (extract-element (in64, 1))
1196// mips-specific-node
1197// v0 = copy lo
1198// v1 = copy hi
1199// out64 = merge-values (v0, v1)
1200//
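// For example, a DSP accumulator intrinsic such as int_mips_madd, whose
// accumulator operand and result are i64, is expanded this way so that the
// 64-bit value is carried through the untyped HI/LO accumulator.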
1201static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1202  SDLoc DL(Op);
1203  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
1204  SmallVector<SDValue, 3> Ops;
1205  unsigned OpNo = 0;
1206
1207  // See if Op has a chain input.
1208  if (HasChainIn)
1209    Ops.push_back(Op->getOperand(OpNo++));
1210
1211  // The next operand is the intrinsic opcode.
1212  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
1213
1214  // See if the next operand has type i64.
1215  SDValue Opnd = Op->getOperand(++OpNo), In64;
1216
1217  if (Opnd.getValueType() == MVT::i64)
1218    In64 = initAccumulator(Opnd, DL, DAG);
1219  else
1220    Ops.push_back(Opnd);
1221
1222  // Push the remaining operands.
1223  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
1224    Ops.push_back(Op->getOperand(OpNo));
1225
1226  // Add In64 to the end of the list.
1227  if (In64.getNode())
1228    Ops.push_back(In64);
1229
1230  // Scan output.
1231  SmallVector<EVT, 2> ResTys;
1232
1233  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
1234       I != E; ++I)
1235    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
1236
1237  // Create node.
1238  SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
1239  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
1240
1241  if (!HasChainIn)
1242    return Out;
1243
1244  assert(Val->getValueType(1) == MVT::Other);
1245  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
1246  return DAG.getMergeValues(Vals, 2, DL);
1247}
1248
1249// Lower an MSA copy intrinsic into the specified SelectionDAG node
1250static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1251  SDLoc DL(Op);
1252  SDValue Vec = Op->getOperand(1);
1253  SDValue Idx = Op->getOperand(2);
1254  EVT ResTy = Op->getValueType(0);
1255  EVT EltTy = Vec->getValueType(0).getVectorElementType();
1256
1257  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
1258                               DAG.getValueType(EltTy));
1259
1260  return Result;
1261}
1262
1263static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
1264  EVT ResVecTy = Op->getValueType(0);
1265  EVT ViaVecTy = ResVecTy;
1266  SDLoc DL(Op);
1267
1268  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
1269  // LaneB is the lower 32 bits. Otherwise LaneA and LaneB are alternating
1270  // lanes.
1271  SDValue LaneA;
1272  SDValue LaneB = Op->getOperand(2);
1273
1274  if (ResVecTy == MVT::v2i64) {
1275    LaneA = DAG.getConstant(0, MVT::i32);
1276    ViaVecTy = MVT::v4i32;
1277  } else
1278    LaneA = LaneB;
1279
1280  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
1281                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
1282
1283  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
1284                               ViaVecTy.getVectorNumElements());
1285
1286  if (ViaVecTy != ResVecTy)
1287    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
1288
1289  return Result;
1290}
1291
1292static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
1293  return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0));
1294}
1295
1296static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
1297                                   bool BigEndian, SelectionDAG &DAG) {
1298  EVT ViaVecTy = VecTy;
1299  SDValue SplatValueA = SplatValue;
1300  SDValue SplatValueB = SplatValue;
1301  SDLoc DL(SplatValue);
1302
1303  if (VecTy == MVT::v2i64) {
1304    // v2i64 BUILD_VECTOR must be performed via v4i32, so split the value into i32s.
1305    ViaVecTy = MVT::v4i32;
1306
1307    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
1308    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
1309                              DAG.getConstant(32, MVT::i32));
1310    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
1311  }
1312
1313  // We currently hold the parts in little endian order. Swap them if
1314  // necessary.
1315  if (BigEndian)
1316    std::swap(SplatValueA, SplatValueB);
1317
1318  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1319                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1320                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1321                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };
1322
1323  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
1324                               ViaVecTy.getVectorNumElements());
1325
1326  if (VecTy != ViaVecTy)
1327    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
1328
1329  return Result;
1330}
1331
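// Lower an MSA bit-immediate intrinsic (e.g. bnegi) to the given vector opcode
// by materializing a splat of (1 << Imm) as the second operand.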
1332static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
1333                                        unsigned Opc, SDValue Imm,
1334                                        bool BigEndian) {
1335  EVT VecTy = Op->getValueType(0);
1336  SDValue Exp2Imm;
1337  SDLoc DL(Op);
1338
1339  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1340  // here for now.
1341  if (VecTy == MVT::v2i64) {
1342    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
1343      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
1344
1345      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32);
1346      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
1347
1348      if (BigEndian)
1349        std::swap(BitImmLoOp, BitImmHiOp);
1350
1351      Exp2Imm =
1352          DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
1353                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp,
1354                                  BitImmHiOp, BitImmLoOp, BitImmHiOp));
1355    }
1356  }
1357
1358  if (Exp2Imm.getNode() == NULL) {
1359    // We couldn't constant fold, so do a vector shift instead.
1360
1361    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1362    // only values 0-63 are valid.
1363    if (VecTy == MVT::v2i64)
1364      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
1365
1366    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
1367
1368    Exp2Imm =
1369        DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm);
1370  }
1371
1372  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
1373}
1374
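// Lower the bclr_[bhwd] intrinsics: bclr($a, $b) -> (and $a, (not (shl 1, $b))).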
1375static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
1376  EVT ResTy = Op->getValueType(0);
1377  SDLoc DL(Op);
1378  SDValue One = DAG.getConstant(1, ResTy);
1379  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
1380
1381  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
1382                     DAG.getNOT(DL, Bit, ResTy));
1383}
1384
1385static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
1386  SDLoc DL(Op);
1387  EVT ResTy = Op->getValueType(0);
1388  APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
1389                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
1390  SDValue BitMask = DAG.getConstant(~BitImm, ResTy);
1391
1392  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
1393}
1394
1395SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1396                                                      SelectionDAG &DAG) const {
1397  SDLoc DL(Op);
1398
1399  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
1400  default:
1401    return SDValue();
1402  case Intrinsic::mips_shilo:
1403    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
1404  case Intrinsic::mips_dpau_h_qbl:
1405    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
1406  case Intrinsic::mips_dpau_h_qbr:
1407    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
1408  case Intrinsic::mips_dpsu_h_qbl:
1409    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
1410  case Intrinsic::mips_dpsu_h_qbr:
1411    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
1412  case Intrinsic::mips_dpa_w_ph:
1413    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
1414  case Intrinsic::mips_dps_w_ph:
1415    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
1416  case Intrinsic::mips_dpax_w_ph:
1417    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
1418  case Intrinsic::mips_dpsx_w_ph:
1419    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
1420  case Intrinsic::mips_mulsa_w_ph:
1421    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
1422  case Intrinsic::mips_mult:
1423    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
1424  case Intrinsic::mips_multu:
1425    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
1426  case Intrinsic::mips_madd:
1427    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
1428  case Intrinsic::mips_maddu:
1429    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
1430  case Intrinsic::mips_msub:
1431    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
1432  case Intrinsic::mips_msubu:
1433    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
1434  case Intrinsic::mips_addv_b:
1435  case Intrinsic::mips_addv_h:
1436  case Intrinsic::mips_addv_w:
1437  case Intrinsic::mips_addv_d:
1438    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1439                       Op->getOperand(2));
1440  case Intrinsic::mips_addvi_b:
1441  case Intrinsic::mips_addvi_h:
1442  case Intrinsic::mips_addvi_w:
1443  case Intrinsic::mips_addvi_d:
1444    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1445                       lowerMSASplatImm(Op, 2, DAG));
1446  case Intrinsic::mips_and_v:
1447    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1448                       Op->getOperand(2));
1449  case Intrinsic::mips_andi_b:
1450    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1451                       lowerMSASplatImm(Op, 2, DAG));
1452  case Intrinsic::mips_bclr_b:
1453  case Intrinsic::mips_bclr_h:
1454  case Intrinsic::mips_bclr_w:
1455  case Intrinsic::mips_bclr_d:
1456    return lowerMSABitClear(Op, DAG);
1457  case Intrinsic::mips_bclri_b:
1458  case Intrinsic::mips_bclri_h:
1459  case Intrinsic::mips_bclri_w:
1460  case Intrinsic::mips_bclri_d:
1461    return lowerMSABitClearImm(Op, DAG);
1462  case Intrinsic::mips_binsli_b:
1463  case Intrinsic::mips_binsli_h:
1464  case Intrinsic::mips_binsli_w:
1465  case Intrinsic::mips_binsli_d: {
1466    // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1467    EVT VecTy = Op->getValueType(0);
1468    EVT EltTy = VecTy.getVectorElementType();
1469    APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
1470                                       Op->getConstantOperandVal(3));
1471    return DAG.getNode(ISD::VSELECT, DL, VecTy,
1472                       DAG.getConstant(Mask, VecTy, true), Op->getOperand(2),
1473                       Op->getOperand(1));
1474  }
1475  case Intrinsic::mips_binsri_b:
1476  case Intrinsic::mips_binsri_h:
1477  case Intrinsic::mips_binsri_w:
1478  case Intrinsic::mips_binsri_d: {
1479    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1480    EVT VecTy = Op->getValueType(0);
1481    EVT EltTy = VecTy.getVectorElementType();
1482    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
1483                                      Op->getConstantOperandVal(3));
1484    return DAG.getNode(ISD::VSELECT, DL, VecTy,
1485                       DAG.getConstant(Mask, VecTy, true), Op->getOperand(2),
1486                       Op->getOperand(1));
1487  }
1488  case Intrinsic::mips_bmnz_v:
1489    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1490                       Op->getOperand(2), Op->getOperand(1));
1491  case Intrinsic::mips_bmnzi_b:
1492    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1493                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
1494                       Op->getOperand(1));
1495  case Intrinsic::mips_bmz_v:
1496    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1497                       Op->getOperand(1), Op->getOperand(2));
1498  case Intrinsic::mips_bmzi_b:
1499    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1500                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
1501                       Op->getOperand(2));
1502  case Intrinsic::mips_bneg_b:
1503  case Intrinsic::mips_bneg_h:
1504  case Intrinsic::mips_bneg_w:
1505  case Intrinsic::mips_bneg_d: {
1506    EVT VecTy = Op->getValueType(0);
1507    SDValue One = DAG.getConstant(1, VecTy);
1508
1509    return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
1510                       DAG.getNode(ISD::SHL, DL, VecTy, One,
1511                                   Op->getOperand(2)));
1512  }
1513  case Intrinsic::mips_bnegi_b:
1514  case Intrinsic::mips_bnegi_h:
1515  case Intrinsic::mips_bnegi_w:
1516  case Intrinsic::mips_bnegi_d:
1517    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
1518                                    !Subtarget->isLittle());
1519  case Intrinsic::mips_bnz_b:
1520  case Intrinsic::mips_bnz_h:
1521  case Intrinsic::mips_bnz_w:
1522  case Intrinsic::mips_bnz_d:
1523    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
1524                       Op->getOperand(1));
1525  case Intrinsic::mips_bnz_v:
1526    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
1527                       Op->getOperand(1));
1528  case Intrinsic::mips_bsel_v:
1529    // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1530    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1531                       Op->getOperand(1), Op->getOperand(3),
1532                       Op->getOperand(2));
1533  case Intrinsic::mips_bseli_b:
1534    // bseli_b(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1535    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1536                       Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
1537                       Op->getOperand(2));
1538  case Intrinsic::mips_bset_b:
1539  case Intrinsic::mips_bset_h:
1540  case Intrinsic::mips_bset_w:
1541  case Intrinsic::mips_bset_d: {
1542    EVT VecTy = Op->getValueType(0);
1543    SDValue One = DAG.getConstant(1, VecTy);
1544
1545    return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
1546                       DAG.getNode(ISD::SHL, DL, VecTy, One,
1547                                   Op->getOperand(2)));
1548  }
1549  case Intrinsic::mips_bseti_b:
1550  case Intrinsic::mips_bseti_h:
1551  case Intrinsic::mips_bseti_w:
1552  case Intrinsic::mips_bseti_d:
1553    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
1554                                    !Subtarget->isLittle());
1555  case Intrinsic::mips_bz_b:
1556  case Intrinsic::mips_bz_h:
1557  case Intrinsic::mips_bz_w:
1558  case Intrinsic::mips_bz_d:
1559    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
1560                       Op->getOperand(1));
1561  case Intrinsic::mips_bz_v:
1562    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
1563                       Op->getOperand(1));
1564  case Intrinsic::mips_ceq_b:
1565  case Intrinsic::mips_ceq_h:
1566  case Intrinsic::mips_ceq_w:
1567  case Intrinsic::mips_ceq_d:
1568    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1569                        Op->getOperand(2), ISD::SETEQ);
1570  case Intrinsic::mips_ceqi_b:
1571  case Intrinsic::mips_ceqi_h:
1572  case Intrinsic::mips_ceqi_w:
1573  case Intrinsic::mips_ceqi_d:
1574    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1575                        lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ);
1576  case Intrinsic::mips_cle_s_b:
1577  case Intrinsic::mips_cle_s_h:
1578  case Intrinsic::mips_cle_s_w:
1579  case Intrinsic::mips_cle_s_d:
1580    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1581                        Op->getOperand(2), ISD::SETLE);
1582  case Intrinsic::mips_clei_s_b:
1583  case Intrinsic::mips_clei_s_h:
1584  case Intrinsic::mips_clei_s_w:
1585  case Intrinsic::mips_clei_s_d:
1586    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1587                        lowerMSASplatImm(Op, 2, DAG), ISD::SETLE);
1588  case Intrinsic::mips_cle_u_b:
1589  case Intrinsic::mips_cle_u_h:
1590  case Intrinsic::mips_cle_u_w:
1591  case Intrinsic::mips_cle_u_d:
1592    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1593                        Op->getOperand(2), ISD::SETULE);
1594  case Intrinsic::mips_clei_u_b:
1595  case Intrinsic::mips_clei_u_h:
1596  case Intrinsic::mips_clei_u_w:
1597  case Intrinsic::mips_clei_u_d:
1598    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1599                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
1600  case Intrinsic::mips_clt_s_b:
1601  case Intrinsic::mips_clt_s_h:
1602  case Intrinsic::mips_clt_s_w:
1603  case Intrinsic::mips_clt_s_d:
1604    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1605                        Op->getOperand(2), ISD::SETLT);
1606  case Intrinsic::mips_clti_s_b:
1607  case Intrinsic::mips_clti_s_h:
1608  case Intrinsic::mips_clti_s_w:
1609  case Intrinsic::mips_clti_s_d:
1610    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1611                        lowerMSASplatImm(Op, 2, DAG), ISD::SETLT);
1612  case Intrinsic::mips_clt_u_b:
1613  case Intrinsic::mips_clt_u_h:
1614  case Intrinsic::mips_clt_u_w:
1615  case Intrinsic::mips_clt_u_d:
1616    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1617                        Op->getOperand(2), ISD::SETULT);
1618  case Intrinsic::mips_clti_u_b:
1619  case Intrinsic::mips_clti_u_h:
1620  case Intrinsic::mips_clti_u_w:
1621  case Intrinsic::mips_clti_u_d:
1622    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1623                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
1624  case Intrinsic::mips_copy_s_b:
1625  case Intrinsic::mips_copy_s_h:
1626  case Intrinsic::mips_copy_s_w:
1627    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1628  case Intrinsic::mips_copy_s_d:
1629    if (hasMips64())
1630      // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1631      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1632    else {
1633      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1634      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1635      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1636                         Op->getValueType(0), Op->getOperand(1),
1637                         Op->getOperand(2));
1638    }
1639  case Intrinsic::mips_copy_u_b:
1640  case Intrinsic::mips_copy_u_h:
1641  case Intrinsic::mips_copy_u_w:
1642    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1643  case Intrinsic::mips_copy_u_d:
1644    if (hasMips64())
1645      // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1646      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1647    else {
1648      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1649      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1650      // Note: When i64 is illegal, this results in copy_s.w instructions
1651      // instead of copy_u.w instructions. This makes no difference to the
1652      // behaviour since i64 is only illegal when the register file is 32-bit.
1653      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1654                         Op->getValueType(0), Op->getOperand(1),
1655                         Op->getOperand(2));
1656    }
1657  case Intrinsic::mips_div_s_b:
1658  case Intrinsic::mips_div_s_h:
1659  case Intrinsic::mips_div_s_w:
1660  case Intrinsic::mips_div_s_d:
1661    return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
1662                       Op->getOperand(2));
1663  case Intrinsic::mips_div_u_b:
1664  case Intrinsic::mips_div_u_h:
1665  case Intrinsic::mips_div_u_w:
1666  case Intrinsic::mips_div_u_d:
1667    return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
1668                       Op->getOperand(2));
1669  case Intrinsic::mips_fadd_w:
1670  case Intrinsic::mips_fadd_d:
1671    return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
1672                       Op->getOperand(2));
1673  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1674  case Intrinsic::mips_fceq_w:
1675  case Intrinsic::mips_fceq_d:
1676    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1677                        Op->getOperand(2), ISD::SETOEQ);
1678  case Intrinsic::mips_fcle_w:
1679  case Intrinsic::mips_fcle_d:
1680    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1681                        Op->getOperand(2), ISD::SETOLE);
1682  case Intrinsic::mips_fclt_w:
1683  case Intrinsic::mips_fclt_d:
1684    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1685                        Op->getOperand(2), ISD::SETOLT);
1686  case Intrinsic::mips_fcne_w:
1687  case Intrinsic::mips_fcne_d:
1688    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1689                        Op->getOperand(2), ISD::SETONE);
1690  case Intrinsic::mips_fcor_w:
1691  case Intrinsic::mips_fcor_d:
1692    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1693                        Op->getOperand(2), ISD::SETO);
1694  case Intrinsic::mips_fcueq_w:
1695  case Intrinsic::mips_fcueq_d:
1696    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1697                        Op->getOperand(2), ISD::SETUEQ);
1698  case Intrinsic::mips_fcule_w:
1699  case Intrinsic::mips_fcule_d:
1700    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1701                        Op->getOperand(2), ISD::SETULE);
1702  case Intrinsic::mips_fcult_w:
1703  case Intrinsic::mips_fcult_d:
1704    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1705                        Op->getOperand(2), ISD::SETULT);
1706  case Intrinsic::mips_fcun_w:
1707  case Intrinsic::mips_fcun_d:
1708    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1709                        Op->getOperand(2), ISD::SETUO);
1710  case Intrinsic::mips_fcune_w:
1711  case Intrinsic::mips_fcune_d:
1712    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1713                        Op->getOperand(2), ISD::SETUNE);
1714  case Intrinsic::mips_fdiv_w:
1715  case Intrinsic::mips_fdiv_d:
1716    return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
1717                       Op->getOperand(2));
1718  case Intrinsic::mips_ffint_u_w:
1719  case Intrinsic::mips_ffint_u_d:
1720    return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
1721                       Op->getOperand(1));
1722  case Intrinsic::mips_ffint_s_w:
1723  case Intrinsic::mips_ffint_s_d:
1724    return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
1725                       Op->getOperand(1));
1726  case Intrinsic::mips_fill_b:
1727  case Intrinsic::mips_fill_h:
1728  case Intrinsic::mips_fill_w:
1729  case Intrinsic::mips_fill_d: {
1730    SmallVector<SDValue, 16> Ops;
1731    EVT ResTy = Op->getValueType(0);
1732
1733    for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
1734      Ops.push_back(Op->getOperand(1));
1735
1736    // If ResTy is v2i64 then the type legalizer will break this node down into
1737    // an equivalent v4i32.
1738    return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size());
1739  }
1740  case Intrinsic::mips_fexp2_w:
1741  case Intrinsic::mips_fexp2_d: {
1742    EVT ResTy = Op->getValueType(0);
1743    return DAG.getNode(
1744        ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
1745        DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
1746  }
1747  case Intrinsic::mips_flog2_w:
1748  case Intrinsic::mips_flog2_d:
1749    return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
1750  case Intrinsic::mips_fmadd_w:
1751  case Intrinsic::mips_fmadd_d:
1752    return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
1753                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1754  case Intrinsic::mips_fmul_w:
1755  case Intrinsic::mips_fmul_d:
1756    return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
1757                       Op->getOperand(2));
1758  case Intrinsic::mips_fmsub_w:
1759  case Intrinsic::mips_fmsub_d: {
1760    EVT ResTy = Op->getValueType(0);
1761    return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
1762                       DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
1763                                   Op->getOperand(2), Op->getOperand(3)));
1764  }
1765  case Intrinsic::mips_frint_w:
1766  case Intrinsic::mips_frint_d:
1767    return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
1768  case Intrinsic::mips_fsqrt_w:
1769  case Intrinsic::mips_fsqrt_d:
1770    return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
1771  case Intrinsic::mips_fsub_w:
1772  case Intrinsic::mips_fsub_d:
1773    return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
1774                       Op->getOperand(2));
1775  case Intrinsic::mips_ftrunc_u_w:
1776  case Intrinsic::mips_ftrunc_u_d:
1777    return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
1778                       Op->getOperand(1));
1779  case Intrinsic::mips_ftrunc_s_w:
1780  case Intrinsic::mips_ftrunc_s_d:
1781    return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
1782                       Op->getOperand(1));
1783  case Intrinsic::mips_ilvev_b:
1784  case Intrinsic::mips_ilvev_h:
1785  case Intrinsic::mips_ilvev_w:
1786  case Intrinsic::mips_ilvev_d:
1787    return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
1788                       Op->getOperand(1), Op->getOperand(2));
1789  case Intrinsic::mips_ilvl_b:
1790  case Intrinsic::mips_ilvl_h:
1791  case Intrinsic::mips_ilvl_w:
1792  case Intrinsic::mips_ilvl_d:
1793    return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
1794                       Op->getOperand(1), Op->getOperand(2));
1795  case Intrinsic::mips_ilvod_b:
1796  case Intrinsic::mips_ilvod_h:
1797  case Intrinsic::mips_ilvod_w:
1798  case Intrinsic::mips_ilvod_d:
1799    return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
1800                       Op->getOperand(1), Op->getOperand(2));
1801  case Intrinsic::mips_ilvr_b:
1802  case Intrinsic::mips_ilvr_h:
1803  case Intrinsic::mips_ilvr_w:
1804  case Intrinsic::mips_ilvr_d:
1805    return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
1806                       Op->getOperand(1), Op->getOperand(2));
1807  case Intrinsic::mips_insert_b:
1808  case Intrinsic::mips_insert_h:
1809  case Intrinsic::mips_insert_w:
1810  case Intrinsic::mips_insert_d:
1811    return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
1812                       Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
1813  case Intrinsic::mips_insve_b:
1814  case Intrinsic::mips_insve_h:
1815  case Intrinsic::mips_insve_w:
1816  case Intrinsic::mips_insve_d:
1817    return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
1818                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
1819                       DAG.getConstant(0, MVT::i32));
1820  case Intrinsic::mips_ldi_b:
1821  case Intrinsic::mips_ldi_h:
1822  case Intrinsic::mips_ldi_w:
1823  case Intrinsic::mips_ldi_d:
1824    return lowerMSASplatImm(Op, 1, DAG);
1825  case Intrinsic::mips_lsa:
1826  case Intrinsic::mips_dlsa: {
1827    EVT ResTy = Op->getValueType(0);
1828    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1829                       DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
1830                                   Op->getOperand(2), Op->getOperand(3)));
1831  }
1832  case Intrinsic::mips_maddv_b:
1833  case Intrinsic::mips_maddv_h:
1834  case Intrinsic::mips_maddv_w:
1835  case Intrinsic::mips_maddv_d: {
1836    EVT ResTy = Op->getValueType(0);
1837    return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1838                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
1839                                   Op->getOperand(2), Op->getOperand(3)));
1840  }
1841  case Intrinsic::mips_max_s_b:
1842  case Intrinsic::mips_max_s_h:
1843  case Intrinsic::mips_max_s_w:
1844  case Intrinsic::mips_max_s_d:
1845    return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
1846                       Op->getOperand(1), Op->getOperand(2));
1847  case Intrinsic::mips_max_u_b:
1848  case Intrinsic::mips_max_u_h:
1849  case Intrinsic::mips_max_u_w:
1850  case Intrinsic::mips_max_u_d:
1851    return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
1852                       Op->getOperand(1), Op->getOperand(2));
1853  case Intrinsic::mips_maxi_s_b:
1854  case Intrinsic::mips_maxi_s_h:
1855  case Intrinsic::mips_maxi_s_w:
1856  case Intrinsic::mips_maxi_s_d:
1857    return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
1858                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
1859  case Intrinsic::mips_maxi_u_b:
1860  case Intrinsic::mips_maxi_u_h:
1861  case Intrinsic::mips_maxi_u_w:
1862  case Intrinsic::mips_maxi_u_d:
1863    return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
1864                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
1865  case Intrinsic::mips_min_s_b:
1866  case Intrinsic::mips_min_s_h:
1867  case Intrinsic::mips_min_s_w:
1868  case Intrinsic::mips_min_s_d:
1869    return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
1870                       Op->getOperand(1), Op->getOperand(2));
1871  case Intrinsic::mips_min_u_b:
1872  case Intrinsic::mips_min_u_h:
1873  case Intrinsic::mips_min_u_w:
1874  case Intrinsic::mips_min_u_d:
1875    return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
1876                       Op->getOperand(1), Op->getOperand(2));
1877  case Intrinsic::mips_mini_s_b:
1878  case Intrinsic::mips_mini_s_h:
1879  case Intrinsic::mips_mini_s_w:
1880  case Intrinsic::mips_mini_s_d:
1881    return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
1882                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
1883  case Intrinsic::mips_mini_u_b:
1884  case Intrinsic::mips_mini_u_h:
1885  case Intrinsic::mips_mini_u_w:
1886  case Intrinsic::mips_mini_u_d:
1887    return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
1888                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
1889  case Intrinsic::mips_mod_s_b:
1890  case Intrinsic::mips_mod_s_h:
1891  case Intrinsic::mips_mod_s_w:
1892  case Intrinsic::mips_mod_s_d:
1893    return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
1894                       Op->getOperand(2));
1895  case Intrinsic::mips_mod_u_b:
1896  case Intrinsic::mips_mod_u_h:
1897  case Intrinsic::mips_mod_u_w:
1898  case Intrinsic::mips_mod_u_d:
1899    return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
1900                       Op->getOperand(2));
1901  case Intrinsic::mips_mulv_b:
1902  case Intrinsic::mips_mulv_h:
1903  case Intrinsic::mips_mulv_w:
1904  case Intrinsic::mips_mulv_d:
1905    return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
1906                       Op->getOperand(2));
1907  case Intrinsic::mips_msubv_b:
1908  case Intrinsic::mips_msubv_h:
1909  case Intrinsic::mips_msubv_w:
1910  case Intrinsic::mips_msubv_d: {
1911    EVT ResTy = Op->getValueType(0);
1912    return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
1913                       DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
1914                                   Op->getOperand(2), Op->getOperand(3)));
1915  }
1916  case Intrinsic::mips_nlzc_b:
1917  case Intrinsic::mips_nlzc_h:
1918  case Intrinsic::mips_nlzc_w:
1919  case Intrinsic::mips_nlzc_d:
1920    return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
1921  case Intrinsic::mips_nor_v: {
1922    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
1923                              Op->getOperand(1), Op->getOperand(2));
1924    return DAG.getNOT(DL, Res, Res->getValueType(0));
1925  }
1926  case Intrinsic::mips_nori_b: {
1927    SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
1928                              Op->getOperand(1),
1929                              lowerMSASplatImm(Op, 2, DAG));
1930    return DAG.getNOT(DL, Res, Res->getValueType(0));
1931  }
1932  case Intrinsic::mips_or_v:
1933    return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
1934                       Op->getOperand(2));
1935  case Intrinsic::mips_ori_b:
1936    return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
1937                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
1938  case Intrinsic::mips_pckev_b:
1939  case Intrinsic::mips_pckev_h:
1940  case Intrinsic::mips_pckev_w:
1941  case Intrinsic::mips_pckev_d:
1942    return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
1943                       Op->getOperand(1), Op->getOperand(2));
1944  case Intrinsic::mips_pckod_b:
1945  case Intrinsic::mips_pckod_h:
1946  case Intrinsic::mips_pckod_w:
1947  case Intrinsic::mips_pckod_d:
1948    return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
1949                       Op->getOperand(1), Op->getOperand(2));
1950  case Intrinsic::mips_pcnt_b:
1951  case Intrinsic::mips_pcnt_h:
1952  case Intrinsic::mips_pcnt_w:
1953  case Intrinsic::mips_pcnt_d:
1954    return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
1955  case Intrinsic::mips_shf_b:
1956  case Intrinsic::mips_shf_h:
1957  case Intrinsic::mips_shf_w:
1958    return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
1959                       Op->getOperand(2), Op->getOperand(1));
1960  case Intrinsic::mips_sll_b:
1961  case Intrinsic::mips_sll_h:
1962  case Intrinsic::mips_sll_w:
1963  case Intrinsic::mips_sll_d:
1964    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
1965                       Op->getOperand(2));
1966  case Intrinsic::mips_slli_b:
1967  case Intrinsic::mips_slli_h:
1968  case Intrinsic::mips_slli_w:
1969  case Intrinsic::mips_slli_d:
1970    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
1971                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
1972  case Intrinsic::mips_splat_b:
1973  case Intrinsic::mips_splat_h:
1974  case Intrinsic::mips_splat_w:
1975  case Intrinsic::mips_splat_d:
1976    // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
1977    // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
1978    // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
1979    // Instead we lower to MipsISD::VSHF and match from there.
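    // e.g. (illustrative) for splat.w the lane index is splatted into a v4i32
    // and used as the VSHF mask, with the source vector as both vector
    // operands.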
1980    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
1981                       lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
1982                       Op->getOperand(1));
1983  case Intrinsic::mips_splati_b:
1984  case Intrinsic::mips_splati_h:
1985  case Intrinsic::mips_splati_w:
1986  case Intrinsic::mips_splati_d:
1987    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
1988                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
1989                       Op->getOperand(1));
1990  case Intrinsic::mips_sra_b:
1991  case Intrinsic::mips_sra_h:
1992  case Intrinsic::mips_sra_w:
1993  case Intrinsic::mips_sra_d:
1994    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
1995                       Op->getOperand(2));
1996  case Intrinsic::mips_srai_b:
1997  case Intrinsic::mips_srai_h:
1998  case Intrinsic::mips_srai_w:
1999  case Intrinsic::mips_srai_d:
2000    return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2001                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2002  case Intrinsic::mips_srl_b:
2003  case Intrinsic::mips_srl_h:
2004  case Intrinsic::mips_srl_w:
2005  case Intrinsic::mips_srl_d:
2006    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2007                       Op->getOperand(2));
2008  case Intrinsic::mips_srli_b:
2009  case Intrinsic::mips_srli_h:
2010  case Intrinsic::mips_srli_w:
2011  case Intrinsic::mips_srli_d:
2012    return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2013                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2014  case Intrinsic::mips_subv_b:
2015  case Intrinsic::mips_subv_h:
2016  case Intrinsic::mips_subv_w:
2017  case Intrinsic::mips_subv_d:
2018    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2019                       Op->getOperand(2));
2020  case Intrinsic::mips_subvi_b:
2021  case Intrinsic::mips_subvi_h:
2022  case Intrinsic::mips_subvi_w:
2023  case Intrinsic::mips_subvi_d:
2024    return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2025                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2026  case Intrinsic::mips_vshf_b:
2027  case Intrinsic::mips_vshf_h:
2028  case Intrinsic::mips_vshf_w:
2029  case Intrinsic::mips_vshf_d:
2030    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2031                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2032  case Intrinsic::mips_xor_v:
2033    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2034                       Op->getOperand(2));
2035  case Intrinsic::mips_xori_b:
2036    return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2037                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2038  }
2039}
2040
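// Lower the MSA ld.[bhwd] intrinsics into a normal load from (Address +
// Offset), annotated with 16-byte alignment (the MSA register width).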
2041static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
2042  SDLoc DL(Op);
2043  SDValue ChainIn = Op->getOperand(0);
2044  SDValue Address = Op->getOperand(2);
2045  SDValue Offset  = Op->getOperand(3);
2046  EVT ResTy = Op->getValueType(0);
2047  EVT PtrTy = Address->getValueType(0);
2048
2049  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2050
2051  return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false,
2052                     false, false, 16);
2053}
2054
2055SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2056                                                     SelectionDAG &DAG) const {
2057  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
2058  switch (Intr) {
2059  default:
2060    return SDValue();
2061  case Intrinsic::mips_extp:
2062    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
2063  case Intrinsic::mips_extpdp:
2064    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
2065  case Intrinsic::mips_extr_w:
2066    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
2067  case Intrinsic::mips_extr_r_w:
2068    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
2069  case Intrinsic::mips_extr_rs_w:
2070    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
2071  case Intrinsic::mips_extr_s_h:
2072    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
2073  case Intrinsic::mips_mthlip:
2074    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
2075  case Intrinsic::mips_mulsaq_s_w_ph:
2076    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
2077  case Intrinsic::mips_maq_s_w_phl:
2078    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
2079  case Intrinsic::mips_maq_s_w_phr:
2080    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
2081  case Intrinsic::mips_maq_sa_w_phl:
2082    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
2083  case Intrinsic::mips_maq_sa_w_phr:
2084    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
2085  case Intrinsic::mips_dpaq_s_w_ph:
2086    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
2087  case Intrinsic::mips_dpsq_s_w_ph:
2088    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
2089  case Intrinsic::mips_dpaq_sa_l_w:
2090    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
2091  case Intrinsic::mips_dpsq_sa_l_w:
2092    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
2093  case Intrinsic::mips_dpaqx_s_w_ph:
2094    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
2095  case Intrinsic::mips_dpaqx_sa_w_ph:
2096    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
2097  case Intrinsic::mips_dpsqx_s_w_ph:
2098    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
2099  case Intrinsic::mips_dpsqx_sa_w_ph:
2100    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
2101  case Intrinsic::mips_ld_b:
2102  case Intrinsic::mips_ld_h:
2103  case Intrinsic::mips_ld_w:
2104  case Intrinsic::mips_ld_d:
2105    return lowerMSALoadIntr(Op, DAG, Intr);
2106  }
2107}
2108
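// Lower the MSA st.[bhwd] intrinsics into a normal store to (Address +
// Offset), annotated with 16-byte alignment (the MSA register width).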
2109static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
2110  SDLoc DL(Op);
2111  SDValue ChainIn = Op->getOperand(0);
2112  SDValue Value   = Op->getOperand(2);
2113  SDValue Address = Op->getOperand(3);
2114  SDValue Offset  = Op->getOperand(4);
2115  EVT PtrTy = Address->getValueType(0);
2116
2117  Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2118
2119  return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false,
2120                      false, 16);
2121}
2122
2123SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2124                                                  SelectionDAG &DAG) const {
2125  unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
2126  switch (Intr) {
2127  default:
2128    return SDValue();
2129  case Intrinsic::mips_st_b:
2130  case Intrinsic::mips_st_h:
2131  case Intrinsic::mips_st_w:
2132  case Intrinsic::mips_st_d:
2133    return lowerMSAStoreIntr(Op, DAG, Intr);
2134  }
2135}
2136
2137/// \brief Check if the given BuildVectorSDNode is a splat.
2138/// This method currently relies on DAG nodes being reused when equivalent,
2139/// so it's possible for this to return false even when isConstantSplat returns
2140/// true.
2141static bool isSplatVector(const BuildVectorSDNode *N) {
2142  unsigned int nOps = N->getNumOperands();
2143  assert(nOps > 1 && "expected a build vector with more than one operand");
2144
2145  SDValue Operand0 = N->getOperand(0);
2146
2147  for (unsigned int i = 1; i < nOps; ++i) {
2148    if (N->getOperand(i) != Operand0)
2149      return false;
2150  }
2151
2152  return true;
2153}
2154
2155// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2156//
2157// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2158// choose to sign-extend but we could have equally chosen zero-extend. The
2159// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2160// result into this node later (possibly changing it to a zero-extend in the
2161// process).
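//
// e.g. (illustrative) (i32 (extract_vector_elt v4i32:$ws, $idx)) becomes
// (MipsISD::VEXTRACT_SEXT_ELT $ws, $idx, i32), where the trailing value type
// records the width of the extracted element.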
2162SDValue MipsSETargetLowering::
2163lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2164  SDLoc DL(Op);
2165  EVT ResTy = Op->getValueType(0);
2166  SDValue Op0 = Op->getOperand(0);
2167  EVT VecTy = Op0->getValueType(0);
2168
2169  if (!VecTy.is128BitVector())
2170    return SDValue();
2171
2172  if (ResTy.isInteger()) {
2173    SDValue Op1 = Op->getOperand(1);
2174    EVT EltTy = VecTy.getVectorElementType();
2175    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
2176                       DAG.getValueType(EltTy));
2177  }
2178
2179  return Op;
2180}
2181
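// Check whether Op is a constant (integer or floating point) or an UNDEF
// node.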
2182static bool isConstantOrUndef(const SDValue Op) {
2183  if (Op->getOpcode() == ISD::UNDEF)
2184    return true;
2185  if (isa<ConstantSDNode>(Op))
2186    return true;
2187  if (isa<ConstantFPSDNode>(Op))
2188    return true;
2189  return false;
2190}
2191
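// Check whether the given BUILD_VECTOR contains at least one constant or
// UNDEF operand. Note that this is not the same as every operand being
// constant or undef.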
2192static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2193  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2194    if (isConstantOrUndef(Op->getOperand(i)))
2195      return true;
2196  return false;
2197}
2198
2199// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2200// backend.
2201//
2202// Lowers according to the following rules:
2203// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2204//   2 less than or equal to 64 and the value fits into a signed 10-bit
2205//   immediate
2206// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2207//   is a power of 2 less than or equal to 64 and the value does not fit into a
2208//   signed 10-bit immediate
2209// - Non-constant splats are legal as-is.
2210// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2211// - All others are illegal and must be expanded.
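//
// For example (illustrative):
// - a v8i16 splat of 31 fits into a simm10 and is left as-is (it will be
//   matched to ldi.h).
// - a v4f32 splat of 1.0 is materialized as a v4i32 splat of 0x3f800000 and
//   bitcast back to v4f32.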
2212SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2213                                                SelectionDAG &DAG) const {
2214  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2215  EVT ResTy = Op->getValueType(0);
2216  SDLoc DL(Op);
2217  APInt SplatValue, SplatUndef;
2218  unsigned SplatBitSize;
2219  bool HasAnyUndefs;
2220
2221  if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
2222    return SDValue();
2223
2224  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2225                            HasAnyUndefs, 8,
2226                            !Subtarget->isLittle()) && SplatBitSize <= 64) {
2227    // We can only cope with 8, 16, 32, or 64-bit elements
2228    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2229        SplatBitSize != 64)
2230      return SDValue();
2231
2232    // If the value fits into a simm10 then we can use ldi.[bhwd]
2233    // However, if it isn't an integer type we will have to bitcast from an
2234    // integer type first. Also, if there are any undefs, we must lower them
2235    // to defined values first.
2236    if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10))
2237      return Op;
2238
2239    EVT ViaVecTy;
2240
2241    switch (SplatBitSize) {
2242    default:
2243      return SDValue();
2244    case 8:
2245      ViaVecTy = MVT::v16i8;
2246      break;
2247    case 16:
2248      ViaVecTy = MVT::v8i16;
2249      break;
2250    case 32:
2251      ViaVecTy = MVT::v4i32;
2252      break;
2253    case 64:
2254      // There's no fill.d to fall back on for 64-bit values
2255      return SDValue();
2256    }
2257
2258    // SelectionDAG::getConstant will promote SplatValue appropriately.
2259    SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);
2260
2261    // Bitcast to the type we originally wanted
2262    if (ViaVecTy != ResTy)
2263      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2264
2265    return Result;
2266  } else if (isSplatVector(Node))
2267    return Op;
2268  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2269    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2270    // The resulting code is the same length as the expansion, but it doesn't
2271    // use memory operations
2272    EVT ResTy = Node->getValueType(0);
2273
2274    assert(ResTy.isVector());
2275
2276    unsigned NumElts = ResTy.getVectorNumElements();
2277    SDValue Vector = DAG.getUNDEF(ResTy);
2278    for (unsigned i = 0; i < NumElts; ++i) {
2279      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2280                           Node->getOperand(i),
2281                           DAG.getConstant(i, MVT::i32));
2282    }
2283    return Vector;
2284  }
2285
2286  return SDValue();
2287}
2288
2289// Lower VECTOR_SHUFFLE into SHF (if possible).
2290//
2291// SHF splits the vector into blocks of four elements, then shuffles these
2292// elements according to a <4 x i2> constant (encoded as an integer immediate).
2293//
2294// It is therefore possible to lower into SHF when the mask takes the form:
2295//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2296// When undefs appear they are treated as if they were whatever value is
2297// necessary in order to fit the above form.
2298//
2299// For example:
2300//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2301//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2302//                                 i32 7, i32 6, i32 5, i32 4>
2303// is lowered to:
2304//   (SHF_H $w0, $w1, 27)
2305// where the 27 comes from:
2306//   3 + (2 << 2) + (1 << 4) + (0 << 6)
2307static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
2308                                       SmallVector<int, 16> Indices,
2309                                       SelectionDAG &DAG) {
2310  int SHFIndices[4] = { -1, -1, -1, -1 };
2311
2312  if (Indices.size() < 4)
2313    return SDValue();
2314
2315  for (unsigned i = 0; i < 4; ++i) {
2316    for (unsigned j = i; j < Indices.size(); j += 4) {
2317      int Idx = Indices[j];
2318
2319      // Convert from vector index to 4-element subvector index
2320      // If an index refers to an element outside of the subvector then give up
2321      if (Idx != -1) {
2322        Idx -= 4 * (j / 4);
2323        if (Idx < 0 || Idx >= 4)
2324          return SDValue();
2325      }
2326
2327      // If the mask has an undef, replace it with the current index.
2328      // Note that it might still be undef if the current index is also undef
2329      if (SHFIndices[i] == -1)
2330        SHFIndices[i] = Idx;
2331
2332      // Check that non-undef values are the same as in the mask. If they
2333      // aren't then give up
2334      if (!(Idx == -1 || Idx == SHFIndices[i]))
2335        return SDValue();
2336    }
2337  }
2338
2339  // Calculate the immediate. Replace any remaining undefs with zero
2340  APInt Imm(32, 0);
2341  for (int i = 3; i >= 0; --i) {
2342    int Idx = SHFIndices[i];
2343
2344    if (Idx == -1)
2345      Idx = 0;
2346
2347    Imm <<= 2;
2348    Imm |= Idx & 0x3;
2349  }
2350
2351  return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
2352                     DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
2353}
2354
2355// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2356//
2357// ILVEV interleaves the even elements from each vector.
2358//
2359// It is possible to lower into ILVEV when the mask takes the form:
2360//   <0, n, 2, n+2, 4, n+4, ...>
2361// where n is the number of elements in the vector.
2362//
2363// When undefs appear in the mask they are treated as if they were whatever
2364// value is necessary in order to fit the above form.
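//
// e.g. for v8i16, the mask <0, 8, 2, 10, 4, 12, 6, 14> (illustrative) is
// lowered to (MipsISD::ILVEV %0, %1) where %0 and %1 are the source vectors.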
2365static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
2366                                         SmallVector<int, 16> Indices,
2367                                         SelectionDAG &DAG) {
2368  assert ((Indices.size() % 2) == 0);
2369  int WsIdx = 0;
2370  int WtIdx = ResTy.getVectorNumElements();
2371
2372  for (unsigned i = 0; i < Indices.size(); i += 2) {
2373    if (Indices[i] != -1 && Indices[i] != WsIdx)
2374      return SDValue();
2375    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
2376      return SDValue();
2377    WsIdx += 2;
2378    WtIdx += 2;
2379  }
2380
2381  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0),
2382                     Op->getOperand(1));
2383}
2384
2385// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2386//
2387// ILVOD interleaves the odd elements from each vector.
2388//
2389// It is possible to lower into ILVOD when the mask takes the form:
2390//   <1, n+1, 3, n+3, 5, n+5, ...>
2391// where n is the number of elements in the vector.
2392//
2393// When undefs appear in the mask they are treated as if they were whatever
2394// value is necessary in order to fit the above form.
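//
// e.g. for v8i16, the mask <1, 9, 3, 11, 5, 13, 7, 15> (illustrative) is
// lowered to (MipsISD::ILVOD %0, %1) where %0 and %1 are the source vectors.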
2395static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
2396                                         SmallVector<int, 16> Indices,
2397                                         SelectionDAG &DAG) {
2398  assert ((Indices.size() % 2) == 0);
2399  int WsIdx = 1;
2400  int WtIdx = ResTy.getVectorNumElements() + 1;
2401
2402  for (unsigned i = 0; i < Indices.size(); i += 2) {
2403    if (Indices[i] != -1 && Indices[i] != WsIdx)
2404      return SDValue();
2405    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
2406      return SDValue();
2407    WsIdx += 2;
2408    WtIdx += 2;
2409  }
2410
2411  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0),
2412                     Op->getOperand(1));
2413}
2414
2415// Lower VECTOR_SHUFFLE into ILVL (if possible).
2416//
2417// ILVL interleaves consecutive elements from the left half of each vector.
2418//
2419// It is possible to lower into ILVL when the mask takes the form:
2420//   <0, n, 1, n+1, 2, n+2, ...>
2421// where n is the number of elements in the vector.
2422//
2423// When undefs appear in the mask they are treated as if they were whatever
2424// value is necessary in order to fit the above form.
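//
// e.g. for v8i16, the mask <0, 8, 1, 9, 2, 10, 3, 11> (illustrative) is
// lowered to (MipsISD::ILVL %0, %1) where %0 and %1 are the source vectors.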
2425static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
2426                                        SmallVector<int, 16> Indices,
2427                                        SelectionDAG &DAG) {
2428  assert ((Indices.size() % 2) == 0);
2429  int WsIdx = 0;
2430  int WtIdx = ResTy.getVectorNumElements();
2431
2432  for (unsigned i = 0; i < Indices.size(); i += 2) {
2433    if (Indices[i] != -1 && Indices[i] != WsIdx)
2434      return SDValue();
2435    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
2436      return SDValue();
2437    WsIdx++;
2438    WtIdx++;
2439  }
2440
2441  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0),
2442                     Op->getOperand(1));
2443}
2444
2445// Lower VECTOR_SHUFFLE into ILVR (if possible).
2446//
2447// ILVR interleaves consecutive elements from the right half of each vector.
2448//
2449// It is possible to lower into ILVR when the mask takes the form:
2450//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2451// where n is the number of elements in the vector and x is half n.
2452//
2453// When undefs appear in the mask they are treated as if they were whatever
2454// value is necessary in order to fit the above form.
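//
// e.g. for v8i16, the mask <4, 12, 5, 13, 6, 14, 7, 15> (illustrative) is
// lowered to (MipsISD::ILVR %0, %1) where %0 and %1 are the source vectors.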
2455static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
2456                                        SmallVector<int, 16> Indices,
2457                                        SelectionDAG &DAG) {
2458  assert ((Indices.size() % 2) == 0);
2459  unsigned NumElts = ResTy.getVectorNumElements();
2460  int WsIdx = NumElts / 2;
2461  int WtIdx = NumElts + NumElts / 2;
2462
2463  for (unsigned i = 0; i < Indices.size(); i += 2) {
2464    if (Indices[i] != -1 && Indices[i] != WsIdx)
2465      return SDValue();
2466    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
2467      return SDValue();
2468    WsIdx++;
2469    WtIdx++;
2470  }
2471
2472  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0),
2473                     Op->getOperand(1));
2474}
2475
2476// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2477//
2478// PCKEV copies the even elements of each vector into the result vector.
2479//
2480// It is possible to lower into PCKEV when the mask takes the form:
2481//   <0, 2, 4, ..., n, n+2, n+4, ...>
2482// where n is the number of elements in the vector.
2483//
2484// When undefs appear in the mask they are treated as if they were whatever
2485// value is necessary in order to fit the above form.
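//
// e.g. for v8i16, the mask <0, 2, 4, 6, 8, 10, 12, 14> (illustrative) is
// lowered to (MipsISD::PCKEV %0, %1) where %0 and %1 are the source vectors.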
2486static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
2487                                         SmallVector<int, 16> Indices,
2488                                         SelectionDAG &DAG) {
2489  assert ((Indices.size() % 2) == 0);
2490  int Idx = 0;
2491
2492  for (unsigned i = 0; i < Indices.size(); ++i) {
2493    if (Indices[i] != -1 && Indices[i] != Idx)
2494      return SDValue();
2495    Idx += 2;
2496  }
2497
2498  return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0),
2499                     Op->getOperand(1));
2500}
2501
2502// Lower VECTOR_SHUFFLE into PCKOD (if possible).
2503//
2504// PCKOD copies the odd elements of each vector into the result vector.
2505//
2506// It is possible to lower into PCKOD when the mask takes the form:
2507//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
2508// where n is the number of elements in the vector.
2509//
2510// When undefs appear in the mask they are treated as if they were whatever
2511// value is necessary in order to fit the above form.
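//
// e.g. for v8i16, the mask <1, 3, 5, 7, 9, 11, 13, 15> (illustrative) is
// lowered to (MipsISD::PCKOD %0, %1) where %0 and %1 are the source vectors.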
2512static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
2513                                         SmallVector<int, 16> Indices,
2514                                         SelectionDAG &DAG) {
2515  assert ((Indices.size() % 2) == 0);
2516  int Idx = 1;
2517
2518  for (unsigned i = 0; i < Indices.size(); ++i) {
2519    if (Indices[i] != -1 && Indices[i] != Idx)
2520      return SDValue();
2521    Idx += 2;
2522  }
2523
2524  return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0),
2525                     Op->getOperand(1));
2526}
2527
2528// Lower VECTOR_SHUFFLE into VSHF.
2529//
2530// This mostly consists of converting the shuffle indices in Indices into a
2531// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2532// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2533// if the type is v8i16 and all the indices are less than 8 then the second
2534// operand is unused and can be replaced with anything. We choose to replace it
2535// with the used operand since this reduces the number of instructions overall.
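//
// e.g. (illustrative) for a v8i16 shuffle whose mask only uses indices 0..7,
// only the first source vector is read, so it is used for both vector
// operands of the resulting VSHF.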
2536static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
2537                                        SmallVector<int, 16> Indices,
2538                                        SelectionDAG &DAG) {
2539  SmallVector<SDValue, 16> Ops;
2540  SDValue Op0;
2541  SDValue Op1;
2542  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
2543  EVT MaskEltTy = MaskVecTy.getVectorElementType();
2544  bool Using1stVec = false;
2545  bool Using2ndVec = false;
2546  SDLoc DL(Op);
2547  int ResTyNumElts = ResTy.getVectorNumElements();
2548
2549  for (int i = 0; i < ResTyNumElts; ++i) {
2550    // Idx == -1 means UNDEF
2551    int Idx = Indices[i];
2552
2553    if (0 <= Idx && Idx < ResTyNumElts)
2554      Using1stVec = true;
2555    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
2556      Using2ndVec = true;
2557  }
2558
2559  for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
2560       ++I)
2561    Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy));
2562
2563  SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0],
2564                                Ops.size());
2565
2566  if (Using1stVec && Using2ndVec) {
2567    Op0 = Op->getOperand(0);
2568    Op1 = Op->getOperand(1);
2569  } else if (Using1stVec)
2570    Op0 = Op1 = Op->getOperand(0);
2571  else if (Using2ndVec)
2572    Op0 = Op1 = Op->getOperand(1);
2573  else
2574    llvm_unreachable("shuffle vector mask references neither vector operand?");
2575
2576  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2577  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2578  // VSHF concatenates the vectors in a bitwise fashion:
2579  // <0b00, 0b01> + <0b10, 0b11> ->
2580  // 0b0100       + 0b1110       -> 0b01001110
2581  //                                <0b10, 0b11, 0b00, 0b01>
2582  // We must therefore swap the operands to get the correct result.
2583  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
2584}
2585
2586// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
2587// indices in the shuffle.
2588SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2589                                                  SelectionDAG &DAG) const {
2590  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
2591  EVT ResTy = Op->getValueType(0);
2592
2593  if (!ResTy.is128BitVector())
2594    return SDValue();
2595
2596  int ResTyNumElts = ResTy.getVectorNumElements();
2597  SmallVector<int, 16> Indices;
2598
2599  for (int i = 0; i < ResTyNumElts; ++i)
2600    Indices.push_back(Node->getMaskElt(i));
2601
2602  SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
2603  if (Result.getNode())
2604    return Result;
2605  Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
2606  if (Result.getNode())
2607    return Result;
2608  Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
2609  if (Result.getNode())
2610    return Result;
2611  Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG);
2612  if (Result.getNode())
2613    return Result;
2614  Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG);
2615  if (Result.getNode())
2616    return Result;
2617  Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG);
2618  if (Result.getNode())
2619    return Result;
2620  Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG);
2621  if (Result.getNode())
2622    return Result;
2623  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
2624}
2625
2626MachineBasicBlock * MipsSETargetLowering::
2627emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
2628  // $bb:
2629  //  bposge32_pseudo $vr0
2630  //  =>
2631  // $bb:
2632  //  bposge32 $tbb
2633  // $fbb:
2634  //  li $vr2, 0
2635  //  b $sink
2636  // $tbb:
2637  //  li $vr1, 1
2638  // $sink:
2639  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)
2640
2641  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
2642  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
2643  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
2644  DebugLoc DL = MI->getDebugLoc();
2645  const BasicBlock *LLVM_BB = BB->getBasicBlock();
2646  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
2647  MachineFunction *F = BB->getParent();
2648  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
2649  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
2650  MachineBasicBlock *Sink  = F->CreateMachineBasicBlock(LLVM_BB);
2651  F->insert(It, FBB);
2652  F->insert(It, TBB);
2653  F->insert(It, Sink);
2654
2655  // Transfer the remainder of BB and its successor edges to Sink.
2656  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
2657               BB->end());
2658  Sink->transferSuccessorsAndUpdatePHIs(BB);
2659
2660  // Add successors.
2661  BB->addSuccessor(FBB);
2662  BB->addSuccessor(TBB);
2663  FBB->addSuccessor(Sink);
2664  TBB->addSuccessor(Sink);
2665
2666  // Insert the real bposge32 instruction to $BB.
2667  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
2668
2669  // Fill $FBB.
2670  unsigned VR2 = RegInfo.createVirtualRegister(RC);
2671  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
2672    .addReg(Mips::ZERO).addImm(0);
2673  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
2674
2675  // Fill $TBB.
2676  unsigned VR1 = RegInfo.createVirtualRegister(RC);
2677  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
2678    .addReg(Mips::ZERO).addImm(1);
2679
2680  // Insert phi function to $Sink.
2681  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
2682          MI->getOperand(0).getReg())
2683    .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
2684
2685  MI->eraseFromParent();   // The pseudo instruction is gone now.
2686  return Sink;
2687}
2688
2689MachineBasicBlock * MipsSETargetLowering::
2690emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
2691                     unsigned BranchOp) const{
2692  // $bb:
2693  //  vany_nonzero $rd, $ws
2694  //  =>
2695  // $bb:
2696  //  bnz.b $ws, $tbb
2697  //  b $fbb
2698  // $fbb:
2699  //  li $rd1, 0
2700  //  b $sink
2701  // $tbb:
2702  //  li $rd2, 1
2703  // $sink:
2704  //  $rd = phi($rd1, $fbb, $rd2, $tbb)
2705
2706  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
2707  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
2708  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
2709  DebugLoc DL = MI->getDebugLoc();
2710  const BasicBlock *LLVM_BB = BB->getBasicBlock();
2711  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
2712  MachineFunction *F = BB->getParent();
2713  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
2714  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
2715  MachineBasicBlock *Sink  = F->CreateMachineBasicBlock(LLVM_BB);
2716  F->insert(It, FBB);
2717  F->insert(It, TBB);
2718  F->insert(It, Sink);
2719
2720  // Transfer the remainder of BB and its successor edges to Sink.
2721  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
2722               BB->end());
2723  Sink->transferSuccessorsAndUpdatePHIs(BB);
2724
2725  // Add successors.
2726  BB->addSuccessor(FBB);
2727  BB->addSuccessor(TBB);
2728  FBB->addSuccessor(Sink);
2729  TBB->addSuccessor(Sink);
2730
2731  // Insert the real bnz.b instruction to $BB.
2732  BuildMI(BB, DL, TII->get(BranchOp))
2733    .addReg(MI->getOperand(1).getReg())
2734    .addMBB(TBB);
2735
2736  // Fill $FBB.
2737  unsigned RD1 = RegInfo.createVirtualRegister(RC);
2738  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
2739    .addReg(Mips::ZERO).addImm(0);
2740  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
2741
2742  // Fill $TBB.
2743  unsigned RD2 = RegInfo.createVirtualRegister(RC);
2744  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
2745    .addReg(Mips::ZERO).addImm(1);
2746
2747  // Insert phi function to $Sink.
2748  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
2749          MI->getOperand(0).getReg())
2750    .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB);
2751
2752  MI->eraseFromParent();   // The pseudo instruction is gone now.
2753  return Sink;
2754}
2755
2756// Emit the COPY_FW pseudo instruction.
2757//
2758// copy_fw_pseudo $fd, $ws, n
2759// =>
2760// splati.w $wt, $ws, $n
2761// copy $fd, $wt:sub_lo
2762//
2763// When n is zero, the equivalent operation can be performed with (potentially)
2764// zero instructions due to register overlaps. This optimization is never valid
2765// for lane 1 because it would require FR=0 mode, which isn't supported by MSA.
2766MachineBasicBlock * MipsSETargetLowering::
2767emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
2768  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
2769  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
2770  DebugLoc DL = MI->getDebugLoc();
2771  unsigned Fd = MI->getOperand(0).getReg();
2772  unsigned Ws = MI->getOperand(1).getReg();
2773  unsigned Lane = MI->getOperand(2).getImm();
2774
2775  if (Lane == 0)
2776    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
2777  else {
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode is the only mode supported by MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  unsigned Fd  = MI->getOperand(0).getReg();
  unsigned Ws  = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm() * 2;
  DebugLoc DL = MI->getDebugLoc();

  if (Lane == 0)
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
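    // The requested element is lane 1: splat it down to lane 0 of a temporary
    // vector, then copy the temporary's low 64 bits (sub_64) into $fd.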
    unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
//
// insert_fw_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_lo, $fs
// insve_w $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Wd_in = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();
  unsigned Fs = MI->getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
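
  // Wrap $fs in a full 128-bit MSA register (its value occupies the sub_lo
  // subregister); insve.w then inserts lane 0 of that temporary into lane $n
  // of $wd_in.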
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI->eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FD pseudo instruction.
//
// insert_fd_pseudo $wd, $wd_in, $n, $fs
// =>
// subreg_to_reg $wt:sub_64, $fs
// insve_d $wd[$n], $wd_in, $wt[0]
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Wd_in = MI->getOperand(1).getReg();
  unsigned Lane = MI->getOperand(2).getImm();
  unsigned Fs = MI->getOperand(3).getReg();
  unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
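
  // Same expansion as emitINSERT_FW, but $fs occupies the 64-bit sub_64
  // subregister and insve.d inserts into a 64-bit lane.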
  BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
      .addImm(0)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
      .addReg(Wd_in)
      .addImm(Lane)
      .addReg(Wt)
      .addImm(0);

  MI->eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:sub_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Fs = MI->getOperand(1).getReg();
  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
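
  // Start from an undefined 128-bit vector, insert $fs into its sub_lo
  // subregister, then broadcast that element to every lane of $wd.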
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI->eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:sub_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget->isFP64bit());

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Wd = MI->getOperand(0).getReg();
  unsigned Fs = MI->getOperand(1).getReg();
  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
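
  // As in emitFILL_FW, but $fs goes into the sub_64 subregister and splati.d
  // broadcasts a 64-bit element.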
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

// Emit the FEXP2_W_1 pseudo instruction.
//
// fexp2_w_1_pseudo $wd, $wt
// =>
// ldi.w $ws1, 1
// ffint_u.w $ws2, $ws1
// fexp2.w $wd, $ws2, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
  unsigned Ws1 = RegInfo.createVirtualRegister(RC);
  unsigned Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI->getDebugLoc();

  // Splat 1.0 into a vector: load integer 1, then convert it to floating
  // point.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);

  // Emit 1.0 * 2^Wt (fexp2.w computes $ws * 2^$wt elementwise).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI->getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI->getOperand(1).getReg());

  MI->eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FEXP2_D_1 pseudo instruction.
//
// fexp2_d_1_pseudo $wd, $wt
// =>
// ldi.d $ws1, 1
// ffint_u.d $ws2, $ws1
// fexp2.d $wd, $ws2, $wt
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI,
                                    MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
  unsigned Ws1 = RegInfo.createVirtualRegister(RC);
  unsigned Ws2 = RegInfo.createVirtualRegister(RC);
  DebugLoc DL = MI->getDebugLoc();

  // Splat 1.0 into a vector: load integer 1, then convert it to floating
  // point.
  BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
  BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);

  // Emit 1.0 * 2^Wt (fexp2.d computes $ws * 2^$wt elementwise).
  BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI->getOperand(0).getReg())
      .addReg(Ws2)
      .addReg(MI->getOperand(1).getReg());

  MI->eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
