1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the AArch64 target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/SelectionDAGISel.h"
18#include "llvm/IR/Function.h" // To access function attributes.
19#include "llvm/IR/GlobalValue.h"
20#include "llvm/IR/Intrinsics.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/ErrorHandling.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "aarch64-isel"
29
30//===--------------------------------------------------------------------===//
31/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32/// instructions for SelectionDAG operations.
33///
34namespace {
35
36class AArch64DAGToDAGISel : public SelectionDAGISel {
37  AArch64TargetMachine &TM;
38
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42
43  bool ForCodeSize;
44
45public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47                               CodeGenOpt::Level OptLevel)
48      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
49        ForCodeSize(false) {}
50
51  const char *getPassName() const override {
52    return "AArch64 Instruction Selection";
53  }
54
55  bool runOnMachineFunction(MachineFunction &MF) override {
56    AttributeSet FnAttrs = MF.getFunction()->getAttributes();
57    ForCodeSize =
58        FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
59                             Attribute::OptimizeForSize) ||
60        FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
61    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
62    return SelectionDAGISel::runOnMachineFunction(MF);
63  }
64
65  SDNode *Select(SDNode *Node) override;
66
67  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
68  /// inline asm expressions.
69  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
70                                    char ConstraintCode,
71                                    std::vector<SDValue> &OutOps) override;
72
73  SDNode *SelectMLAV64LaneV128(SDNode *N);
74  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
75  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79    return SelectShiftedRegister(N, false, Reg, Shift);
80  }
81  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82    return SelectShiftedRegister(N, true, Reg, Shift);
83  }
84  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
85    return SelectAddrModeIndexed(N, 1, Base, OffImm);
86  }
87  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
88    return SelectAddrModeIndexed(N, 2, Base, OffImm);
89  }
90  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
91    return SelectAddrModeIndexed(N, 4, Base, OffImm);
92  }
93  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
94    return SelectAddrModeIndexed(N, 8, Base, OffImm);
95  }
96  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
97    return SelectAddrModeIndexed(N, 16, Base, OffImm);
98  }
99  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
100    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
101  }
102  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
103    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
104  }
105  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
106    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
107  }
108  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
109    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
110  }
111  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
112    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
113  }
114
115  template<int Width>
116  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
117                         SDValue &SignExtend, SDValue &DoShift) {
118    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
119  }
120
121  template<int Width>
122  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
123                         SDValue &SignExtend, SDValue &DoShift) {
124    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
125  }
126
127
128  /// Form sequences of consecutive 64/128-bit registers for use in NEON
129  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
130  /// between 1 and 4 elements. If it contains a single element, that element is
131  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
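  /// For illustration: passing two v16i8 values yields a single REG_SEQUENCE in
  /// the QQ register class, which is the vector-list operand consumed by the
  /// ld2/tbl pseudo-instructions selected below.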
132  SDValue createDTuple(ArrayRef<SDValue> Vecs);
133  SDValue createQTuple(ArrayRef<SDValue> Vecs);
134
135  /// Generic helper for the createDTuple/createQTuple
136  /// functions. Those should almost always be called instead.
137  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
138                      unsigned SubRegs[]);
139
140  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
141
142  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
143
144  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
145                     unsigned SubRegIdx);
146  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
147                         unsigned SubRegIdx);
148  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
149  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
150
151  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
152  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
153  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
154  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
155
156  SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
157  SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
158
159  SDNode *SelectBitfieldExtractOp(SDNode *N);
160  SDNode *SelectBitfieldInsertOp(SDNode *N);
161
162  SDNode *SelectLIBM(SDNode *N);
163
164// Include the pieces autogenerated from the target description.
165#include "AArch64GenDAGISel.inc"
166
167private:
168  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
169                             SDValue &Shift);
170  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
171                             SDValue &OffImm);
172  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
173                              SDValue &OffImm);
174  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
175                         SDValue &Offset, SDValue &SignExtend,
176                         SDValue &DoShift);
177  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
178                         SDValue &Offset, SDValue &SignExtend,
179                         SDValue &DoShift);
180  bool isWorthFolding(SDValue V) const;
181  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
182                         SDValue &Offset, SDValue &SignExtend);
183
184  template<unsigned RegWidth>
185  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
186    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
187  }
188
189  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
190};
191} // end anonymous namespace
192
193/// isIntImmediate - This method tests to see if the node is a constant
194/// operand. If so, Imm will receive the value.
195static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
196  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
197    Imm = C->getZExtValue();
198    return true;
199  }
200  return false;
201}
202
203// isIntImmediate - This method tests to see if the value is a constant
204// operand. If so, Imm will receive the value.
205static bool isIntImmediate(SDValue N, uint64_t &Imm) {
206  return isIntImmediate(N.getNode(), Imm);
207}
208
209// isOpcWithIntImmediate - This method tests to see if the node is a specific
210// opcode and that it has an immediate integer right operand.
211// If so, Imm will receive the value.
212static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
213                                  uint64_t &Imm) {
214  return N->getOpcode() == Opc &&
215         isIntImmediate(N->getOperand(1).getNode(), Imm);
216}
217
218bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
219    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
220  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
221  // Require the address to be in a register.  That is safe for all AArch64
222  // variants and it is hard to do anything much smarter without knowing
223  // how the operand is used.
224  OutOps.push_back(Op);
225  return false;
226}
227
228/// SelectArithImmed - Select an immediate value that can be represented as
229/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
230/// Val set to the 12-bit value and Shift set to the shifter operand.
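/// For illustration: 0x123 selects as Val=0x123, Shift=LSL #0; 0x123000 selects
/// as Val=0x123, Shift=LSL #12; 0x123456 is rejected because it has nonzero bits
/// both inside and above the low 12 bits.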
231bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
232                                           SDValue &Shift) {
233  // This function is called from the addsub_shifted_imm ComplexPattern,
234  // which lists [imm] as the list of opcodes it is interested in; however,
235  // we still need to check whether the operand is actually an immediate
236  // here because the ComplexPattern opcode list is only used in
237  // root-level opcode matching.
238  if (!isa<ConstantSDNode>(N.getNode()))
239    return false;
240
241  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
242  unsigned ShiftAmt;
243
244  if (Immed >> 12 == 0) {
245    ShiftAmt = 0;
246  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
247    ShiftAmt = 12;
248    Immed = Immed >> 12;
249  } else
250    return false;
251
252  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
253  Val = CurDAG->getTargetConstant(Immed, MVT::i32);
254  Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
255  return true;
256}
257
258/// SelectNegArithImmed - As above, but negates the value before trying to
259/// select it.
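/// For illustration: an ADD of the constant -16 cannot be encoded directly, but
/// negating it gives 16, which fits the 12-bit form, so the node can typically
/// be selected as a SUB with immediate #16 instead.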
260bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
261                                              SDValue &Shift) {
262  // This function is called from the addsub_shifted_imm ComplexPattern,
263  // which lists [imm] as the list of opcodes it is interested in; however,
264  // we still need to check whether the operand is actually an immediate
265  // here because the ComplexPattern opcode list is only used in
266  // root-level opcode matching.
267  if (!isa<ConstantSDNode>(N.getNode()))
268    return false;
269
270  // The immediate operand must be a 24-bit zero-extended immediate.
271  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
272
273  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
274  // have the opposite effect on the C flag, so this pattern mustn't match under
275  // those circumstances.
276  if (Immed == 0)
277    return false;
278
279  if (N.getValueType() == MVT::i32)
280    Immed = ~((uint32_t)Immed) + 1;
281  else
282    Immed = ~Immed + 1ULL;
283  if (Immed & 0xFFFFFFFFFF000000ULL)
284    return false;
285
286  Immed &= 0xFFFFFFULL;
287  return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
288}
289
290/// getShiftTypeForNode - Translate a shift node to the corresponding
291/// ShiftType value.
292static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
293  switch (N.getOpcode()) {
294  default:
295    return AArch64_AM::InvalidShiftExtend;
296  case ISD::SHL:
297    return AArch64_AM::LSL;
298  case ISD::SRL:
299    return AArch64_AM::LSR;
300  case ISD::SRA:
301    return AArch64_AM::ASR;
302  case ISD::ROTR:
303    return AArch64_AM::ROR;
304  }
305}
306
307/// \brief Determine whether it is worth folding V into an extended register.
308bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
309  // It hurts if a value is used at least twice, unless we are optimizing
310  // for code size.
311  if (ForCodeSize || V.hasOneUse())
312    return true;
313  return false;
314}
315
316/// SelectShiftedRegister - Select a "shifted register" operand.  If the value
317/// is not shifted, set the Shift operand to default of "LSL 0".  The logical
318/// instructions allow the shifted register to be rotated, but the arithmetic
319/// instructions do not.  The AllowROR parameter specifies whether ROR is
320/// supported.
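/// For illustration: an ADD whose operand is (shl x1, 3) can fold to
/// "add x0, x2, x1, lsl #3", while a rotate can only be folded into the logical
/// instructions, e.g. "orr x0, x2, x1, ror #7".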
321bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
322                                                SDValue &Reg, SDValue &Shift) {
323  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
324  if (ShType == AArch64_AM::InvalidShiftExtend)
325    return false;
326  if (!AllowROR && ShType == AArch64_AM::ROR)
327    return false;
328
329  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
330    unsigned BitSize = N.getValueType().getSizeInBits();
331    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
332    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
333
334    Reg = N.getOperand(0);
335    Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
336    return isWorthFolding(N);
337  }
338
339  return false;
340}
341
342/// getExtendTypeForNode - Translate an extend node to the corresponding
343/// ExtendType value.
344static AArch64_AM::ShiftExtendType
345getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
346  if (N.getOpcode() == ISD::SIGN_EXTEND ||
347      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
348    EVT SrcVT;
349    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
350      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
351    else
352      SrcVT = N.getOperand(0).getValueType();
353
354    if (!IsLoadStore && SrcVT == MVT::i8)
355      return AArch64_AM::SXTB;
356    else if (!IsLoadStore && SrcVT == MVT::i16)
357      return AArch64_AM::SXTH;
358    else if (SrcVT == MVT::i32)
359      return AArch64_AM::SXTW;
360    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
361
362    return AArch64_AM::InvalidShiftExtend;
363  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
364             N.getOpcode() == ISD::ANY_EXTEND) {
365    EVT SrcVT = N.getOperand(0).getValueType();
366    if (!IsLoadStore && SrcVT == MVT::i8)
367      return AArch64_AM::UXTB;
368    else if (!IsLoadStore && SrcVT == MVT::i16)
369      return AArch64_AM::UXTH;
370    else if (SrcVT == MVT::i32)
371      return AArch64_AM::UXTW;
372    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
373
374    return AArch64_AM::InvalidShiftExtend;
375  } else if (N.getOpcode() == ISD::AND) {
376    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
377    if (!CSD)
378      return AArch64_AM::InvalidShiftExtend;
379    uint64_t AndMask = CSD->getZExtValue();
380
381    switch (AndMask) {
382    default:
383      return AArch64_AM::InvalidShiftExtend;
384    case 0xFF:
385      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
386    case 0xFFFF:
387      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
388    case 0xFFFFFFFF:
389      return AArch64_AM::UXTW;
390    }
391  }
392
393  return AArch64_AM::InvalidShiftExtend;
394}
395
396// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
397static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
398  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
399      DL->getOpcode() != AArch64ISD::DUPLANE32)
400    return false;
401
402  SDValue SV = DL->getOperand(0);
403  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
404    return false;
405
406  SDValue EV = SV.getOperand(1);
407  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
408    return false;
409
410  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
411  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
412  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
413  LaneOp = EV.getOperand(0);
414
415  return true;
416}
417
418// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
419// high lane extract.
420static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
421                             SDValue &LaneOp, int &LaneIdx) {
422
423  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
424    std::swap(Op0, Op1);
425    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
426      return false;
427  }
428  StdOp = Op1;
429  return true;
430}
431
432/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
433/// is a lane in the upper half of a 128-bit vector.  Recognize and select this
434/// so that we don't emit unnecessary lane extracts.
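/// For illustration: when the lane operand lives in the top half of a 128-bit
/// register, an indexed MLA such as "mla v0.4h, v1.4h, v2.h[6]" can address that
/// lane directly, so no separate extract of the high 64 bits is needed.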
435SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
436  SDValue Op0 = N->getOperand(0);
437  SDValue Op1 = N->getOperand(1);
438  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
439  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
440  int LaneIdx = -1; // Will hold the lane index.
441
442  if (Op1.getOpcode() != ISD::MUL ||
443      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
444                        LaneIdx)) {
445    std::swap(Op0, Op1);
446    if (Op1.getOpcode() != ISD::MUL ||
447        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
448                          LaneIdx))
449      return nullptr;
450  }
451
452  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
453
454  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
455
456  unsigned MLAOpc = ~0U;
457
458  switch (N->getSimpleValueType(0).SimpleTy) {
459  default:
460    llvm_unreachable("Unrecognized MLA.");
461  case MVT::v4i16:
462    MLAOpc = AArch64::MLAv4i16_indexed;
463    break;
464  case MVT::v8i16:
465    MLAOpc = AArch64::MLAv8i16_indexed;
466    break;
467  case MVT::v2i32:
468    MLAOpc = AArch64::MLAv2i32_indexed;
469    break;
470  case MVT::v4i32:
471    MLAOpc = AArch64::MLAv4i32_indexed;
472    break;
473  }
474
475  return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
476}
477
478SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
479  SDValue SMULLOp0;
480  SDValue SMULLOp1;
481  int LaneIdx;
482
483  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
484                        LaneIdx))
485    return nullptr;
486
487  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
488
489  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
490
491  unsigned SMULLOpc = ~0U;
492
493  if (IntNo == Intrinsic::aarch64_neon_smull) {
494    switch (N->getSimpleValueType(0).SimpleTy) {
495    default:
496      llvm_unreachable("Unrecognized SMULL.");
497    case MVT::v4i32:
498      SMULLOpc = AArch64::SMULLv4i16_indexed;
499      break;
500    case MVT::v2i64:
501      SMULLOpc = AArch64::SMULLv2i32_indexed;
502      break;
503    }
504  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
505    switch (N->getSimpleValueType(0).SimpleTy) {
506    default:
507      llvm_unreachable("Unrecognized SMULL.");
508    case MVT::v4i32:
509      SMULLOpc = AArch64::UMULLv4i16_indexed;
510      break;
511    case MVT::v2i64:
512      SMULLOpc = AArch64::UMULLv2i32_indexed;
513      break;
514    }
515  } else
516    llvm_unreachable("Unrecognized intrinsic.");
517
518  return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
519}
520
521/// Instructions that accept extend modifiers like UXTW expect the register
522/// being extended to be a GPR32, but the incoming DAG might be acting on a
523/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
524/// this is the case.
525static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
526  if (N.getValueType() == MVT::i32)
527    return N;
528
529  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
530  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
531                                               SDLoc(N), MVT::i32, N, SubReg);
532  return SDValue(Node, 0);
533}
534
535
536/// SelectArithExtendedRegister - Select an "extended register" operand.  This
537/// operand folds in an extend followed by an optional left shift.
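/// For illustration: (add x0, (shl (sext_inreg x1, i8), 2)) can be selected as
/// "add x0, x0, w1, sxtb #2", folding both the extend and the left shift.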
538bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
539                                                      SDValue &Shift) {
540  unsigned ShiftVal = 0;
541  AArch64_AM::ShiftExtendType Ext;
542
543  if (N.getOpcode() == ISD::SHL) {
544    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
545    if (!CSD)
546      return false;
547    ShiftVal = CSD->getZExtValue();
548    if (ShiftVal > 4)
549      return false;
550
551    Ext = getExtendTypeForNode(N.getOperand(0));
552    if (Ext == AArch64_AM::InvalidShiftExtend)
553      return false;
554
555    Reg = N.getOperand(0).getOperand(0);
556  } else {
557    Ext = getExtendTypeForNode(N);
558    if (Ext == AArch64_AM::InvalidShiftExtend)
559      return false;
560
561    Reg = N.getOperand(0);
562  }
563
564  // AArch64 mandates that the RHS of the operation must use the smallest
565  // register class that could contain the size being extended from.  Thus,
566  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
567  // there might not be an actual 32-bit value in the program.  We can
568  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
569  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
570  Reg = narrowIfNeeded(CurDAG, Reg);
571  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
572  return isWorthFolding(N);
573}
574
575/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
576/// immediate" address.  The "Size" argument is the size in bytes of the memory
577/// reference, which determines the scale.
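/// For illustration: for an 8-byte access, byte offsets that are multiples of 8
/// in the range [0, 32760] are encoded scaled, e.g. offset 32 becomes
/// "ldr x0, [x1, #32]" with an encoded immediate of 4.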
578bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
579                                              SDValue &Base, SDValue &OffImm) {
580  const TargetLowering *TLI = getTargetLowering();
581  if (N.getOpcode() == ISD::FrameIndex) {
582    int FI = cast<FrameIndexSDNode>(N)->getIndex();
583    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
584    OffImm = CurDAG->getTargetConstant(0, MVT::i64);
585    return true;
586  }
587
588  if (N.getOpcode() == AArch64ISD::ADDlow) {
589    GlobalAddressSDNode *GAN =
590        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
591    Base = N.getOperand(0);
592    OffImm = N.getOperand(1);
593    if (!GAN)
594      return true;
595
596    const GlobalValue *GV = GAN->getGlobal();
597    unsigned Alignment = GV->getAlignment();
598    const DataLayout *DL = TLI->getDataLayout();
599    if (Alignment == 0 && !Subtarget->isTargetDarwin())
600      Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
601
602    if (Alignment >= Size)
603      return true;
604  }
605
606  if (CurDAG->isBaseWithConstantOffset(N)) {
607    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
608      int64_t RHSC = (int64_t)RHS->getZExtValue();
609      unsigned Scale = Log2_32(Size);
610      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
611        Base = N.getOperand(0);
612        if (Base.getOpcode() == ISD::FrameIndex) {
613          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
614          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
615        }
616        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
617        return true;
618      }
619    }
620  }
621
622  // Before falling back to our general case, check if the unscaled
623  // instructions can handle this. If so, that's preferable.
624  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
625    return false;
626
627  // Base only. The address will be materialized into a register before
628  // the memory is accessed.
629  //    add x0, Xbase, #offset
630  //    ldr x0, [x0]
631  Base = N;
632  OffImm = CurDAG->getTargetConstant(0, MVT::i64);
633  return true;
634}
635
636/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
637/// immediate" address.  This should only match when there is an offset that
638/// is not valid for a scaled immediate addressing mode.  The "Size" argument
639/// is the size in bytes of the memory reference, which is needed here to know
640/// what is valid for a scaled immediate.
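/// For illustration: an 8-byte access at byte offset 17 is not a multiple of 8,
/// so it cannot use the scaled form and is instead selected as the unscaled
/// "ldur x0, [x1, #17]" (signed 9-bit range, -256 to 255).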
641bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
642                                                 SDValue &Base,
643                                                 SDValue &OffImm) {
644  if (!CurDAG->isBaseWithConstantOffset(N))
645    return false;
646  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
647    int64_t RHSC = RHS->getSExtValue();
648    // If the offset is valid as a scaled immediate, don't match here.
649    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
650        RHSC < (0x1000 << Log2_32(Size)))
651      return false;
652    if (RHSC >= -256 && RHSC < 256) {
653      Base = N.getOperand(0);
654      if (Base.getOpcode() == ISD::FrameIndex) {
655        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
656        const TargetLowering *TLI = getTargetLowering();
657        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
658      }
659      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
660      return true;
661    }
662  }
663  return false;
664}
665
666static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
667  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
668  SDValue ImpDef = SDValue(
669      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
670      0);
671  MachineSDNode *Node = CurDAG->getMachineNode(
672      TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
673  return SDValue(Node, 0);
674}
675
676/// \brief Check if the given SHL node (\p N) can be used to form an
677/// extended register for an addressing mode.
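/// For illustration: with Size == 8, (shl (zext w1 to i64), 3) can become the
/// offset part of "ldr x0, [x2, w1, uxtw #3]"; the shift amount must be 0 or
/// log2(Size) for this to be legal.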
678bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
679                                            bool WantExtend, SDValue &Offset,
680                                            SDValue &SignExtend) {
681  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
682  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
683  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
684    return false;
685
686  if (WantExtend) {
687    AArch64_AM::ShiftExtendType Ext =
688        getExtendTypeForNode(N.getOperand(0), true);
689    if (Ext == AArch64_AM::InvalidShiftExtend)
690      return false;
691
692    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
693    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
694  } else {
695    Offset = N.getOperand(0);
696    SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
697  }
698
699  unsigned LegalShiftVal = Log2_32(Size);
700  unsigned ShiftVal = CSD->getZExtValue();
701
702  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
703    return false;
704
705  if (isWorthFolding(N))
706    return true;
707
708  return false;
709}
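// WRO appears to cover the W-register-offset forms: a base register plus a
// 32-bit offset register that is zero- or sign-extended (UXTW/SXTW) and
// optionally scaled, e.g. "ldr x0, [x1, w2, sxtw #3]".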
710
711bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
712                                            SDValue &Base, SDValue &Offset,
713                                            SDValue &SignExtend,
714                                            SDValue &DoShift) {
715  if (N.getOpcode() != ISD::ADD)
716    return false;
717  SDValue LHS = N.getOperand(0);
718  SDValue RHS = N.getOperand(1);
719
720  // We don't want to match immediate adds here, because they are better lowered
721  // to the register-immediate addressing modes.
722  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
723    return false;
724
725  // Check if this particular node is reused in any non-memory related
726  // operation.  If yes, do not try to fold this node into the address
727  // computation, since the computation will be kept.
728  const SDNode *Node = N.getNode();
729  for (SDNode *UI : Node->uses()) {
730    if (!isa<MemSDNode>(*UI))
731      return false;
732  }
733
734  // Remember if it is worth folding N when it produces an extended register.
735  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
736
737  // Try to match a shifted extend on the RHS.
738  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
739      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
740    Base = LHS;
741    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
742    return true;
743  }
744
745  // Try to match a shifted extend on the LHS.
746  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
747      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
748    Base = RHS;
749    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
750    return true;
751  }
752
753  // There was no shift, whatever else we find.
754  DoShift = CurDAG->getTargetConstant(false, MVT::i32);
755
756  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
757  // Try to match an unshifted extend on the LHS.
758  if (IsExtendedRegisterWorthFolding &&
759      (Ext = getExtendTypeForNode(LHS, true)) !=
760          AArch64_AM::InvalidShiftExtend) {
761    Base = RHS;
762    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
763    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
764    if (isWorthFolding(LHS))
765      return true;
766  }
767
768  // Try to match an unshifted extend on the RHS.
769  if (IsExtendedRegisterWorthFolding &&
770      (Ext = getExtendTypeForNode(RHS, true)) !=
771          AArch64_AM::InvalidShiftExtend) {
772    Base = LHS;
773    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
774    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
775    if (isWorthFolding(RHS))
776      return true;
777  }
778
779  return false;
780}
781
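// XRO appears to cover the X-register-offset forms: a base register plus a
// 64-bit offset register, optionally shifted by log2 of the access size, e.g.
// "ldr x0, [x1, x2, lsl #3]".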
782bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
783                                            SDValue &Base, SDValue &Offset,
784                                            SDValue &SignExtend,
785                                            SDValue &DoShift) {
786  if (N.getOpcode() != ISD::ADD)
787    return false;
788  SDValue LHS = N.getOperand(0);
789  SDValue RHS = N.getOperand(1);
790
791  // We don't want to match immediate adds here, because they are better lowered
792  // to the register-immediate addressing modes.
793  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
794    return false;
795
796  // Check if this particular node is reused in any non-memory related
797  // operation.  If yes, do not try to fold this node into the address
798  // computation, since the computation will be kept.
799  const SDNode *Node = N.getNode();
800  for (SDNode *UI : Node->uses()) {
801    if (!isa<MemSDNode>(*UI))
802      return false;
803  }
804
805  // Remember if it is worth folding N when it produces an extended register.
806  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
807
808  // Try to match a shifted extend on the RHS.
809  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
810      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
811    Base = LHS;
812    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
813    return true;
814  }
815
816  // Try to match a shifted extend on the LHS.
817  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
818      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
819    Base = RHS;
820    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
821    return true;
822  }
823
824  // Match any non-shifted, non-extend, non-immediate add expression.
825  Base = LHS;
826  Offset = RHS;
827  SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
828  DoShift = CurDAG->getTargetConstant(false, MVT::i32);
829  // Reg1 + Reg2 is free: no check needed.
830  return true;
831}
832
833SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
834  static unsigned RegClassIDs[] = {
835      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
836  static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
837                                AArch64::dsub2, AArch64::dsub3 };
838
839  return createTuple(Regs, RegClassIDs, SubRegs);
840}
841
842SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
843  static unsigned RegClassIDs[] = {
844      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
845  static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
846                                AArch64::qsub2, AArch64::qsub3 };
847
848  return createTuple(Regs, RegClassIDs, SubRegs);
849}
850
851SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
852                                         unsigned RegClassIDs[],
853                                         unsigned SubRegs[]) {
854  // There's no special register-class for a vector-list of 1 element: it's just
855  // a vector.
856  if (Regs.size() == 1)
857    return Regs[0];
858
859  assert(Regs.size() >= 2 && Regs.size() <= 4);
860
861  SDLoc DL(Regs[0].getNode());
862
863  SmallVector<SDValue, 4> Ops;
864
865  // First operand of REG_SEQUENCE is the desired RegClass.
866  Ops.push_back(
867      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
868
869  // Then we get pairs of source & subregister-position for the components.
870  for (unsigned i = 0; i < Regs.size(); ++i) {
871    Ops.push_back(Regs[i]);
872    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
873  }
874
875  SDNode *N =
876      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
877  return SDValue(N, 0);
878}
879
880SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
881                                         unsigned Opc, bool isExt) {
882  SDLoc dl(N);
883  EVT VT = N->getValueType(0);
884
885  unsigned ExtOff = isExt;
886
887  // Form a REG_SEQUENCE to force register allocation.
888  unsigned Vec0Off = ExtOff + 1;
889  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
890                               N->op_begin() + Vec0Off + NumVecs);
891  SDValue RegSeq = createQTuple(Regs);
892
893  SmallVector<SDValue, 6> Ops;
894  if (isExt)
895    Ops.push_back(N->getOperand(1));
896  Ops.push_back(RegSeq);
897  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
898  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
899}
900
901SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
902  LoadSDNode *LD = cast<LoadSDNode>(N);
903  if (LD->isUnindexed())
904    return nullptr;
905  EVT VT = LD->getMemoryVT();
906  EVT DstVT = N->getValueType(0);
907  ISD::MemIndexedMode AM = LD->getAddressingMode();
908  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
909
910  // We're not doing validity checking here. That was done when checking
911  // if we should mark the load as indexed or not. We're just selecting
912  // the right instruction.
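  // For illustration: a pre-indexed i64 load selects to something like
  // "ldr x0, [x1, #8]!" (LDRXpre), while the post-indexed form is
  // "ldr x0, [x1], #8" (LDRXpost); both also produce the updated base register.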
913  unsigned Opcode = 0;
914
915  ISD::LoadExtType ExtType = LD->getExtensionType();
916  bool InsertTo64 = false;
917  if (VT == MVT::i64)
918    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
919  else if (VT == MVT::i32) {
920    if (ExtType == ISD::NON_EXTLOAD)
921      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
922    else if (ExtType == ISD::SEXTLOAD)
923      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
924    else {
925      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
926      InsertTo64 = true;
927      // The result of the load is only i32. It's the subreg_to_reg that makes
928      // it into an i64.
929      DstVT = MVT::i32;
930    }
931  } else if (VT == MVT::i16) {
932    if (ExtType == ISD::SEXTLOAD) {
933      if (DstVT == MVT::i64)
934        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
935      else
936        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
937    } else {
938      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
939      InsertTo64 = DstVT == MVT::i64;
940      // The result of the load is only i32. It's the subreg_to_reg that makes
941      // it into an i64.
942      DstVT = MVT::i32;
943    }
944  } else if (VT == MVT::i8) {
945    if (ExtType == ISD::SEXTLOAD) {
946      if (DstVT == MVT::i64)
947        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
948      else
949        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
950    } else {
951      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
952      InsertTo64 = DstVT == MVT::i64;
953      // The result of the load is only i32. It's the subreg_to_reg that makes
954      // it into an i64.
955      DstVT = MVT::i32;
956    }
957  } else if (VT == MVT::f32) {
958    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
959  } else if (VT == MVT::f64 || VT.is64BitVector()) {
960    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
961  } else if (VT.is128BitVector()) {
962    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
963  } else
964    return nullptr;
965  SDValue Chain = LD->getChain();
966  SDValue Base = LD->getBasePtr();
967  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
968  int OffsetVal = (int)OffsetOp->getZExtValue();
969  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
970  SDValue Ops[] = { Base, Offset, Chain };
971  SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
972                                       MVT::Other, Ops);
973  // Either way, we're replacing the node, so tell the caller that.
974  Done = true;
975  SDValue LoadedVal = SDValue(Res, 1);
976  if (InsertTo64) {
977    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
978    LoadedVal =
979        SDValue(CurDAG->getMachineNode(
980                    AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
981                    CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
982                0);
983  }
984
985  ReplaceUses(SDValue(N, 0), LoadedVal);
986  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
987  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
988
989  return nullptr;
990}
991
992SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
993                                        unsigned Opc, unsigned SubRegIdx) {
994  SDLoc dl(N);
995  EVT VT = N->getValueType(0);
996  SDValue Chain = N->getOperand(0);
997
998  SmallVector<SDValue, 6> Ops;
999  Ops.push_back(N->getOperand(2)); // Mem operand;
1000  Ops.push_back(Chain);
1001
1002  std::vector<EVT> ResTys;
1003  ResTys.push_back(MVT::Untyped);
1004  ResTys.push_back(MVT::Other);
1005
1006  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1007  SDValue SuperReg = SDValue(Ld, 0);
1008  for (unsigned i = 0; i < NumVecs; ++i)
1009    ReplaceUses(SDValue(N, i),
1010        CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1011
1012  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1013  return nullptr;
1014}
1015
1016SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1017                                            unsigned Opc, unsigned SubRegIdx) {
1018  SDLoc dl(N);
1019  EVT VT = N->getValueType(0);
1020  SDValue Chain = N->getOperand(0);
1021
1022  SmallVector<SDValue, 6> Ops;
1023  Ops.push_back(N->getOperand(1)); // Mem operand
1024  Ops.push_back(N->getOperand(2)); // Incremental
1025  Ops.push_back(Chain);
1026
1027  std::vector<EVT> ResTys;
1028  ResTys.push_back(MVT::i64); // Type of the write back register
1029  ResTys.push_back(MVT::Untyped);
1030  ResTys.push_back(MVT::Other);
1031
1032  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1033
1034  // Update uses of write back register
1035  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1036
1037  // Update uses of vector list
1038  SDValue SuperReg = SDValue(Ld, 1);
1039  if (NumVecs == 1)
1040    ReplaceUses(SDValue(N, 0), SuperReg);
1041  else
1042    for (unsigned i = 0; i < NumVecs; ++i)
1043      ReplaceUses(SDValue(N, i),
1044          CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1045
1046  // Update the chain
1047  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1048  return nullptr;
1049}
1050
1051SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1052                                         unsigned Opc) {
1053  SDLoc dl(N);
1054  EVT VT = N->getOperand(2)->getValueType(0);
1055
1056  // Form a REG_SEQUENCE to force register allocation.
1057  bool Is128Bit = VT.getSizeInBits() == 128;
1058  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1059  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1060
1061  SmallVector<SDValue, 6> Ops;
1062  Ops.push_back(RegSeq);
1063  Ops.push_back(N->getOperand(NumVecs + 2));
1064  Ops.push_back(N->getOperand(0));
1065  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1066
1067  return St;
1068}
1069
1070SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1071                                             unsigned Opc) {
1072  SDLoc dl(N);
1073  EVT VT = N->getOperand(2)->getValueType(0);
1074  SmallVector<EVT, 2> ResTys;
1075  ResTys.push_back(MVT::i64);   // Type of the write back register
1076  ResTys.push_back(MVT::Other); // Type for the Chain
1077
1078  // Form a REG_SEQUENCE to force register allocation.
1079  bool Is128Bit = VT.getSizeInBits() == 128;
1080  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1081  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1082
1083  SmallVector<SDValue, 6> Ops;
1084  Ops.push_back(RegSeq);
1085  Ops.push_back(N->getOperand(NumVecs + 1)); // base register
1086  Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
1087  Ops.push_back(N->getOperand(0)); // Chain
1088  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1089
1090  return St;
1091}
1092
1093/// WidenVector - Given a value in the V64 register class, produce the
1094/// equivalent value in the V128 register class.
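/// For illustration: a v2i32 value held in a D register is placed into the dsub
/// subregister of an otherwise-undefined Q register, giving a v4i32 value whose
/// low half is the original vector.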
1095class WidenVector {
1096  SelectionDAG &DAG;
1097
1098public:
1099  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1100
1101  SDValue operator()(SDValue V64Reg) {
1102    EVT VT = V64Reg.getValueType();
1103    unsigned NarrowSize = VT.getVectorNumElements();
1104    MVT EltTy = VT.getVectorElementType().getSimpleVT();
1105    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1106    SDLoc DL(V64Reg);
1107
1108    SDValue Undef =
1109        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1110    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1111  }
1112};
1113
1114/// NarrowVector - Given a value in the V128 register class, produce the
1115/// equivalent value in the V64 register class.
1116static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1117  EVT VT = V128Reg.getValueType();
1118  unsigned WideSize = VT.getVectorNumElements();
1119  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1120  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1121
1122  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1123                                    V128Reg);
1124}
1125
1126SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1127                                            unsigned Opc) {
1128  SDLoc dl(N);
1129  EVT VT = N->getValueType(0);
1130  bool Narrow = VT.getSizeInBits() == 64;
1131
1132  // Form a REG_SEQUENCE to force register allocation.
1133  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1134
1135  if (Narrow)
1136    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1137                   WidenVector(*CurDAG));
1138
1139  SDValue RegSeq = createQTuple(Regs);
1140
1141  std::vector<EVT> ResTys;
1142  ResTys.push_back(MVT::Untyped);
1143  ResTys.push_back(MVT::Other);
1144
1145  unsigned LaneNo =
1146      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1147
1148  SmallVector<SDValue, 6> Ops;
1149  Ops.push_back(RegSeq);
1150  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1151  Ops.push_back(N->getOperand(NumVecs + 3));
1152  Ops.push_back(N->getOperand(0));
1153  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1154  SDValue SuperReg = SDValue(Ld, 0);
1155
1156  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1157  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1158                              AArch64::qsub3 };
1159  for (unsigned i = 0; i < NumVecs; ++i) {
1160    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1161    if (Narrow)
1162      NV = NarrowVector(NV, *CurDAG);
1163    ReplaceUses(SDValue(N, i), NV);
1164  }
1165
1166  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1167
1168  return Ld;
1169}
1170
1171SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1172                                                unsigned Opc) {
1173  SDLoc dl(N);
1174  EVT VT = N->getValueType(0);
1175  bool Narrow = VT.getSizeInBits() == 64;
1176
1177  // Form a REG_SEQUENCE to force register allocation.
1178  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1179
1180  if (Narrow)
1181    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1182                   WidenVector(*CurDAG));
1183
1184  SDValue RegSeq = createQTuple(Regs);
1185
1186  std::vector<EVT> ResTys;
1187  ResTys.push_back(MVT::i64); // Type of the write back register
1188  ResTys.push_back(MVT::Untyped);
1189  ResTys.push_back(MVT::Other);
1190
1191  unsigned LaneNo =
1192      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1193
1194  SmallVector<SDValue, 6> Ops;
1195  Ops.push_back(RegSeq);
1196  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
1197  Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
1198  Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
1199  Ops.push_back(N->getOperand(0));
1200  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1201
1202  // Update uses of the write back register
1203  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1204
1205  // Update uses of the vector list
1206  SDValue SuperReg = SDValue(Ld, 1);
1207  if (NumVecs == 1) {
1208    ReplaceUses(SDValue(N, 0),
1209                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1210  } else {
1211    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1212    static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1213                                AArch64::qsub3 };
1214    for (unsigned i = 0; i < NumVecs; ++i) {
1215      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1216                                                  SuperReg);
1217      if (Narrow)
1218        NV = NarrowVector(NV, *CurDAG);
1219      ReplaceUses(SDValue(N, i), NV);
1220    }
1221  }
1222
1223  // Update the Chain
1224  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1225
1226  return Ld;
1227}
1228
1229SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1230                                             unsigned Opc) {
1231  SDLoc dl(N);
1232  EVT VT = N->getOperand(2)->getValueType(0);
1233  bool Narrow = VT.getSizeInBits() == 64;
1234
1235  // Form a REG_SEQUENCE to force register allocation.
1236  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1237
1238  if (Narrow)
1239    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1240                   WidenVector(*CurDAG));
1241
1242  SDValue RegSeq = createQTuple(Regs);
1243
1244  unsigned LaneNo =
1245      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1246
1247  SmallVector<SDValue, 6> Ops;
1248  Ops.push_back(RegSeq);
1249  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1250  Ops.push_back(N->getOperand(NumVecs + 3));
1251  Ops.push_back(N->getOperand(0));
1252  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1253
1254  // Transfer memoperands.
1255  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1256  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1257  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1258
1259  return St;
1260}
1261
1262SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1263                                                 unsigned Opc) {
1264  SDLoc dl(N);
1265  EVT VT = N->getOperand(2)->getValueType(0);
1266  bool Narrow = VT.getSizeInBits() == 64;
1267
1268  // Form a REG_SEQUENCE to force register allocation.
1269  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1270
1271  if (Narrow)
1272    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1273                   WidenVector(*CurDAG));
1274
1275  SDValue RegSeq = createQTuple(Regs);
1276
1277  SmallVector<EVT, 2> ResTys;
1278  ResTys.push_back(MVT::i64);   // Type of the write back register
1279  ResTys.push_back(MVT::Other);
1280
1281  unsigned LaneNo =
1282      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1283
1284  SmallVector<SDValue, 6> Ops;
1285  Ops.push_back(RegSeq);
1286  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1287  Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
1288  Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
1289  Ops.push_back(N->getOperand(0));
1290  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1291
1292  // Transfer memoperands.
1293  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1294  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1295  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1296
1297  return St;
1298}
1299
1300static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1301                                       unsigned &Opc, SDValue &Opd0,
1302                                       unsigned &LSB, unsigned &MSB,
1303                                       unsigned NumberOfIgnoredLowBits,
1304                                       bool BiggerPattern) {
1305  assert(N->getOpcode() == ISD::AND &&
1306         "N must be a AND operation to call this function");
1307
1308  EVT VT = N->getValueType(0);
1309
1310  // We could test the type of VT and return false when the type does not
1311  // match, but since that check is done before this function is called in the
1312  // current context, we turned it into an assert to avoid redundant code.
1313  assert((VT == MVT::i32 || VT == MVT::i64) &&
1314         "Type checking must have been done before calling this function");
1315
1316  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1317  // changed the AND node to a 32-bit mask operation. We'll have to
1318  // undo that as part of the transform here if we want to catch all
1319  // the opportunities.
1320  // Currently the NumberOfIgnoredLowBits argument helps to recover
1321  // from these situations when matching a bigger pattern (bitfield insert).
1322
1323  // For unsigned extracts, check for a shift right and mask
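  // For illustration: (and (srl x, 3), 0x1f) yields LSB = 3 and
  // MSB = 3 + 5 - 1 = 7, i.e. a UBFM that extracts bits [7:3] of x.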
1324  uint64_t And_imm = 0;
1325  if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
1326    return false;
1327
1328  const SDNode *Op0 = N->getOperand(0).getNode();
1329
1330  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1331  // simplified. Try to undo that
1332  And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
1333
1334  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1335  if (And_imm & (And_imm + 1))
1336    return false;
1337
1338  bool ClampMSB = false;
1339  uint64_t Srl_imm = 0;
1340  // Handle the SRL + ANY_EXTEND case.
1341  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1342      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
1343    // Extend the incoming operand of the SRL to 64-bit.
1344    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1345    // Make sure to clamp the MSB so that we preserve the semantics of the
1346    // original operations.
1347    ClampMSB = true;
1348  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1349             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1350                                   Srl_imm)) {
1351    // If the shift result was truncated, we can still combine them.
1352    Opd0 = Op0->getOperand(0).getOperand(0);
1353
1354    // Use the type of SRL node.
1355    VT = Opd0->getValueType(0);
1356  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
1357    Opd0 = Op0->getOperand(0);
1358  } else if (BiggerPattern) {
1359    // Let's pretend a 0 shift right has been performed.
1360    // The resulting code will be at least as good as the original one
1361    // plus it may expose more opportunities for bitfield insert pattern.
1362    // FIXME: Currently we limit this to the bigger pattern, because
1363    // some optimizations expect AND and not UBFM
1364    Opd0 = N->getOperand(0);
1365  } else
1366    return false;
1367
1368  assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
1369         "bad amount in shift node!");
1370
1371  LSB = Srl_imm;
1372  MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
1373                                  : CountTrailingOnes_64(And_imm)) -
1374        1;
1375  if (ClampMSB)
1376    // Since we're moving the extend before the right shift operation, we need
1377    // to clamp the MSB to make sure we don't shift in undefined bits instead of
1378    // the zeros which would get shifted in with the original right shift
1379    // operation.
1380    MSB = MSB > 31 ? 31 : MSB;
1381
1382  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1383  return true;
1384}
1385
1386static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1387                                     unsigned &LSB, unsigned &MSB) {
1388  // We are looking for the following pattern which basically extracts a single
1389  // bit from the source value and places it in the LSB of the destination
1390  // value; all other bits of the destination value are set to zero:
1391  //
1392  // Value2 = AND Value, MaskImm
1393  // SRL Value2, ShiftImm
1394  //
1395  // with MaskImm >> ShiftImm == 1.
1396  //
1397  // This gets selected into a single UBFM:
1398  //
1399  // UBFM Value, ShiftImm, ShiftImm
1400  //
1401
1402  if (N->getOpcode() != ISD::SRL)
1403    return false;
1404
1405  uint64_t And_mask = 0;
1406  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
1407    return false;
1408
1409  Opd0 = N->getOperand(0).getOperand(0);
1410
1411  uint64_t Srl_imm = 0;
1412  if (!isIntImmediate(N->getOperand(1), Srl_imm))
1413    return false;
1414
1415  // Check whether we really have a one bit extract here.
1416  if (And_mask >> Srl_imm == 0x1) {
1417    if (N->getValueType(0) == MVT::i32)
1418      Opc = AArch64::UBFMWri;
1419    else
1420      Opc = AArch64::UBFMXri;
1421
1422    LSB = MSB = Srl_imm;
1423
1424    return true;
1425  }
1426
1427  return false;
1428}
1429
1430static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1431                                       unsigned &LSB, unsigned &MSB,
1432                                       bool BiggerPattern) {
1433  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1434         "N must be a SHR/SRA operation to call this function");
1435
1436  EVT VT = N->getValueType(0);
1437
1438  // We could test the type of VT and return false when it does not match,
1439  // but since that check is already done before this call in the current
1440  // context, we turn it into an assert to avoid redundant code.
1441  assert((VT == MVT::i32 || VT == MVT::i64) &&
1442         "Type checking must have been done before calling this function");
1443
1444  // Check for AND + SRL doing a one bit extract.
1445  if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
1446    return true;
1447
1448  // We're looking for a shift of a shift.
1449  uint64_t Shl_imm = 0;
1450  uint64_t Trunc_bits = 0;
1451  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
1452    Opd0 = N->getOperand(0).getOperand(0);
1453  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1454             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1455    // We are looking for a shift of a truncate. A truncate from i64 to i32
1456    // can be seen as setting the high 32 bits to zero. Our strategy here is
1457    // to always generate a 64-bit UBFM. This consistency will help the CSE
1458    // pass later find more redundancy.
1459    Opd0 = N->getOperand(0).getOperand(0);
1460    Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1461    VT = Opd0->getValueType(0);
1462    assert(VT == MVT::i64 && "the promoted type should be i64");
1463  } else if (BiggerPattern) {
1464    // Let's pretend a 0 shift left has been performed.
1465    // FIXME: Currently we limit this to the bigger pattern case,
1466    // because some optimizations expect AND and not UBFM
1467    Opd0 = N->getOperand(0);
1468  } else
1469    return false;
1470
1471  assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
1472  uint64_t Srl_imm = 0;
1473  if (!isIntImmediate(N->getOperand(1), Srl_imm))
1474    return false;
1475
1476  assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
1477         "bad amount in shift node!");
1478  // Note: The width operand is encoded as width-1.
1479  unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
1480  int sLSB = Srl_imm - Shl_imm;
1481  if (sLSB < 0)
1482    return false;
1483  LSB = sLSB;
1484  MSB = LSB + Width;
1485  // SRA requires a signed extraction
1486  if (VT == MVT::i32)
1487    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1488  else
1489    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1490  return true;
1491}
1492
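// Check whether N can be selected as a single bitfield extract (UBFM/SBFM).
// AND and SRL/SRA nodes are handled by the helpers above; nodes that were
// already selected to one of the [SU]BFM machine opcodes simply have their
// operands read back into Opc, Opd0, LSB and MSB.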
1493static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1494                                SDValue &Opd0, unsigned &LSB, unsigned &MSB,
1495                                unsigned NumberOfIgnoredLowBits = 0,
1496                                bool BiggerPattern = false) {
1497  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1498    return false;
1499
1500  switch (N->getOpcode()) {
1501  default:
1502    if (!N->isMachineOpcode())
1503      return false;
1504    break;
1505  case ISD::AND:
1506    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
1507                                      NumberOfIgnoredLowBits, BiggerPattern);
1508  case ISD::SRL:
1509  case ISD::SRA:
1510    return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
1511  }
1512
1513  unsigned NOpc = N->getMachineOpcode();
1514  switch (NOpc) {
1515  default:
1516    return false;
1517  case AArch64::SBFMWri:
1518  case AArch64::UBFMWri:
1519  case AArch64::SBFMXri:
1520  case AArch64::UBFMXri:
1521    Opc = NOpc;
1522    Opd0 = N->getOperand(0);
1523    LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1524    MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1525    return true;
1526  }
1527  // Unreachable
1528  return false;
1529}
1530
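/// Try to replace N with a single SBFM/UBFM. If the 64-bit opcode is used for
/// a 32-bit result, an extra EXTRACT_SUBREG of sub_32 is emitted to get back
/// to the 32-bit register class.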
1531SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
1532  unsigned Opc, LSB, MSB;
1533  SDValue Opd0;
1534  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
1535    return nullptr;
1536
1537  EVT VT = N->getValueType(0);
1538
1539  // If the bitfield extract operation is 64-bit but the original type is
1540  // 32-bit, we need to add an EXTRACT_SUBREG.
1541  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1542    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
1543                       CurDAG->getTargetConstant(MSB, MVT::i64)};
1544
1545    SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
1546    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
1547    MachineSDNode *Node =
1548        CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
1549                               SDValue(BFM, 0), SubReg);
1550    return Node;
1551  }
1552
1553  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
1554                   CurDAG->getTargetConstant(MSB, VT)};
1555  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1556}
1557
1558/// Does DstMask form a complementary pair with the mask provided by
1559/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1560/// this asks whether DstMask zeroes precisely those bits that will be set by
1561/// the other half.
1562static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
1563                              unsigned NumberOfIgnoredHighBits, EVT VT) {
1564  assert((VT == MVT::i32 || VT == MVT::i64) &&
1565         "i32 or i64 mask type expected!");
1566  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1567
1568  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1569  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1570
1571  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1572         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1573}
1574
1575// Look for bits that will be useful for later uses.
1576// A bit is considered useless as soon as it is dropped and is never used
1577// before it has been dropped.
1578// E.g., looking for the useful bits of x:
1579// 1. y = x & 0x7
1580// 2. z = y >> 2
1581// After #1, the useful bits of x are 0x7; those useful bits then live
1582// through y.
1583// After #2, the useful bits of x are 0x4.
1584// However, if x is used by an unpredictable instruction, then all its bits
1585// are useful.
1586// E.g.
1587// 1. y = x & 0x7
1588// 2. z = y >> 2
1589// 3. str x, [@x]
1590static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1591
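// The user is an AND with a logical immediate: only the bits kept by the
// decoded mask can still be useful, so intersect with it and keep walking
// the uses.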
1592static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1593                                              unsigned Depth) {
1594  uint64_t Imm =
1595      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1596  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1597  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1598  getUsefulBits(Op, UsefulBits, Depth + 1);
1599}
1600
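// Map the useful bits of a bitfield-move result back to its source operand.
// Imm and MSB are the instruction's immr and imms fields: MSB >= Imm is an
// extract of bits [Imm, MSB] into the low bits of the result, otherwise the
// low MSB+1 bits of the source are inserted at bit (BitWidth - Imm).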
1601static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1602                                             uint64_t Imm, uint64_t MSB,
1603                                             unsigned Depth) {
1604  // inherit the bitwidth value
1605  APInt OpUsefulBits(UsefulBits);
1606  OpUsefulBits = 1;
1607
1608  if (MSB >= Imm) {
1609    OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1610    --OpUsefulBits;
1611    // The interesting part will be in the lower part of the result
1612    getUsefulBits(Op, OpUsefulBits, Depth + 1);
1613    // The interesting part was starting at Imm in the argument
1614    OpUsefulBits = OpUsefulBits.shl(Imm);
1615  } else {
1616    OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1617    --OpUsefulBits;
1618    // The interesting part will be shifted in the result
1619    OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
1620    getUsefulBits(Op, OpUsefulBits, Depth + 1);
1621    // The interesting part was at zero in the argument
1622    OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
1623  }
1624
1625  UsefulBits &= OpUsefulBits;
1626}
1627
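// For a UBFM user, read its immr/imms immediates and defer to the generic
// bitfield-move helper above.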
1628static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1629                                  unsigned Depth) {
1630  uint64_t Imm =
1631      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1632  uint64_t MSB =
1633      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1634
1635  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1636}
1637
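// The user is an ORR with a shifted-register operand: the shift determines
// which bits of the source reach the result, so map the useful bits of the
// result back through that shift (LSL and LSR only).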
1638static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1639                                              unsigned Depth) {
1640  uint64_t ShiftTypeAndValue =
1641      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1642  APInt Mask(UsefulBits);
1643  Mask.clearAllBits();
1644  Mask.flipAllBits();
1645
1646  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1647    // Shift Left
1648    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1649    Mask = Mask.shl(ShiftAmt);
1650    getUsefulBits(Op, Mask, Depth + 1);
1651    Mask = Mask.lshr(ShiftAmt);
1652  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1653    // Shift Right
1654    // We do not handle AArch64_AM::ASR, because the sign will change the
1655    // number of useful bits
1656    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1657    Mask = Mask.lshr(ShiftAmt);
1658    getUsefulBits(Op, Mask, Depth + 1);
1659    Mask = Mask.shl(ShiftAmt);
1660  } else
1661    return;
1662
1663  UsefulBits &= Mask;
1664}
1665
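// The user is a BFM (bitfield insert). If Orig is the inserted value
// (operand 1), treat it like any other bitfield-move source; otherwise Orig
// is the value being inserted into, and the bits overwritten by the inserted
// field are no longer useful.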
1666static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1667                                 unsigned Depth) {
1668  uint64_t Imm =
1669      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1670  uint64_t MSB =
1671      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1672
1673  if (Op.getOperand(1) == Orig)
1674    return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1675
1676  APInt OpUsefulBits(UsefulBits);
1677  OpUsefulBits = 1;
1678
1679  if (MSB >= Imm) {
1680    OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1681    --OpUsefulBits;
1682    UsefulBits &= ~OpUsefulBits;
1683    getUsefulBits(Op, UsefulBits, Depth + 1);
1684  } else {
1685    OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1686    --OpUsefulBits;
1687    UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
1688    getUsefulBits(Op, UsefulBits, Depth + 1);
1689  }
1690}
1691
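// Refine UsefulBits for a single user of Orig. Only a handful of
// already-selected bit-manipulation instructions are understood; for any
// other user UsefulBits is left untouched, i.e., that user is assumed to
// need all of the bits.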
1692static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1693                                SDValue Orig, unsigned Depth) {
1694
1695  // Users of this node should have already been instruction selected
1696  // FIXME: Can we turn that into an assert?
1697  if (!UserNode->isMachineOpcode())
1698    return;
1699
1700  switch (UserNode->getMachineOpcode()) {
1701  default:
1702    return;
1703  case AArch64::ANDSWri:
1704  case AArch64::ANDSXri:
1705  case AArch64::ANDWri:
1706  case AArch64::ANDXri:
1707    // We increment Depth only when we call getUsefulBits.
1708    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1709                                             Depth);
1710  case AArch64::UBFMWri:
1711  case AArch64::UBFMXri:
1712    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1713
1714  case AArch64::ORRWrs:
1715  case AArch64::ORRXrs:
1716    if (UserNode->getOperand(1) != Orig)
1717      return;
1718    return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
1719                                             Depth);
1720  case AArch64::BFMWri:
1721  case AArch64::BFMXri:
1722    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1723  }
1724}
1725
1726static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
1727  if (Depth >= 6)
1728    return;
1729  // Initialize UsefulBits
1730  if (!Depth) {
1731    unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
1732    // At the beginning, assume every produced bit is useful.
1733    UsefulBits = APInt(Bitwidth, 0);
1734    UsefulBits.flipAllBits();
1735  }
1736  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
1737
1738  for (SDNode *Node : Op.getNode()->uses()) {
1739    // A use cannot produce useful bits
1740    APInt UsefulBitsForUse = APInt(UsefulBits);
1741    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
1742    UsersUsefulBits |= UsefulBitsForUse;
1743  }
1744  // UsefulBits contains the produced bits that are meaningful for the
1745  // current definition, thus a user cannot make a bit meaningful at
1746  // this point
1747  UsefulBits &= UsersUsefulBits;
1748}
1749
1750/// Create a machine node performing a notional SHL of Op by ShlAmount. If
1751/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
1752/// 0, return Op unchanged.
1753static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
1754  if (ShlAmount == 0)
1755    return Op;
1756
1757  EVT VT = Op.getValueType();
1758  unsigned BitWidth = VT.getSizeInBits();
1759  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1760
1761  SDNode *ShiftNode;
1762  if (ShlAmount > 0) {
1763    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
1764    ShiftNode = CurDAG->getMachineNode(
1765        UBFMOpc, SDLoc(Op), VT, Op,
1766        CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
1767        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
1768  } else {
1769    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
1770    assert(ShlAmount < 0 && "expected right shift");
1771    int ShrAmount = -ShlAmount;
1772    ShiftNode = CurDAG->getMachineNode(
1773        UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
1774        CurDAG->getTargetConstant(BitWidth - 1, VT));
1775  }
1776
1777  return SDValue(ShiftNode, 0);
1778}
1779
1780/// Does this tree qualify as an attempt to move a bitfield into position,
1781/// essentially "(and (shl VAL, N), Mask)".
1782static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
1783                                    SDValue &Src, int &ShiftAmount,
1784                                    int &MaskWidth) {
1785  EVT VT = Op.getValueType();
1786  unsigned BitWidth = VT.getSizeInBits();
1787  (void)BitWidth;
1788  assert(BitWidth == 32 || BitWidth == 64);
1789
1790  APInt KnownZero, KnownOne;
1791  CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
1792
1793  // Non-zero in the sense that they're not provably zero, which is the key
1794  // point if we want to use this value
1795  uint64_t NonZeroBits = (~KnownZero).getZExtValue();
1796
1797  // Discard a constant AND mask if present. It's safe because the node will
1798  // already have been factored into the computeKnownBits calculation above.
1799  uint64_t AndImm;
1800  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
1801    assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
1802    Op = Op.getOperand(0);
1803  }
1804
1805  uint64_t ShlImm;
1806  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
1807    return false;
1808  Op = Op.getOperand(0);
1809
1810  if (!isShiftedMask_64(NonZeroBits))
1811    return false;
1812
1813  ShiftAmount = countTrailingZeros(NonZeroBits);
1814  MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
1815
1816  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
1817  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
1818  // amount.
1819  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
1820
1821  return true;
1822}
1823
1824// Given an OR operation, check if we have the following pattern:
1825// ubfm c, b, imm, imm2 (or something that does the same job, see
1826//                       isBitfieldExtractOp)
1827// d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
1828//                 countTrailingZeros(mask2) == imm2 - imm + 1
1829// f = d | c
1830// If yes, the given reference arguments will be updated so that one can replace
1831// the OR instruction with:
1832// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
1833static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
1834                                     SDValue &Src, unsigned &ImmR,
1835                                     unsigned &ImmS, SelectionDAG *CurDAG) {
1836  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
1837
1838  // Set Opc
1839  EVT VT = N->getValueType(0);
1840  if (VT == MVT::i32)
1841    Opc = AArch64::BFMWri;
1842  else if (VT == MVT::i64)
1843    Opc = AArch64::BFMXri;
1844  else
1845    return false;
1846
1847  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
1848  // have the expected shape. Try to undo that.
1849  APInt UsefulBits;
1850  getUsefulBits(SDValue(N, 0), UsefulBits);
1851
1852  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
1853  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
1854
1855  // OR is commutative; check both possibilities. (Does LLVM provide a
1856  // way to do that directly, e.g., via a code matcher?)
1857  SDValue OrOpd1Val = N->getOperand(1);
1858  SDNode *OrOpd0 = N->getOperand(0).getNode();
1859  SDNode *OrOpd1 = N->getOperand(1).getNode();
1860  for (int i = 0; i < 2;
1861       ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
1862    unsigned BFXOpc;
1863    int DstLSB, Width;
1864    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
1865                            NumberOfIgnoredLowBits, true)) {
1866      // Check that the returned opcode is compatible with the pattern,
1867      // i.e., same type and zero extended (U and not S)
1868      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
1869          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
1870        continue;
1871
1872      // Compute the width of the bitfield insertion
1873      DstLSB = 0;
1874      Width = ImmS - ImmR + 1;
1875      // FIXME: This constraint is to catch bitfield insertion; we may
1876      // want to widen the pattern if we want to grab the general bitfield
1877      // move case.
1878      if (Width <= 0)
1879        continue;
1880
1881      // If the mask on the insertee is correct, we have a BFXIL operation. We
1882      // can share the ImmR and ImmS values from the already-computed UBFM.
1883    } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
1884                                       DstLSB, Width)) {
1885      ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
1886      ImmS = Width - 1;
1887    } else
1888      continue;
1889
1890    // Check the second part of the pattern
1891    EVT VT = OrOpd1->getValueType(0);
1892    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
1893
1894    // Compute the Known Zero for the candidate of the first operand.
1895    // This allows us to catch more general cases than just looking for an
1896    // AND with imm. Indeed, simplify-demanded-bits may have removed the
1897    // AND instruction because it proved to be useless.
1898    APInt KnownZero, KnownOne;
1899    CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
1900
1901    // Check if there is enough room for the second operand to appear
1902    // in the first one
1903    APInt BitsToBeInserted =
1904        APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
1905
1906    if ((BitsToBeInserted & ~KnownZero) != 0)
1907      continue;
1908
1909    // Set the first operand
1910    uint64_t Imm;
1911    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
1912        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
1913      // In that case, we can eliminate the AND
1914      Dst = OrOpd1->getOperand(0);
1915    else
1916      // Maybe the AND has been removed by simplify-demanded-bits
1917      // or is useful because it discards more bits
1918      Dst = OrOpd1Val;
1919
1920    // both parts match
1921    return true;
1922  }
1923
1924  return false;
1925}
1926
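/// Try to select an ISD::OR node as a BFM bitfield insert. Returns null to
/// fall back to normal instruction selection when the pattern does not match.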
1927SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
1928  if (N->getOpcode() != ISD::OR)
1929    return nullptr;
1930
1931  unsigned Opc;
1932  unsigned LSB, MSB;
1933  SDValue Opd0, Opd1;
1934
1935  if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
1936    return nullptr;
1937
1938  EVT VT = N->getValueType(0);
1939  SDValue Ops[] = { Opd0,
1940                    Opd1,
1941                    CurDAG->getTargetConstant(LSB, VT),
1942                    CurDAG->getTargetConstant(MSB, VT) };
1943  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1944}
1945
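/// Select the f32/f64 rounding nodes FCEIL, FFLOOR, FTRUNC and FROUND to the
/// corresponding FRINT[PMZA] instruction. Unless unsafe FP math is enabled,
/// an FRINTX is glued in front so that the inexact exception is still raised,
/// matching the behaviour of the libm call this replaces.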
1946SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
1947  EVT VT = N->getValueType(0);
1948  unsigned Variant;
1949  unsigned Opc;
1950  unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
1951
1952  if (VT == MVT::f32) {
1953    Variant = 0;
1954  } else if (VT == MVT::f64) {
1955    Variant = 1;
1956  } else
1957    return nullptr; // Unrecognized argument type. Fall back on default codegen.
1958
1959  // Pick the FRINTX variant needed to set the flags.
1960  unsigned FRINTXOpc = FRINTXOpcs[Variant];
1961
1962  switch (N->getOpcode()) {
1963  default:
1964    return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
1965  case ISD::FCEIL: {
1966    unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
1967    Opc = FRINTPOpcs[Variant];
1968    break;
1969  }
1970  case ISD::FFLOOR: {
1971    unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
1972    Opc = FRINTMOpcs[Variant];
1973    break;
1974  }
1975  case ISD::FTRUNC: {
1976    unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
1977    Opc = FRINTZOpcs[Variant];
1978    break;
1979  }
1980  case ISD::FROUND: {
1981    unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
1982    Opc = FRINTAOpcs[Variant];
1983    break;
1984  }
1985  }
1986
1987  SDLoc dl(N);
1988  SDValue In = N->getOperand(0);
1989  SmallVector<SDValue, 2> Ops;
1990  Ops.push_back(In);
1991
1992  if (!TM.Options.UnsafeFPMath) {
1993    SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
1994    Ops.push_back(SDValue(FRINTX, 1));
1995  }
1996
1997  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
1998}
1999
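/// SelectCVTFixedPosOperand - Return true if N is a floating-point constant
/// (possibly loaded from a constant pool) equal to an exact power of two,
/// 2^FBits, with 1 <= FBits <= RegWidth. FBits is returned in FixedPos so the
/// surrounding (fp_to_[su]int (fmul Val, N)) can be selected as a fixed-point
/// conversion.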
2000bool
2001AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2002                                              unsigned RegWidth) {
2003  APFloat FVal(0.0);
2004  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2005    FVal = CN->getValueAPF();
2006  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2007    // Some otherwise illegal constants are allowed in this case.
2008    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2009        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2010      return false;
2011
2012    ConstantPoolSDNode *CN =
2013        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2014    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2015  } else
2016    return false;
2017
2018  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2019  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2020  // x-register.
2021  //
2022  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2023  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2024  // integers.
2025  bool IsExact;
2026
2027  // fbits is between 1 and 64 in the worst-case, which means the fmul
2028  // could have 2^64 as an actual operand. Need 65 bits of precision.
2029  APSInt IntVal(65, true);
2030  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2031
2032  // N.b. isPowerOf2 also checks for > 0.
2033  if (!IsExact || !IntVal.isPowerOf2()) return false;
2034  unsigned FBits = IntVal.logBase2();
2035
2036  // Checks above should have guaranteed that we haven't lost information in
2037  // finding FBits, but it must still be in range.
2038  if (FBits == 0 || FBits > RegWidth) return false;
2039
2040  FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
2041  return true;
2042}
2043
2044SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
2045  // Dump information about the Node being selected
2046  DEBUG(errs() << "Selecting: ");
2047  DEBUG(Node->dump(CurDAG));
2048  DEBUG(errs() << "\n");
2049
2050  // If we have a custom node, we already have selected!
2051  if (Node->isMachineOpcode()) {
2052    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2053    Node->setNodeId(-1);
2054    return nullptr;
2055  }
2056
2057  // A few cases need custom selection.
2058  SDNode *ResNode = nullptr;
2059  EVT VT = Node->getValueType(0);
2060
2061  switch (Node->getOpcode()) {
2062  default:
2063    break;
2064
2065  case ISD::ADD:
2066    if (SDNode *I = SelectMLAV64LaneV128(Node))
2067      return I;
2068    break;
2069
2070  case ISD::LOAD: {
2071    // Try to select as an indexed load. Fall through to normal processing
2072    // if we can't.
2073    bool Done = false;
2074    SDNode *I = SelectIndexedLoad(Node, Done);
2075    if (Done)
2076      return I;
2077    break;
2078  }
2079
2080  case ISD::SRL:
2081  case ISD::AND:
2082  case ISD::SRA:
2083    if (SDNode *I = SelectBitfieldExtractOp(Node))
2084      return I;
2085    break;
2086
2087  case ISD::OR:
2088    if (SDNode *I = SelectBitfieldInsertOp(Node))
2089      return I;
2090    break;
2091
2092  case ISD::EXTRACT_VECTOR_ELT: {
2093    // Extracting lane zero is a special case where we can just use a plain
2094    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2095    // the rest of the compiler, especially the register allocator and copy
2096    // propagation, to reason about, so it is preferred when it's possible to
2097    // use it.
2098    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2099    // Bail and use the default Select() for non-zero lanes.
2100    if (LaneNode->getZExtValue() != 0)
2101      break;
2102    // If the element type is not the same as the result type, likewise
2103    // bail and use the default Select(), as there's more to do than just
2104    // a cross-class COPY. This catches extracts of i8 and i16 elements
2105    // since they will need an explicit zext.
2106    if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2107      break;
2108    unsigned SubReg;
2109    switch (Node->getOperand(0)
2110                .getValueType()
2111                .getVectorElementType()
2112                .getSizeInBits()) {
2113    default:
2114      assert(0 && "Unexpected vector element type!");
2115    case 64:
2116      SubReg = AArch64::dsub;
2117      break;
2118    case 32:
2119      SubReg = AArch64::ssub;
2120      break;
2121    case 16: // FALLTHROUGH
2122    case 8:
2123      llvm_unreachable("unexpected zext-requiring extract element!");
2124    }
2125    SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2126                                                     Node->getOperand(0));
2127    DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2128    DEBUG(Extract->dumpr(CurDAG));
2129    DEBUG(dbgs() << "\n");
2130    return Extract.getNode();
2131  }
2132  case ISD::Constant: {
2133    // Materialize zero constants as copies from WZR/XZR.  This allows
2134    // the coalescer to propagate these into other instructions.
2135    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2136    if (ConstNode->isNullValue()) {
2137      if (VT == MVT::i32)
2138        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2139                                      AArch64::WZR, MVT::i32).getNode();
2140      else if (VT == MVT::i64)
2141        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2142                                      AArch64::XZR, MVT::i64).getNode();
2143    }
2144    break;
2145  }
2146
2147  case ISD::FrameIndex: {
2148    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2149    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2150    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2151    const TargetLowering *TLI = getTargetLowering();
2152    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
2153    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2154                      CurDAG->getTargetConstant(Shifter, MVT::i32) };
2155    return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2156  }
2157  case ISD::INTRINSIC_W_CHAIN: {
2158    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2159    switch (IntNo) {
2160    default:
2161      break;
2162    case Intrinsic::aarch64_ldaxp:
2163    case Intrinsic::aarch64_ldxp: {
2164      unsigned Op =
2165          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2166      SDValue MemAddr = Node->getOperand(2);
2167      SDLoc DL(Node);
2168      SDValue Chain = Node->getOperand(0);
2169
2170      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2171                                          MVT::Other, MemAddr, Chain);
2172
2173      // Transfer memoperands.
2174      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2175      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2176      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2177      return Ld;
2178    }
2179    case Intrinsic::aarch64_stlxp:
2180    case Intrinsic::aarch64_stxp: {
2181      unsigned Op =
2182          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2183      SDLoc DL(Node);
2184      SDValue Chain = Node->getOperand(0);
2185      SDValue ValLo = Node->getOperand(2);
2186      SDValue ValHi = Node->getOperand(3);
2187      SDValue MemAddr = Node->getOperand(4);
2188
2189      // Place arguments in the right order.
2190      SmallVector<SDValue, 7> Ops;
2191      Ops.push_back(ValLo);
2192      Ops.push_back(ValHi);
2193      Ops.push_back(MemAddr);
2194      Ops.push_back(Chain);
2195
2196      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2197      // Transfer memoperands.
2198      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2199      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2200      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2201
2202      return St;
2203    }
2204    case Intrinsic::aarch64_neon_ld1x2:
2205      if (VT == MVT::v8i8)
2206        return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2207      else if (VT == MVT::v16i8)
2208        return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2209      else if (VT == MVT::v4i16)
2210        return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2211      else if (VT == MVT::v8i16)
2212        return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2213      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2214        return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2215      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2216        return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2217      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2218        return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2219      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2220        return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2221      break;
2222    case Intrinsic::aarch64_neon_ld1x3:
2223      if (VT == MVT::v8i8)
2224        return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2225      else if (VT == MVT::v16i8)
2226        return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2227      else if (VT == MVT::v4i16)
2228        return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2229      else if (VT == MVT::v8i16)
2230        return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2231      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2232        return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2233      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2234        return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2235      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2236        return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2237      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2238        return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2239      break;
2240    case Intrinsic::aarch64_neon_ld1x4:
2241      if (VT == MVT::v8i8)
2242        return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2243      else if (VT == MVT::v16i8)
2244        return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2245      else if (VT == MVT::v4i16)
2246        return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2247      else if (VT == MVT::v8i16)
2248        return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2249      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2250        return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2251      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2252        return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2253      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2254        return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2255      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2256        return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2257      break;
2258    case Intrinsic::aarch64_neon_ld2:
2259      if (VT == MVT::v8i8)
2260        return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2261      else if (VT == MVT::v16i8)
2262        return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2263      else if (VT == MVT::v4i16)
2264        return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2265      else if (VT == MVT::v8i16)
2266        return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2267      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2268        return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2269      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2270        return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2271      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2272        return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2273      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2274        return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2275      break;
2276    case Intrinsic::aarch64_neon_ld3:
2277      if (VT == MVT::v8i8)
2278        return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2279      else if (VT == MVT::v16i8)
2280        return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2281      else if (VT == MVT::v4i16)
2282        return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2283      else if (VT == MVT::v8i16)
2284        return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2285      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2286        return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2287      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2288        return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2289      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2290        return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2291      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2292        return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2293      break;
2294    case Intrinsic::aarch64_neon_ld4:
2295      if (VT == MVT::v8i8)
2296        return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
2297      else if (VT == MVT::v16i8)
2298        return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
2299      else if (VT == MVT::v4i16)
2300        return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
2301      else if (VT == MVT::v8i16)
2302        return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
2303      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2304        return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
2305      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2306        return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
2307      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2308        return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2309      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2310        return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
2311      break;
2312    case Intrinsic::aarch64_neon_ld2r:
2313      if (VT == MVT::v8i8)
2314        return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
2315      else if (VT == MVT::v16i8)
2316        return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
2317      else if (VT == MVT::v4i16)
2318        return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
2319      else if (VT == MVT::v8i16)
2320        return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
2321      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2322        return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
2323      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2324        return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
2325      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2326        return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
2327      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2328        return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
2329      break;
2330    case Intrinsic::aarch64_neon_ld3r:
2331      if (VT == MVT::v8i8)
2332        return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
2333      else if (VT == MVT::v16i8)
2334        return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
2335      else if (VT == MVT::v4i16)
2336        return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
2337      else if (VT == MVT::v8i16)
2338        return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
2339      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2340        return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
2341      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2342        return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
2343      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2344        return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
2345      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2346        return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
2347      break;
2348    case Intrinsic::aarch64_neon_ld4r:
2349      if (VT == MVT::v8i8)
2350        return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
2351      else if (VT == MVT::v16i8)
2352        return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
2353      else if (VT == MVT::v4i16)
2354        return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
2355      else if (VT == MVT::v8i16)
2356        return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
2357      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2358        return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
2359      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2360        return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
2361      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2362        return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
2363      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2364        return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
2365      break;
2366    case Intrinsic::aarch64_neon_ld2lane:
2367      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2368        return SelectLoadLane(Node, 2, AArch64::LD2i8);
2369      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2370        return SelectLoadLane(Node, 2, AArch64::LD2i16);
2371      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2372               VT == MVT::v2f32)
2373        return SelectLoadLane(Node, 2, AArch64::LD2i32);
2374      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2375               VT == MVT::v1f64)
2376        return SelectLoadLane(Node, 2, AArch64::LD2i64);
2377      break;
2378    case Intrinsic::aarch64_neon_ld3lane:
2379      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2380        return SelectLoadLane(Node, 3, AArch64::LD3i8);
2381      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2382        return SelectLoadLane(Node, 3, AArch64::LD3i16);
2383      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2384               VT == MVT::v2f32)
2385        return SelectLoadLane(Node, 3, AArch64::LD3i32);
2386      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2387               VT == MVT::v1f64)
2388        return SelectLoadLane(Node, 3, AArch64::LD3i64);
2389      break;
2390    case Intrinsic::aarch64_neon_ld4lane:
2391      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2392        return SelectLoadLane(Node, 4, AArch64::LD4i8);
2393      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2394        return SelectLoadLane(Node, 4, AArch64::LD4i16);
2395      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2396               VT == MVT::v2f32)
2397        return SelectLoadLane(Node, 4, AArch64::LD4i32);
2398      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2399               VT == MVT::v1f64)
2400        return SelectLoadLane(Node, 4, AArch64::LD4i64);
2401      break;
2402    }
2403  } break;
2404  case ISD::INTRINSIC_WO_CHAIN: {
2405    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
2406    switch (IntNo) {
2407    default:
2408      break;
2409    case Intrinsic::aarch64_neon_tbl2:
2410      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
2411                                                  : AArch64::TBLv16i8Two,
2412                         false);
2413    case Intrinsic::aarch64_neon_tbl3:
2414      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
2415                                                  : AArch64::TBLv16i8Three,
2416                         false);
2417    case Intrinsic::aarch64_neon_tbl4:
2418      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
2419                                                  : AArch64::TBLv16i8Four,
2420                         false);
2421    case Intrinsic::aarch64_neon_tbx2:
2422      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
2423                                                  : AArch64::TBXv16i8Two,
2424                         true);
2425    case Intrinsic::aarch64_neon_tbx3:
2426      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
2427                                                  : AArch64::TBXv16i8Three,
2428                         true);
2429    case Intrinsic::aarch64_neon_tbx4:
2430      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
2431                                                  : AArch64::TBXv16i8Four,
2432                         true);
2433    case Intrinsic::aarch64_neon_smull:
2434    case Intrinsic::aarch64_neon_umull:
2435      if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
2436        return N;
2437      break;
2438    }
2439    break;
2440  }
2441  case ISD::INTRINSIC_VOID: {
2442    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2443    if (Node->getNumOperands() >= 3)
2444      VT = Node->getOperand(2)->getValueType(0);
2445    switch (IntNo) {
2446    default:
2447      break;
2448    case Intrinsic::aarch64_neon_st1x2: {
2449      if (VT == MVT::v8i8)
2450        return SelectStore(Node, 2, AArch64::ST1Twov8b);
2451      else if (VT == MVT::v16i8)
2452        return SelectStore(Node, 2, AArch64::ST1Twov16b);
2453      else if (VT == MVT::v4i16)
2454        return SelectStore(Node, 2, AArch64::ST1Twov4h);
2455      else if (VT == MVT::v8i16)
2456        return SelectStore(Node, 2, AArch64::ST1Twov8h);
2457      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2458        return SelectStore(Node, 2, AArch64::ST1Twov2s);
2459      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2460        return SelectStore(Node, 2, AArch64::ST1Twov4s);
2461      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2462        return SelectStore(Node, 2, AArch64::ST1Twov2d);
2463      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2464        return SelectStore(Node, 2, AArch64::ST1Twov1d);
2465      break;
2466    }
2467    case Intrinsic::aarch64_neon_st1x3: {
2468      if (VT == MVT::v8i8)
2469        return SelectStore(Node, 3, AArch64::ST1Threev8b);
2470      else if (VT == MVT::v16i8)
2471        return SelectStore(Node, 3, AArch64::ST1Threev16b);
2472      else if (VT == MVT::v4i16)
2473        return SelectStore(Node, 3, AArch64::ST1Threev4h);
2474      else if (VT == MVT::v8i16)
2475        return SelectStore(Node, 3, AArch64::ST1Threev8h);
2476      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2477        return SelectStore(Node, 3, AArch64::ST1Threev2s);
2478      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2479        return SelectStore(Node, 3, AArch64::ST1Threev4s);
2480      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2481        return SelectStore(Node, 3, AArch64::ST1Threev2d);
2482      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2483        return SelectStore(Node, 3, AArch64::ST1Threev1d);
2484      break;
2485    }
2486    case Intrinsic::aarch64_neon_st1x4: {
2487      if (VT == MVT::v8i8)
2488        return SelectStore(Node, 4, AArch64::ST1Fourv8b);
2489      else if (VT == MVT::v16i8)
2490        return SelectStore(Node, 4, AArch64::ST1Fourv16b);
2491      else if (VT == MVT::v4i16)
2492        return SelectStore(Node, 4, AArch64::ST1Fourv4h);
2493      else if (VT == MVT::v8i16)
2494        return SelectStore(Node, 4, AArch64::ST1Fourv8h);
2495      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2496        return SelectStore(Node, 4, AArch64::ST1Fourv2s);
2497      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2498        return SelectStore(Node, 4, AArch64::ST1Fourv4s);
2499      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2500        return SelectStore(Node, 4, AArch64::ST1Fourv2d);
2501      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2502        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2503      break;
2504    }
2505    case Intrinsic::aarch64_neon_st2: {
2506      if (VT == MVT::v8i8)
2507        return SelectStore(Node, 2, AArch64::ST2Twov8b);
2508      else if (VT == MVT::v16i8)
2509        return SelectStore(Node, 2, AArch64::ST2Twov16b);
2510      else if (VT == MVT::v4i16)
2511        return SelectStore(Node, 2, AArch64::ST2Twov4h);
2512      else if (VT == MVT::v8i16)
2513        return SelectStore(Node, 2, AArch64::ST2Twov8h);
2514      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2515        return SelectStore(Node, 2, AArch64::ST2Twov2s);
2516      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2517        return SelectStore(Node, 2, AArch64::ST2Twov4s);
2518      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2519        return SelectStore(Node, 2, AArch64::ST2Twov2d);
2520      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2521        return SelectStore(Node, 2, AArch64::ST1Twov1d);
2522      break;
2523    }
2524    case Intrinsic::aarch64_neon_st3: {
2525      if (VT == MVT::v8i8)
2526        return SelectStore(Node, 3, AArch64::ST3Threev8b);
2527      else if (VT == MVT::v16i8)
2528        return SelectStore(Node, 3, AArch64::ST3Threev16b);
2529      else if (VT == MVT::v4i16)
2530        return SelectStore(Node, 3, AArch64::ST3Threev4h);
2531      else if (VT == MVT::v8i16)
2532        return SelectStore(Node, 3, AArch64::ST3Threev8h);
2533      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2534        return SelectStore(Node, 3, AArch64::ST3Threev2s);
2535      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2536        return SelectStore(Node, 3, AArch64::ST3Threev4s);
2537      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2538        return SelectStore(Node, 3, AArch64::ST3Threev2d);
2539      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2540        return SelectStore(Node, 3, AArch64::ST1Threev1d);
2541      break;
2542    }
2543    case Intrinsic::aarch64_neon_st4: {
2544      if (VT == MVT::v8i8)
2545        return SelectStore(Node, 4, AArch64::ST4Fourv8b);
2546      else if (VT == MVT::v16i8)
2547        return SelectStore(Node, 4, AArch64::ST4Fourv16b);
2548      else if (VT == MVT::v4i16)
2549        return SelectStore(Node, 4, AArch64::ST4Fourv4h);
2550      else if (VT == MVT::v8i16)
2551        return SelectStore(Node, 4, AArch64::ST4Fourv8h);
2552      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2553        return SelectStore(Node, 4, AArch64::ST4Fourv2s);
2554      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2555        return SelectStore(Node, 4, AArch64::ST4Fourv4s);
2556      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2557        return SelectStore(Node, 4, AArch64::ST4Fourv2d);
2558      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2559        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2560      break;
2561    }
2562    case Intrinsic::aarch64_neon_st2lane: {
2563      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2564        return SelectStoreLane(Node, 2, AArch64::ST2i8);
2565      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2566        return SelectStoreLane(Node, 2, AArch64::ST2i16);
2567      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2568               VT == MVT::v2f32)
2569        return SelectStoreLane(Node, 2, AArch64::ST2i32);
2570      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2571               VT == MVT::v1f64)
2572        return SelectStoreLane(Node, 2, AArch64::ST2i64);
2573      break;
2574    }
2575    case Intrinsic::aarch64_neon_st3lane: {
2576      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2577        return SelectStoreLane(Node, 3, AArch64::ST3i8);
2578      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2579        return SelectStoreLane(Node, 3, AArch64::ST3i16);
2580      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2581               VT == MVT::v2f32)
2582        return SelectStoreLane(Node, 3, AArch64::ST3i32);
2583      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2584               VT == MVT::v1f64)
2585        return SelectStoreLane(Node, 3, AArch64::ST3i64);
2586      break;
2587    }
2588    case Intrinsic::aarch64_neon_st4lane: {
2589      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2590        return SelectStoreLane(Node, 4, AArch64::ST4i8);
2591      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2592        return SelectStoreLane(Node, 4, AArch64::ST4i16);
2593      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2594               VT == MVT::v2f32)
2595        return SelectStoreLane(Node, 4, AArch64::ST4i32);
2596      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2597               VT == MVT::v1f64)
2598        return SelectStoreLane(Node, 4, AArch64::ST4i64);
2599      break;
2600    }
2601    }
2602  } break;
2603  case AArch64ISD::LD2post: {
2604    if (VT == MVT::v8i8)
2605      return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
2606    else if (VT == MVT::v16i8)
2607      return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
2608    else if (VT == MVT::v4i16)
2609      return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
2610    else if (VT == MVT::v8i16)
2611      return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
2612    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2613      return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
2614    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2615      return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
2616    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2617      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2618    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2619      return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
2620    break;
2621  }
2622  case AArch64ISD::LD3post: {
2623    if (VT == MVT::v8i8)
2624      return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
2625    else if (VT == MVT::v16i8)
2626      return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
2627    else if (VT == MVT::v4i16)
2628      return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
2629    else if (VT == MVT::v8i16)
2630      return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
2631    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2632      return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
2633    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2634      return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
2635    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2636      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2637    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2638      return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
2639    break;
2640  }
2641  case AArch64ISD::LD4post: {
2642    if (VT == MVT::v8i8)
2643      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
2644    else if (VT == MVT::v16i8)
2645      return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
2646    else if (VT == MVT::v4i16)
2647      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
2648    else if (VT == MVT::v8i16)
2649      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
2650    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2651      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
2652    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2653      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
2654    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2655      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2656    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2657      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
2658    break;
2659  }
2660  case AArch64ISD::LD1x2post: {
2661    if (VT == MVT::v8i8)
2662      return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
2663    else if (VT == MVT::v16i8)
2664      return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
2665    else if (VT == MVT::v4i16)
2666      return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
2667    else if (VT == MVT::v8i16)
2668      return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
2669    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2670      return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
2671    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2672      return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
2673    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2674      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2675    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2676      return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
2677    break;
2678  }
2679  case AArch64ISD::LD1x3post: {
2680    if (VT == MVT::v8i8)
2681      return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
2682    else if (VT == MVT::v16i8)
2683      return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
2684    else if (VT == MVT::v4i16)
2685      return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
2686    else if (VT == MVT::v8i16)
2687      return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
2688    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2689      return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
2690    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2691      return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
2692    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2693      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2694    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2695      return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
2696    break;
2697  }
2698  case AArch64ISD::LD1x4post: {
2699    if (VT == MVT::v8i8)
2700      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
2701    else if (VT == MVT::v16i8)
2702      return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
2703    else if (VT == MVT::v4i16)
2704      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
2705    else if (VT == MVT::v8i16)
2706      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
2707    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2708      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
2709    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2710      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
2711    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2712      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2713    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2714      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
2715    break;
2716  }
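  // LDnR (load single structure and replicate): each loaded element is
  // broadcast to all lanes of its destination register.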
2717  case AArch64ISD::LD1DUPpost: {
2718    if (VT == MVT::v8i8)
2719      return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
2720    else if (VT == MVT::v16i8)
2721      return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
2722    else if (VT == MVT::v4i16)
2723      return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
2724    else if (VT == MVT::v8i16)
2725      return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
2726    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2727      return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
2728    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2729      return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
2730    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2731      return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
2732    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2733      return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
2734    break;
2735  }
2736  case AArch64ISD::LD2DUPpost: {
2737    if (VT == MVT::v8i8)
2738      return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
2739    else if (VT == MVT::v16i8)
2740      return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
2741    else if (VT == MVT::v4i16)
2742      return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
2743    else if (VT == MVT::v8i16)
2744      return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
2745    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2746      return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
2747    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2748      return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
2749    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2750      return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
2751    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2752      return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
2753    break;
2754  }
2755  case AArch64ISD::LD3DUPpost: {
2756    if (VT == MVT::v8i8)
2757      return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
2758    else if (VT == MVT::v16i8)
2759      return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
2760    else if (VT == MVT::v4i16)
2761      return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
2762    else if (VT == MVT::v8i16)
2763      return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
2764    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2765      return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
2766    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2767      return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
2768    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2769      return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
2770    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2771      return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
2772    break;
2773  }
2774  case AArch64ISD::LD4DUPpost: {
2775    if (VT == MVT::v8i8)
2776      return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
2777    else if (VT == MVT::v16i8)
2778      return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
2779    else if (VT == MVT::v4i16)
2780      return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
2781    else if (VT == MVT::v8i16)
2782      return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
2783    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2784      return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
2785    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2786      return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
2787    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2788      return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
2789    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2790      return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
2791    break;
2792  }
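  // Lane loads: load a single structure into one lane of 1-4 registers.
  // Only the element size matters here, so integer and FP vectors with the
  // same element width share an opcode.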
2793  case AArch64ISD::LD1LANEpost: {
2794    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2795      return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
2796    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2797      return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
2798    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2799             VT == MVT::v2f32)
2800      return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
2801    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2802             VT == MVT::v1f64)
2803      return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
2804    break;
2805  }
2806  case AArch64ISD::LD2LANEpost: {
2807    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2808      return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
2809    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2810      return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
2811    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2812             VT == MVT::v2f32)
2813      return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
2814    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2815             VT == MVT::v1f64)
2816      return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
2817    break;
2818  }
2819  case AArch64ISD::LD3LANEpost: {
2820    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2821      return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
2822    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2823      return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
2824    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2825             VT == MVT::v2f32)
2826      return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
2827    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2828             VT == MVT::v1f64)
2829      return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
2830    break;
2831  }
2832  case AArch64ISD::LD4LANEpost: {
2833    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2834      return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
2835    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2836      return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
2837    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2838             VT == MVT::v2f32)
2839      return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
2840    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2841             VT == MVT::v1f64)
2842      return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
2843    break;
2844  }
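  // Post-incremented structured stores (elements interleaved from 2-4
  // registers). Stores produce no vector result, so the value type is taken
  // from operand 1, the vector being stored.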
2845  case AArch64ISD::ST2post: {
2846    VT = Node->getOperand(1).getValueType();
2847    if (VT == MVT::v8i8)
2848      return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
2849    else if (VT == MVT::v16i8)
2850      return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
2851    else if (VT == MVT::v4i16)
2852      return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
2853    else if (VT == MVT::v8i16)
2854      return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
2855    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2856      return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
2857    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2858      return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
2859    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2860      return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
2861    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2862      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
2863    break;
2864  }
2865  case AArch64ISD::ST3post: {
2866    VT = Node->getOperand(1).getValueType();
2867    if (VT == MVT::v8i8)
2868      return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
2869    else if (VT == MVT::v16i8)
2870      return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
2871    else if (VT == MVT::v4i16)
2872      return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
2873    else if (VT == MVT::v8i16)
2874      return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
2875    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2876      return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
2877    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2878      return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
2879    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2880      return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
2881    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2882      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
2883    break;
2884  }
2885  case AArch64ISD::ST4post: {
2886    VT = Node->getOperand(1).getValueType();
2887    if (VT == MVT::v8i8)
2888      return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
2889    else if (VT == MVT::v16i8)
2890      return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
2891    else if (VT == MVT::v4i16)
2892      return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
2893    else if (VT == MVT::v8i16)
2894      return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
2895    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2896      return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
2897    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2898      return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
2899    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2900      return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
2901    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2902      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
2903    break;
2904  }
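  // ST1x2/x3/x4: multi-register ST1 stores; the registers are written
  // back-to-back without interleaving.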
2905  case AArch64ISD::ST1x2post: {
2906    VT = Node->getOperand(1).getValueType();
2907    if (VT == MVT::v8i8)
2908      return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
2909    else if (VT == MVT::v16i8)
2910      return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
2911    else if (VT == MVT::v4i16)
2912      return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
2913    else if (VT == MVT::v8i16)
2914      return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
2915    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2916      return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
2917    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2918      return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
2919    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2920      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
2921    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2922      return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
2923    break;
2924  }
2925  case AArch64ISD::ST1x3post: {
2926    VT = Node->getOperand(1).getValueType();
2927    if (VT == MVT::v8i8)
2928      return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
2929    else if (VT == MVT::v16i8)
2930      return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
2931    else if (VT == MVT::v4i16)
2932      return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
2933    else if (VT == MVT::v8i16)
2934      return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
2935    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2936      return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
2937    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2938      return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
2939    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2940      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
2941    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2942      return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
2943    break;
2944  }
2945  case AArch64ISD::ST1x4post: {
2946    VT = Node->getOperand(1).getValueType();
2947    if (VT == MVT::v8i8)
2948      return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
2949    else if (VT == MVT::v16i8)
2950      return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
2951    else if (VT == MVT::v4i16)
2952      return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
2953    else if (VT == MVT::v8i16)
2954      return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
2955    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2956      return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
2957    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2958      return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
2959    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2960      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
2961    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2962      return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
2963    break;
2964  }
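  // Per-lane stores: store one lane from each of 2-4 registers, with the
  // opcode chosen by element size alone.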
2965  case AArch64ISD::ST2LANEpost: {
2966    VT = Node->getOperand(1).getValueType();
2967    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2968      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
2969    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2970      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
2971    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2972             VT == MVT::v2f32)
2973      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
2974    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2975             VT == MVT::v1f64)
2976      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
2977    break;
2978  }
2979  case AArch64ISD::ST3LANEpost: {
2980    VT = Node->getOperand(1).getValueType();
2981    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2982      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
2983    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2984      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
2985    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2986             VT == MVT::v2f32)
2987      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
2988    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2989             VT == MVT::v1f64)
2990      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
2991    break;
2992  }
2993  case AArch64ISD::ST4LANEpost: {
2994    VT = Node->getOperand(1).getValueType();
2995    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2996      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
2997    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2998      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
2999    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3000             VT == MVT::v2f32)
3001      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
3002    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3003             VT == MVT::v1f64)
3004      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
3005    break;
3006  }
3007
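  // FP rounding nodes: give SelectLIBM a chance to emit a target-specific
  // instruction; if it declines, fall through to the generated matcher.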
3008  case ISD::FCEIL:
3009  case ISD::FFLOOR:
3010  case ISD::FTRUNC:
3011  case ISD::FROUND:
3012    if (SDNode *I = SelectLIBM(Node))
3013      return I;
3014    break;
3015  }
3016
3017  // Select the default instruction via the TableGen-generated matcher.
3018  ResNode = SelectCode(Node);
3019
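  // With -debug, show what the node was selected to; if selection left the
  // node unchanged (or produced nothing), dump the original node instead.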
3020  DEBUG(errs() << "=> ");
3021  if (ResNode == nullptr || ResNode == Node)
3022    DEBUG(Node->dump(CurDAG));
3023  else
3024    DEBUG(ResNode->dump(CurDAG));
3025  DEBUG(errs() << "\n");
3026
3027  return ResNode;
3028}
3029
3030/// createAArch64ISelDag - This pass converts a legalized DAG into an
3031/// AArch64-specific DAG, ready for instruction scheduling.
3032FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
3033                                         CodeGenOpt::Level OptLevel) {
3034  return new AArch64DAGToDAGISel(TM, OptLevel);
3035}
3036