AArch64ISelDAGToDAG.cpp revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the AArch64 target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/SelectionDAGISel.h"
18#include "llvm/IR/Function.h" // To access function attributes.
19#include "llvm/IR/GlobalValue.h"
20#include "llvm/IR/Intrinsics.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/ErrorHandling.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "aarch64-isel"
29
30//===--------------------------------------------------------------------===//
31/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32/// instructions for SelectionDAG operations.
33///
34namespace {
35
36class AArch64DAGToDAGISel : public SelectionDAGISel {
37  AArch64TargetMachine &TM;
38
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42
43  bool ForCodeSize;
44
45public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47                               CodeGenOpt::Level OptLevel)
48      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
49        ForCodeSize(false) {}
50
51  const char *getPassName() const override {
52    return "AArch64 Instruction Selection";
53  }
54
55  bool runOnMachineFunction(MachineFunction &MF) override {
56    AttributeSet FnAttrs = MF.getFunction()->getAttributes();
57    ForCodeSize =
58        FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
59                             Attribute::OptimizeForSize) ||
60        FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
61    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
62    return SelectionDAGISel::runOnMachineFunction(MF);
63  }
64
65  SDNode *Select(SDNode *Node) override;
66
67  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
68  /// inline asm expressions.
69  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
70                                    char ConstraintCode,
71                                    std::vector<SDValue> &OutOps) override;
72
73  SDNode *SelectMLAV64LaneV128(SDNode *N);
74  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
75  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79    return SelectShiftedRegister(N, false, Reg, Shift);
80  }
81  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82    return SelectShiftedRegister(N, true, Reg, Shift);
83  }
84  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
85    return SelectAddrModeIndexed(N, 1, Base, OffImm);
86  }
87  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
88    return SelectAddrModeIndexed(N, 2, Base, OffImm);
89  }
90  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
91    return SelectAddrModeIndexed(N, 4, Base, OffImm);
92  }
93  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
94    return SelectAddrModeIndexed(N, 8, Base, OffImm);
95  }
96  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
97    return SelectAddrModeIndexed(N, 16, Base, OffImm);
98  }
99  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
100    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
101  }
102  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
103    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
104  }
105  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
106    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
107  }
108  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
109    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
110  }
111  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
112    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
113  }
114
115  template<int Width>
116  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
117                         SDValue &SignExtend, SDValue &DoShift) {
118    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
119  }
120
121  template<int Width>
122  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
123                         SDValue &SignExtend, SDValue &DoShift) {
124    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
125  }
126
127
128  /// Form sequences of consecutive 64/128-bit registers for use in NEON
129  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
130  /// between 1 and 4 elements. If it contains a single element, that element
131  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
132  SDValue createDTuple(ArrayRef<SDValue> Vecs);
133  SDValue createQTuple(ArrayRef<SDValue> Vecs);
134
135  /// Generic helper for the createDTuple/createQTuple
136  /// functions. Those should almost always be called instead.
137  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
138                      unsigned SubRegs[]);
139
140  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
141
142  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
143
144  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
145                     unsigned SubRegIdx);
146  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
147                         unsigned SubRegIdx);
148  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
149  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
150
151  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
152  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
153  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
154  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
155
156  SDNode *SelectBitfieldExtractOp(SDNode *N);
157  SDNode *SelectBitfieldInsertOp(SDNode *N);
158
159  SDNode *SelectLIBM(SDNode *N);
160
161// Include the pieces autogenerated from the target description.
162#include "AArch64GenDAGISel.inc"
163
164private:
165  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
166                             SDValue &Shift);
167  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
168                             SDValue &OffImm);
169  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
170                              SDValue &OffImm);
171  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
172                         SDValue &Offset, SDValue &SignExtend,
173                         SDValue &DoShift);
174  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
175                         SDValue &Offset, SDValue &SignExtend,
176                         SDValue &DoShift);
177  bool isWorthFolding(SDValue V) const;
178  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
179                         SDValue &Offset, SDValue &SignExtend);
180
181  template<unsigned RegWidth>
182  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
183    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
184  }
185
186  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
187};
188} // end anonymous namespace
189
190/// isIntImmediate - This method tests to see if the node is a constant
191/// operand. If so, Imm will receive the zero-extended value.
192static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
193  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
194    Imm = C->getZExtValue();
195    return true;
196  }
197  return false;
198}
199
200// isIntImmediate - This method tests to see if N is a constant operand.
201// If so, Imm will receive the value.
202static bool isIntImmediate(SDValue N, uint64_t &Imm) {
203  return isIntImmediate(N.getNode(), Imm);
204}
205
206// isOpcWithIntImmediate - This method tests to see if the node is a specific
207// opcode and that it has an immediate integer right operand.
208// If so, Imm will receive the immediate's value.
209static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
210                                  uint64_t &Imm) {
211  return N->getOpcode() == Opc &&
212         isIntImmediate(N->getOperand(1).getNode(), Imm);
213}
214
215bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
216    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
217  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
218  // Require the address to be in a register.  That is safe for all AArch64
219  // variants and it is hard to do anything much smarter without knowing
220  // how the operand is used.
221  OutOps.push_back(Op);
222  return false;
223}
224
225/// SelectArithImmed - Select an immediate value that can be represented as
226/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
227/// Val set to the 12-bit value and Shift set to the shifter operand.
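/// For example (illustrative): 0x123 is selectable as (#0x123, LSL #0) and
/// 0x123000 as (#0x123, LSL #12), whereas 0x123456 is rejected because it
/// needs non-zero bits in both halves.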
228bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
229                                           SDValue &Shift) {
230  // This function is called from the addsub_shifted_imm ComplexPattern,
231  // which lists [imm] as the list of opcodes it's interested in; however,
232  // we still need to check whether the operand is actually an immediate
233  // here because the ComplexPattern opcode list is only used in
234  // root-level opcode matching.
235  if (!isa<ConstantSDNode>(N.getNode()))
236    return false;
237
238  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
239  unsigned ShiftAmt;
240
241  if (Immed >> 12 == 0) {
242    ShiftAmt = 0;
243  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
244    ShiftAmt = 12;
245    Immed = Immed >> 12;
246  } else
247    return false;
248
249  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
250  Val = CurDAG->getTargetConstant(Immed, MVT::i32);
251  Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
252  return true;
253}
254
255/// SelectNegArithImmed - As above, but negates the value before trying to
256/// select it.
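/// For example (illustrative): an i32 add of the constant 0xFFFFFFFB (-5)
/// negates to 5, so the operation can be selected as a subtract of #5; a
/// constant of 0 is rejected because CMP/CMN #0 differ in their effect on the
/// C flag.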
257bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
258                                              SDValue &Shift) {
259  // This function is called from the addsub_shifted_imm ComplexPattern,
260  // which lists [imm] as the list of opcodes it's interested in; however,
261  // we still need to check whether the operand is actually an immediate
262  // here because the ComplexPattern opcode list is only used in
263  // root-level opcode matching.
264  if (!isa<ConstantSDNode>(N.getNode()))
265    return false;
266
267  // The immediate operand must be a 24-bit zero-extended immediate.
268  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
269
270  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
271  // have the opposite effect on the C flag, so this pattern mustn't match under
272  // those circumstances.
273  if (Immed == 0)
274    return false;
275
276  if (N.getValueType() == MVT::i32)
277    Immed = ~((uint32_t)Immed) + 1;
278  else
279    Immed = ~Immed + 1ULL;
280  if (Immed & 0xFFFFFFFFFF000000ULL)
281    return false;
282
283  Immed &= 0xFFFFFFULL;
284  return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
285}
286
287/// getShiftTypeForNode - Translate a shift node to the corresponding
288/// ShiftType value.
289static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
290  switch (N.getOpcode()) {
291  default:
292    return AArch64_AM::InvalidShiftExtend;
293  case ISD::SHL:
294    return AArch64_AM::LSL;
295  case ISD::SRL:
296    return AArch64_AM::LSR;
297  case ISD::SRA:
298    return AArch64_AM::ASR;
299  case ISD::ROTR:
300    return AArch64_AM::ROR;
301  }
302}
303
304/// \brief Determine whether it is worth folding V into an extended register.
305bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
306  // It hurts if a value is used at least twice, unless we are optimizing
307  // for code size.
308  if (ForCodeSize || V.hasOneUse())
309    return true;
310  return false;
311}
312
313/// SelectShiftedRegister - Select a "shifted register" operand.  If the value
314/// is not shifted, set the Shift operand to default of "LSL 0".  The logical
315/// instructions allow the shifted register to be rotated, but the arithmetic
316/// instructions do not.  The AllowROR parameter specifies whether ROR is
317/// supported.
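/// For example (illustrative), an ADD whose right operand is (shl x1, 3) can
/// fold the shift and select to a single "add x0, x2, x1, lsl #3", provided
/// the shifted value is not reused elsewhere (see isWorthFolding).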
318bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
319                                                SDValue &Reg, SDValue &Shift) {
320  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
321  if (ShType == AArch64_AM::InvalidShiftExtend)
322    return false;
323  if (!AllowROR && ShType == AArch64_AM::ROR)
324    return false;
325
326  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
327    unsigned BitSize = N.getValueType().getSizeInBits();
328    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
329    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
330
331    Reg = N.getOperand(0);
332    Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
333    return isWorthFolding(N);
334  }
335
336  return false;
337}
338
339/// getExtendTypeForNode - Translate an extend node to the corresponding
340/// ExtendType value.
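/// For example, (sext_inreg x, i8) maps to SXTB, (and x, 0xffff) to UXTH and a
/// zero-extend from i32 to UXTW; when IsLoadStore is set, only the word-sized
/// extends are accepted and the byte/halfword forms are reported as invalid.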
341static AArch64_AM::ShiftExtendType
342getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
343  if (N.getOpcode() == ISD::SIGN_EXTEND ||
344      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
345    EVT SrcVT;
346    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
347      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
348    else
349      SrcVT = N.getOperand(0).getValueType();
350
351    if (!IsLoadStore && SrcVT == MVT::i8)
352      return AArch64_AM::SXTB;
353    else if (!IsLoadStore && SrcVT == MVT::i16)
354      return AArch64_AM::SXTH;
355    else if (SrcVT == MVT::i32)
356      return AArch64_AM::SXTW;
357    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
358
359    return AArch64_AM::InvalidShiftExtend;
360  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
361             N.getOpcode() == ISD::ANY_EXTEND) {
362    EVT SrcVT = N.getOperand(0).getValueType();
363    if (!IsLoadStore && SrcVT == MVT::i8)
364      return AArch64_AM::UXTB;
365    else if (!IsLoadStore && SrcVT == MVT::i16)
366      return AArch64_AM::UXTH;
367    else if (SrcVT == MVT::i32)
368      return AArch64_AM::UXTW;
369    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
370
371    return AArch64_AM::InvalidShiftExtend;
372  } else if (N.getOpcode() == ISD::AND) {
373    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
374    if (!CSD)
375      return AArch64_AM::InvalidShiftExtend;
376    uint64_t AndMask = CSD->getZExtValue();
377
378    switch (AndMask) {
379    default:
380      return AArch64_AM::InvalidShiftExtend;
381    case 0xFF:
382      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
383    case 0xFFFF:
384      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
385    case 0xFFFFFFFF:
386      return AArch64_AM::UXTW;
387    }
388  }
389
390  return AArch64_AM::InvalidShiftExtend;
391}
392
393// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
394static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
395  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
396      DL->getOpcode() != AArch64ISD::DUPLANE32)
397    return false;
398
399  SDValue SV = DL->getOperand(0);
400  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
401    return false;
402
403  SDValue EV = SV.getOperand(1);
404  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
405    return false;
406
407  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
408  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
409  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
410  LaneOp = EV.getOperand(0);
411
412  return true;
413}
414
415// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
416// high lane extract.
417static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
418                             SDValue &LaneOp, int &LaneIdx) {
419
420  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
421    std::swap(Op0, Op1);
422    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
423      return false;
424  }
425  StdOp = Op1;
426  return true;
427}
428
429/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
430/// is a lane in the upper half of a 128-bit vector.  Recognize and select this
431/// so that we don't emit unnecessary lane extracts.
432SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
433  SDValue Op0 = N->getOperand(0);
434  SDValue Op1 = N->getOperand(1);
435  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
436  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
437  int LaneIdx = -1; // Will hold the lane index.
438
439  if (Op1.getOpcode() != ISD::MUL ||
440      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
441                        LaneIdx)) {
442    std::swap(Op0, Op1);
443    if (Op1.getOpcode() != ISD::MUL ||
444        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
445                          LaneIdx))
446      return nullptr;
447  }
448
449  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
450
451  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
452
453  unsigned MLAOpc = ~0U;
454
455  switch (N->getSimpleValueType(0).SimpleTy) {
456  default:
457    llvm_unreachable("Unrecognized MLA.");
458  case MVT::v4i16:
459    MLAOpc = AArch64::MLAv4i16_indexed;
460    break;
461  case MVT::v8i16:
462    MLAOpc = AArch64::MLAv8i16_indexed;
463    break;
464  case MVT::v2i32:
465    MLAOpc = AArch64::MLAv2i32_indexed;
466    break;
467  case MVT::v4i32:
468    MLAOpc = AArch64::MLAv4i32_indexed;
469    break;
470  }
471
472  return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
473}
474
475SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
476  SDValue SMULLOp0;
477  SDValue SMULLOp1;
478  int LaneIdx;
479
480  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
481                        LaneIdx))
482    return nullptr;
483
484  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
485
486  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
487
488  unsigned SMULLOpc = ~0U;
489
490  if (IntNo == Intrinsic::aarch64_neon_smull) {
491    switch (N->getSimpleValueType(0).SimpleTy) {
492    default:
493      llvm_unreachable("Unrecognized SMULL.");
494    case MVT::v4i32:
495      SMULLOpc = AArch64::SMULLv4i16_indexed;
496      break;
497    case MVT::v2i64:
498      SMULLOpc = AArch64::SMULLv2i32_indexed;
499      break;
500    }
501  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
502    switch (N->getSimpleValueType(0).SimpleTy) {
503    default:
504      llvm_unreachable("Unrecognized UMULL.");
505    case MVT::v4i32:
506      SMULLOpc = AArch64::UMULLv4i16_indexed;
507      break;
508    case MVT::v2i64:
509      SMULLOpc = AArch64::UMULLv2i32_indexed;
510      break;
511    }
512  } else
513    llvm_unreachable("Unrecognized intrinsic.");
514
515  return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
516}
517
518/// Instructions that accept extend modifiers like UXTW expect the register
519/// being extended to be a GPR32, but the incoming DAG might be acting on a
520/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
521/// this is the case.
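/// For example (illustrative), a GPR64 value feeding a UXTB/SXTW-style extend
/// is wrapped in an EXTRACT_SUBREG of sub_32 so the selected instruction reads
/// the low 32 bits as a W register.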
522static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
523  if (N.getValueType() == MVT::i32)
524    return N;
525
526  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
527  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
528                                               SDLoc(N), MVT::i32, N, SubReg);
529  return SDValue(Node, 0);
530}
531
532
533/// SelectArithExtendedRegister - Select an "extended register" operand.  This
534/// operand folds in an extend followed by an optional left shift.
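/// For example (illustrative), adding a sign-extended i32 index scaled by 4
/// can be selected as "add x0, x1, w2, sxtw #2"; shift amounts greater than 4
/// are rejected below.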
535bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
536                                                      SDValue &Shift) {
537  unsigned ShiftVal = 0;
538  AArch64_AM::ShiftExtendType Ext;
539
540  if (N.getOpcode() == ISD::SHL) {
541    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
542    if (!CSD)
543      return false;
544    ShiftVal = CSD->getZExtValue();
545    if (ShiftVal > 4)
546      return false;
547
548    Ext = getExtendTypeForNode(N.getOperand(0));
549    if (Ext == AArch64_AM::InvalidShiftExtend)
550      return false;
551
552    Reg = N.getOperand(0).getOperand(0);
553  } else {
554    Ext = getExtendTypeForNode(N);
555    if (Ext == AArch64_AM::InvalidShiftExtend)
556      return false;
557
558    Reg = N.getOperand(0);
559  }
560
561  // AArch64 mandates that the RHS of the operation must use the smallest
562  // register class that could contain the size being extended from.  Thus,
563  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
564  // there might not be an actual 32-bit value in the program.  We can
565  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
566  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
567  Reg = narrowIfNeeded(CurDAG, Reg);
568  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
569  return isWorthFolding(N);
570}
571
572/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
573/// immediate" address.  The "Size" argument is the size in bytes of the memory
574/// reference, which determines the scale.
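/// For example (illustrative), with Size == 8 any offset that is a multiple of
/// 8 in the range [0, 32760] is accepted, so a base plus 16 selects to
/// "ldr x0, [x1, #16]" (encoded with the scaled OffImm of 2).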
575bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
576                                              SDValue &Base, SDValue &OffImm) {
577  const TargetLowering *TLI = getTargetLowering();
578  if (N.getOpcode() == ISD::FrameIndex) {
579    int FI = cast<FrameIndexSDNode>(N)->getIndex();
580    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
581    OffImm = CurDAG->getTargetConstant(0, MVT::i64);
582    return true;
583  }
584
585  if (N.getOpcode() == AArch64ISD::ADDlow) {
586    GlobalAddressSDNode *GAN =
587        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
588    Base = N.getOperand(0);
589    OffImm = N.getOperand(1);
590    if (!GAN)
591      return true;
592
593    const GlobalValue *GV = GAN->getGlobal();
594    unsigned Alignment = GV->getAlignment();
595    const DataLayout *DL = TLI->getDataLayout();
596    Type *Ty = GV->getType()->getElementType();
597    if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin())
598      Alignment = DL->getABITypeAlignment(Ty);
599
600    if (Alignment >= Size)
601      return true;
602  }
603
604  if (CurDAG->isBaseWithConstantOffset(N)) {
605    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
606      int64_t RHSC = (int64_t)RHS->getZExtValue();
607      unsigned Scale = Log2_32(Size);
608      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
609        Base = N.getOperand(0);
610        if (Base.getOpcode() == ISD::FrameIndex) {
611          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
612          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
613        }
614        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
615        return true;
616      }
617    }
618  }
619
620  // Before falling back to our general case, check if the unscaled
621  // instructions can handle this. If so, that's preferable.
622  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
623    return false;
624
625  // Base only. The address will be materialized into a register before
626  // the memory is accessed.
627  //    add x0, Xbase, #offset
628  //    ldr x0, [x0]
629  Base = N;
630  OffImm = CurDAG->getTargetConstant(0, MVT::i64);
631  return true;
632}
633
634/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
635/// immediate" address.  This should only match when there is an offset that
636/// is not valid for a scaled immediate addressing mode.  The "Size" argument
637/// is the size in bytes of the memory reference, which is needed here to know
638/// what is valid for a scaled immediate.
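/// For example (illustrative), with Size == 8 an offset of -8 or of 3 cannot
/// use the scaled form but fits the signed 9-bit range [-256, 255], so it is
/// handled by the unscaled (LDUR/STUR-style) instructions.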
639bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
640                                                 SDValue &Base,
641                                                 SDValue &OffImm) {
642  if (!CurDAG->isBaseWithConstantOffset(N))
643    return false;
644  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
645    int64_t RHSC = RHS->getSExtValue();
646    // If the offset is valid as a scaled immediate, don't match here.
647    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
648        RHSC < (0x1000 << Log2_32(Size)))
649      return false;
650    if (RHSC >= -256 && RHSC < 256) {
651      Base = N.getOperand(0);
652      if (Base.getOpcode() == ISD::FrameIndex) {
653        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
654        const TargetLowering *TLI = getTargetLowering();
655        Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
656      }
657      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
658      return true;
659    }
660  }
661  return false;
662}
663
664static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
665  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
666  SDValue ImpDef = SDValue(
667      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
668      0);
669  MachineSDNode *Node = CurDAG->getMachineNode(
670      TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
671  return SDValue(Node, 0);
672}
673
674/// \brief Check if the given SHL node (\p N) can be used to form an
675/// extended register for an addressing mode.
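/// For example (illustrative), for an 8-byte access the node
/// (shl (sext i32 w1 to i64), #3) can become the extended, shifted offset of
/// "ldr x0, [x2, w1, sxtw #3]"; the shift amount must be 0 or log2(Size).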
676bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
677                                            bool WantExtend, SDValue &Offset,
678                                            SDValue &SignExtend) {
679  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
680  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
681  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
682    return false;
683
684  if (WantExtend) {
685    AArch64_AM::ShiftExtendType Ext =
686        getExtendTypeForNode(N.getOperand(0), true);
687    if (Ext == AArch64_AM::InvalidShiftExtend)
688      return false;
689
690    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
691    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
692  } else {
693    Offset = N.getOperand(0);
694    SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
695  }
696
697  unsigned LegalShiftVal = Log2_32(Size);
698  unsigned ShiftVal = CSD->getZExtValue();
699
700  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
701    return false;
702
703  if (isWorthFolding(N))
704    return true;
705
706  return false;
707}
708
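/// SelectAddrModeWRO - Select a register+register address where the offset is
/// a 32-bit (W) register that is sign- or zero-extended and optionally shifted,
/// e.g. "ldr x0, [x1, w2, sxtw #3]" (illustrative; compare SelectAddrModeXRO
/// below, which handles a 64-bit offset register).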
709bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
710                                            SDValue &Base, SDValue &Offset,
711                                            SDValue &SignExtend,
712                                            SDValue &DoShift) {
713  if (N.getOpcode() != ISD::ADD)
714    return false;
715  SDValue LHS = N.getOperand(0);
716  SDValue RHS = N.getOperand(1);
717
718  // We don't want to match immediate adds here, because they are better lowered
719  // to the register-immediate addressing modes.
720  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
721    return false;
722
723  // Check if this particular node is reused in any non-memory related
724  // operation.  If yes, do not try to fold this node into the address
725  // computation, since the computation will be kept.
726  const SDNode *Node = N.getNode();
727  for (SDNode *UI : Node->uses()) {
728    if (!isa<MemSDNode>(*UI))
729      return false;
730  }
731
732  // Remember if it is worth folding N when it produces an extended register.
733  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
734
735  // Try to match a shifted extend on the RHS.
736  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
737      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
738    Base = LHS;
739    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
740    return true;
741  }
742
743  // Try to match a shifted extend on the LHS.
744  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
745      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
746    Base = RHS;
747    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
748    return true;
749  }
750
751  // There was no shift; whatever else we find will be unshifted.
752  DoShift = CurDAG->getTargetConstant(false, MVT::i32);
753
754  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
755  // Try to match an unshifted extend on the LHS.
756  if (IsExtendedRegisterWorthFolding &&
757      (Ext = getExtendTypeForNode(LHS, true)) !=
758          AArch64_AM::InvalidShiftExtend) {
759    Base = RHS;
760    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
761    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
762    if (isWorthFolding(LHS))
763      return true;
764  }
765
766  // Try to match an unshifted extend on the RHS.
767  if (IsExtendedRegisterWorthFolding &&
768      (Ext = getExtendTypeForNode(RHS, true)) !=
769          AArch64_AM::InvalidShiftExtend) {
770    Base = LHS;
771    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
772    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
773    if (isWorthFolding(RHS))
774      return true;
775  }
776
777  return false;
778}
779
780bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
781                                            SDValue &Base, SDValue &Offset,
782                                            SDValue &SignExtend,
783                                            SDValue &DoShift) {
784  if (N.getOpcode() != ISD::ADD)
785    return false;
786  SDValue LHS = N.getOperand(0);
787  SDValue RHS = N.getOperand(1);
788
789  // We don't want to match immediate adds here, because they are better lowered
790  // to the register-immediate addressing modes.
791  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
792    return false;
793
794  // Check if this particular node is reused in any non-memory related
795  // operation.  If yes, do not try to fold this node into the address
796  // computation, since the computation will be kept.
797  const SDNode *Node = N.getNode();
798  for (SDNode *UI : Node->uses()) {
799    if (!isa<MemSDNode>(*UI))
800      return false;
801  }
802
803  // Remember if it is worth folding N when it produces an extended register.
804  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
805
806  // Try to match a shifted extend on the RHS.
807  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
808      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
809    Base = LHS;
810    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
811    return true;
812  }
813
814  // Try to match a shifted extend on the LHS.
815  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
816      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
817    Base = RHS;
818    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
819    return true;
820  }
821
822  // Match any non-shifted, non-extend, non-immediate add expression.
823  Base = LHS;
824  Offset = RHS;
825  SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
826  DoShift = CurDAG->getTargetConstant(false, MVT::i32);
827  // Reg1 + Reg2 is free: no check needed.
828  return true;
829}
830
831SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
832  static unsigned RegClassIDs[] = {
833      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
834  static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
835                                AArch64::dsub2, AArch64::dsub3 };
836
837  return createTuple(Regs, RegClassIDs, SubRegs);
838}
839
840SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
841  static unsigned RegClassIDs[] = {
842      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
843  static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
844                                AArch64::qsub2, AArch64::qsub3 };
845
846  return createTuple(Regs, RegClassIDs, SubRegs);
847}
848
849SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
850                                         unsigned RegClassIDs[],
851                                         unsigned SubRegs[]) {
852  // There's no special register-class for a vector-list of 1 element: it's just
853  // a vector.
854  if (Regs.size() == 1)
855    return Regs[0];
856
857  assert(Regs.size() >= 2 && Regs.size() <= 4);
858
859  SDLoc DL(Regs[0].getNode());
860
861  SmallVector<SDValue, 4> Ops;
862
863  // First operand of REG_SEQUENCE is the desired RegClass.
864  Ops.push_back(
865      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
866
867  // Then we get pairs of source & subregister-position for the components.
868  for (unsigned i = 0; i < Regs.size(); ++i) {
869    Ops.push_back(Regs[i]);
870    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
871  }
872
873  SDNode *N =
874      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
875  return SDValue(N, 0);
876}
877
878SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
879                                         unsigned Opc, bool isExt) {
880  SDLoc dl(N);
881  EVT VT = N->getValueType(0);
882
883  unsigned ExtOff = isExt;
884
885  // Form a REG_SEQUENCE to force register allocation.
886  unsigned Vec0Off = ExtOff + 1;
887  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
888                               N->op_begin() + Vec0Off + NumVecs);
889  SDValue RegSeq = createQTuple(Regs);
890
891  SmallVector<SDValue, 6> Ops;
892  if (isExt)
893    Ops.push_back(N->getOperand(1));
894  Ops.push_back(RegSeq);
895  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
896  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
897}
898
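/// SelectIndexedLoad - Select pre- and post-indexed (writeback) loads, e.g.
/// "ldr x0, [x1, #8]!" for pre-index and "ldr x0, [x1], #8" for post-index
/// (illustrative). The legality of the indexed form was already checked when
/// the load was marked as indexed.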
899SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
900  LoadSDNode *LD = cast<LoadSDNode>(N);
901  if (LD->isUnindexed())
902    return nullptr;
903  EVT VT = LD->getMemoryVT();
904  EVT DstVT = N->getValueType(0);
905  ISD::MemIndexedMode AM = LD->getAddressingMode();
906  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
907
908  // We're not doing validity checking here. That was done when checking
909  // if we should mark the load as indexed or not. We're just selecting
910  // the right instruction.
911  unsigned Opcode = 0;
912
913  ISD::LoadExtType ExtType = LD->getExtensionType();
914  bool InsertTo64 = false;
915  if (VT == MVT::i64)
916    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
917  else if (VT == MVT::i32) {
918    if (ExtType == ISD::NON_EXTLOAD)
919      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
920    else if (ExtType == ISD::SEXTLOAD)
921      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
922    else {
923      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
924      InsertTo64 = true;
925      // The result of the load is only i32. It's the subreg_to_reg that makes
926      // it into an i64.
927      DstVT = MVT::i32;
928    }
929  } else if (VT == MVT::i16) {
930    if (ExtType == ISD::SEXTLOAD) {
931      if (DstVT == MVT::i64)
932        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
933      else
934        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
935    } else {
936      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
937      InsertTo64 = DstVT == MVT::i64;
938      // The result of the load is only i32. It's the subreg_to_reg that makes
939      // it into an i64.
940      DstVT = MVT::i32;
941    }
942  } else if (VT == MVT::i8) {
943    if (ExtType == ISD::SEXTLOAD) {
944      if (DstVT == MVT::i64)
945        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
946      else
947        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
948    } else {
949      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
950      InsertTo64 = DstVT == MVT::i64;
951      // The result of the load is only i32. It's the subreg_to_reg that makes
952      // it into an i64.
953      DstVT = MVT::i32;
954    }
955  } else if (VT == MVT::f32) {
956    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
957  } else if (VT == MVT::f64 || VT.is64BitVector()) {
958    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
959  } else if (VT.is128BitVector()) {
960    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
961  } else
962    return nullptr;
963  SDValue Chain = LD->getChain();
964  SDValue Base = LD->getBasePtr();
965  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
966  int OffsetVal = (int)OffsetOp->getZExtValue();
967  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
968  SDValue Ops[] = { Base, Offset, Chain };
969  SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
970                                       MVT::Other, Ops);
971  // Either way, we're replacing the node, so tell the caller that.
972  Done = true;
973  SDValue LoadedVal = SDValue(Res, 1);
974  if (InsertTo64) {
975    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
976    LoadedVal =
977        SDValue(CurDAG->getMachineNode(
978                    AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
979                    CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
980                0);
981  }
982
983  ReplaceUses(SDValue(N, 0), LoadedVal);
984  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
985  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
986
987  return nullptr;
988}
989
990SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
991                                        unsigned Opc, unsigned SubRegIdx) {
992  SDLoc dl(N);
993  EVT VT = N->getValueType(0);
994  SDValue Chain = N->getOperand(0);
995
996  SmallVector<SDValue, 6> Ops;
997  Ops.push_back(N->getOperand(2)); // Mem operand;
998  Ops.push_back(Chain);
999
1000  std::vector<EVT> ResTys;
1001  ResTys.push_back(MVT::Untyped);
1002  ResTys.push_back(MVT::Other);
1003
1004  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1005  SDValue SuperReg = SDValue(Ld, 0);
1006  for (unsigned i = 0; i < NumVecs; ++i)
1007    ReplaceUses(SDValue(N, i),
1008        CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1009
1010  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1011  return nullptr;
1012}
1013
1014SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1015                                            unsigned Opc, unsigned SubRegIdx) {
1016  SDLoc dl(N);
1017  EVT VT = N->getValueType(0);
1018  SDValue Chain = N->getOperand(0);
1019
1020  SmallVector<SDValue, 6> Ops;
1021  Ops.push_back(N->getOperand(1)); // Mem operand
1022  Ops.push_back(N->getOperand(2)); // Incremental
1023  Ops.push_back(Chain);
1024
1025  std::vector<EVT> ResTys;
1026  ResTys.push_back(MVT::i64); // Type of the write back register
1027  ResTys.push_back(MVT::Untyped);
1028  ResTys.push_back(MVT::Other);
1029
1030  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1031
1032  // Update uses of write back register
1033  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1034
1035  // Update uses of vector list
1036  SDValue SuperReg = SDValue(Ld, 1);
1037  if (NumVecs == 1)
1038    ReplaceUses(SDValue(N, 0), SuperReg);
1039  else
1040    for (unsigned i = 0; i < NumVecs; ++i)
1041      ReplaceUses(SDValue(N, i),
1042          CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1043
1044  // Update the chain
1045  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1046  return nullptr;
1047}
1048
1049SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1050                                         unsigned Opc) {
1051  SDLoc dl(N);
1052  EVT VT = N->getOperand(2)->getValueType(0);
1053
1054  // Form a REG_SEQUENCE to force register allocation.
1055  bool Is128Bit = VT.getSizeInBits() == 128;
1056  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1057  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1058
1059  SmallVector<SDValue, 6> Ops;
1060  Ops.push_back(RegSeq);
1061  Ops.push_back(N->getOperand(NumVecs + 2));
1062  Ops.push_back(N->getOperand(0));
1063  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1064
1065  return St;
1066}
1067
1068SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1069                                             unsigned Opc) {
1070  SDLoc dl(N);
1071  EVT VT = N->getOperand(2)->getValueType(0);
1072  SmallVector<EVT, 2> ResTys;
1073  ResTys.push_back(MVT::i64);   // Type of the write back register
1074  ResTys.push_back(MVT::Other); // Type for the Chain
1075
1076  // Form a REG_SEQUENCE to force register allocation.
1077  bool Is128Bit = VT.getSizeInBits() == 128;
1078  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1079  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1080
1081  SmallVector<SDValue, 6> Ops;
1082  Ops.push_back(RegSeq);
1083  Ops.push_back(N->getOperand(NumVecs + 1)); // base register
1084  Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
1085  Ops.push_back(N->getOperand(0)); // Chain
1086  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1087
1088  return St;
1089}
1090
1091/// WidenVector - Given a value in the V64 register class, produce the
1092/// equivalent value in the V128 register class.
1093class WidenVector {
1094  SelectionDAG &DAG;
1095
1096public:
1097  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1098
1099  SDValue operator()(SDValue V64Reg) {
1100    EVT VT = V64Reg.getValueType();
1101    unsigned NarrowSize = VT.getVectorNumElements();
1102    MVT EltTy = VT.getVectorElementType().getSimpleVT();
1103    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1104    SDLoc DL(V64Reg);
1105
1106    SDValue Undef =
1107        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1108    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1109  }
1110};
1111
1112/// NarrowVector - Given a value in the V128 register class, produce the
1113/// equivalent value in the V64 register class.
1114static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1115  EVT VT = V128Reg.getValueType();
1116  unsigned WideSize = VT.getVectorNumElements();
1117  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1118  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1119
1120  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1121                                    V128Reg);
1122}
1123
1124SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1125                                            unsigned Opc) {
1126  SDLoc dl(N);
1127  EVT VT = N->getValueType(0);
1128  bool Narrow = VT.getSizeInBits() == 64;
1129
1130  // Form a REG_SEQUENCE to force register allocation.
1131  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1132
1133  if (Narrow)
1134    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1135                   WidenVector(*CurDAG));
1136
1137  SDValue RegSeq = createQTuple(Regs);
1138
1139  std::vector<EVT> ResTys;
1140  ResTys.push_back(MVT::Untyped);
1141  ResTys.push_back(MVT::Other);
1142
1143  unsigned LaneNo =
1144      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1145
1146  SmallVector<SDValue, 6> Ops;
1147  Ops.push_back(RegSeq);
1148  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1149  Ops.push_back(N->getOperand(NumVecs + 3));
1150  Ops.push_back(N->getOperand(0));
1151  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1152  SDValue SuperReg = SDValue(Ld, 0);
1153
1154  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1155  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1156                              AArch64::qsub3 };
1157  for (unsigned i = 0; i < NumVecs; ++i) {
1158    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1159    if (Narrow)
1160      NV = NarrowVector(NV, *CurDAG);
1161    ReplaceUses(SDValue(N, i), NV);
1162  }
1163
1164  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1165
1166  return Ld;
1167}
1168
1169SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1170                                                unsigned Opc) {
1171  SDLoc dl(N);
1172  EVT VT = N->getValueType(0);
1173  bool Narrow = VT.getSizeInBits() == 64;
1174
1175  // Form a REG_SEQUENCE to force register allocation.
1176  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1177
1178  if (Narrow)
1179    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1180                   WidenVector(*CurDAG));
1181
1182  SDValue RegSeq = createQTuple(Regs);
1183
1184  std::vector<EVT> ResTys;
1185  ResTys.push_back(MVT::i64); // Type of the write back register
1186  ResTys.push_back(MVT::Untyped);
1187  ResTys.push_back(MVT::Other);
1188
1189  unsigned LaneNo =
1190      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1191
1192  SmallVector<SDValue, 6> Ops;
1193  Ops.push_back(RegSeq);
1194  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
1195  Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
1196  Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
1197  Ops.push_back(N->getOperand(0));
1198  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1199
1200  // Update uses of the write back register
1201  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1202
1203  // Update uses of the vector list
1204  SDValue SuperReg = SDValue(Ld, 1);
1205  if (NumVecs == 1) {
1206    ReplaceUses(SDValue(N, 0),
1207                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1208  } else {
1209    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1210    static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1211                                AArch64::qsub3 };
1212    for (unsigned i = 0; i < NumVecs; ++i) {
1213      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1214                                                  SuperReg);
1215      if (Narrow)
1216        NV = NarrowVector(NV, *CurDAG);
1217      ReplaceUses(SDValue(N, i), NV);
1218    }
1219  }
1220
1221  // Update the Chain
1222  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1223
1224  return Ld;
1225}
1226
1227SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1228                                             unsigned Opc) {
1229  SDLoc dl(N);
1230  EVT VT = N->getOperand(2)->getValueType(0);
1231  bool Narrow = VT.getSizeInBits() == 64;
1232
1233  // Form a REG_SEQUENCE to force register allocation.
1234  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1235
1236  if (Narrow)
1237    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1238                   WidenVector(*CurDAG));
1239
1240  SDValue RegSeq = createQTuple(Regs);
1241
1242  unsigned LaneNo =
1243      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1244
1245  SmallVector<SDValue, 6> Ops;
1246  Ops.push_back(RegSeq);
1247  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1248  Ops.push_back(N->getOperand(NumVecs + 3));
1249  Ops.push_back(N->getOperand(0));
1250  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1251
1252  // Transfer memoperands.
1253  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1254  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1255  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1256
1257  return St;
1258}
1259
1260SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1261                                                 unsigned Opc) {
1262  SDLoc dl(N);
1263  EVT VT = N->getOperand(2)->getValueType(0);
1264  bool Narrow = VT.getSizeInBits() == 64;
1265
1266  // Form a REG_SEQUENCE to force register allocation.
1267  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1268
1269  if (Narrow)
1270    std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1271                   WidenVector(*CurDAG));
1272
1273  SDValue RegSeq = createQTuple(Regs);
1274
1275  SmallVector<EVT, 2> ResTys;
1276  ResTys.push_back(MVT::i64);   // Type of the write back register
1277  ResTys.push_back(MVT::Other);
1278
1279  unsigned LaneNo =
1280      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1281
1282  SmallVector<SDValue, 6> Ops;
1283  Ops.push_back(RegSeq);
1284  Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1285  Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
1286  Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
1287  Ops.push_back(N->getOperand(0));
1288  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1289
1290  // Transfer memoperands.
1291  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1292  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1293  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1294
1295  return St;
1296}
1297
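// For example (illustrative): (and (srl x, 4), 0xff) on an i32 value becomes
// UBFMWri x, #4, #11, i.e. an unsigned extract of bits [11:4].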
1298static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1299                                       unsigned &Opc, SDValue &Opd0,
1300                                       unsigned &LSB, unsigned &MSB,
1301                                       unsigned NumberOfIgnoredLowBits,
1302                                       bool BiggerPattern) {
1303  assert(N->getOpcode() == ISD::AND &&
1304         "N must be a AND operation to call this function");
1305
1306  EVT VT = N->getValueType(0);
1307
1308  // We could test the type of VT and return false when it does not match, but
1309  // since that check is done before this function is called in the current
1310  // context, we turned it into an assert to avoid redundant code.
1311  assert((VT == MVT::i32 || VT == MVT::i64) &&
1312         "Type checking must have been done before calling this function");
1313
1314  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1315  // changed the AND node to a 32-bit mask operation. We'll have to
1316  // undo that as part of the transform here if we want to catch all
1317  // the opportunities.
1318  // Currently the NumberOfIgnoredLowBits argument helps to recover
1319  // from these situations when matching the bigger pattern (bitfield insert).
1320
1321  // For unsigned extracts, check for a shift right and mask
1322  uint64_t And_imm = 0;
1323  if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
1324    return false;
1325
1326  const SDNode *Op0 = N->getOperand(0).getNode();
1327
1328  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1329  // simplified. Try to undo that
1330  And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
1331
1332  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1333  if (And_imm & (And_imm + 1))
1334    return false;
1335
1336  bool ClampMSB = false;
1337  uint64_t Srl_imm = 0;
1338  // Handle the SRL + ANY_EXTEND case.
1339  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1340      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
1341    // Extend the incoming operand of the SRL to 64-bit.
1342    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1343    // Make sure to clamp the MSB so that we preserve the semantics of the
1344    // original operations.
1345    ClampMSB = true;
1346  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1347             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1348                                   Srl_imm)) {
1349    // If the shift result was truncated, we can still combine them.
1350    Opd0 = Op0->getOperand(0).getOperand(0);
1351
1352    // Use the type of SRL node.
1353    VT = Opd0->getValueType(0);
1354  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
1355    Opd0 = Op0->getOperand(0);
1356  } else if (BiggerPattern) {
1357    // Let's pretend a 0 shift right has been performed.
1358    // The resulting code will be at least as good as the original one, and it
1359    // may expose more opportunities for the bitfield insert pattern.
1360    // FIXME: Currently we limit this to the bigger pattern, because
1361    // some optimizations expect AND and not UBFM
1362    Opd0 = N->getOperand(0);
1363  } else
1364    return false;
1365
1366  assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
1367         "bad amount in shift node!");
1368
1369  LSB = Srl_imm;
1370  MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
1371                                  : CountTrailingOnes_64(And_imm)) -
1372        1;
1373  if (ClampMSB)
1374    // Since we're moving the extend before the right shift operation, we need
1375    // to clamp the MSB to make sure we don't shift in undefined bits instead of
1376    // the zeros which would get shifted in with the original right shift
1377    // operation.
1378    MSB = MSB > 31 ? 31 : MSB;
1379
1380  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1381  return true;
1382}
1383
1384static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1385                                     unsigned &LSB, unsigned &MSB) {
1386  // We are looking for the following pattern which basically extracts a single
1387  // bit from the source value and places it in the LSB of the destination
1388  // value; all other bits of the destination value are set to zero:
1389  //
1390  // Value2 = AND Value, MaskImm
1391  // SRL Value2, ShiftImm
1392  //
1393  // with MaskImm >> ShiftImm == 1.
1394  //
1395  // This gets selected into a single UBFM:
1396  //
1397  // UBFM Value, ShiftImm, ShiftImm
1398  //
1399
1400  if (N->getOpcode() != ISD::SRL)
1401    return false;
1402
1403  uint64_t And_mask = 0;
1404  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
1405    return false;
1406
1407  Opd0 = N->getOperand(0).getOperand(0);
1408
1409  uint64_t Srl_imm = 0;
1410  if (!isIntImmediate(N->getOperand(1), Srl_imm))
1411    return false;
1412
1413  // Check whether we really have a one bit extract here.
1414  if (And_mask >> Srl_imm == 0x1) {
1415    if (N->getValueType(0) == MVT::i32)
1416      Opc = AArch64::UBFMWri;
1417    else
1418      Opc = AArch64::UBFMXri;
1419
1420    LSB = MSB = Srl_imm;
1421
1422    return true;
1423  }
1424
1425  return false;
1426}
1427
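// For example (illustrative): (sra (shl x, 24), 28) on an i32 value becomes
// SBFMWri x, #4, #7, i.e. a sign-extended extract of bits [7:4].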
1428static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1429                                       unsigned &LSB, unsigned &MSB,
1430                                       bool BiggerPattern) {
1431  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1432         "N must be a SHR/SRA operation to call this function");
1433
1434  EVT VT = N->getValueType(0);
1435
1436  // We could test the type of VT here and return false when it does not match,
1437  // but since that check is already done before this function is called in the
1438  // current context, we turn it into an assert to avoid redundant code.
1439  assert((VT == MVT::i32 || VT == MVT::i64) &&
1440         "Type checking must have been done before calling this function");
1441
1442  // Check for AND + SRL doing a one bit extract.
1443  if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
1444    return true;
1445
1446  // We're looking for a shift of a shift.
1447  uint64_t Shl_imm = 0;
1448  uint64_t Trunc_bits = 0;
1449  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
1450    Opd0 = N->getOperand(0).getOperand(0);
1451  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1452             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1453    // We are looking for a shift of a truncate. A truncate from i64 to i32
1454    // can be considered as setting the high 32 bits to zero. Our strategy
1455    // here is to always generate a 64-bit UBFM. This consistency will help
1456    // the CSE pass later find more redundancy.
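    // For example (illustrative values), (i32 (srl (trunc i64 %x), 5)) is
    // selected as UBFMXri %x, #5, #31, i.e. an extract of bits 5..31 of the
    // 64-bit source; SelectBitfieldExtractOp later adds an EXTRACT_SUBREG to
    // get back to i32.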
1457    Opd0 = N->getOperand(0).getOperand(0);
1458    Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1459    VT = Opd0->getValueType(0);
1460    assert(VT == MVT::i64 && "the promoted type should be i64");
1461  } else if (BiggerPattern) {
1462    // Let's pretend a 0 shift left has been performed.
1463    // FIXME: Currently we limit this to the bigger pattern case,
1464    // because some optimizations expect AND and not UBFM
1465    Opd0 = N->getOperand(0);
1466  } else
1467    return false;
1468
1469  assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
1470  uint64_t Srl_imm = 0;
1471  if (!isIntImmediate(N->getOperand(1), Srl_imm))
1472    return false;
1473
1474  assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
1475         "bad amount in shift node!");
1476  // Note: The width operand is encoded as width-1.
1477  unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
1478  int sLSB = Srl_imm - Shl_imm;
1479  if (sLSB < 0)
1480    return false;
1481  LSB = sLSB;
1482  MSB = LSB + Width;
1483  // SRA requires a signed extraction
1484  if (VT == MVT::i32)
1485    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1486  else
1487    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1488  return true;
1489}
1490
1491static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1492                                SDValue &Opd0, unsigned &LSB, unsigned &MSB,
1493                                unsigned NumberOfIgnoredLowBits = 0,
1494                                bool BiggerPattern = false) {
1495  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1496    return false;
1497
1498  switch (N->getOpcode()) {
1499  default:
1500    if (!N->isMachineOpcode())
1501      return false;
1502    break;
1503  case ISD::AND:
1504    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
1505                                      NumberOfIgnoredLowBits, BiggerPattern);
1506  case ISD::SRL:
1507  case ISD::SRA:
1508    return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
1509  }
1510
1511  unsigned NOpc = N->getMachineOpcode();
1512  switch (NOpc) {
1513  default:
1514    return false;
1515  case AArch64::SBFMWri:
1516  case AArch64::UBFMWri:
1517  case AArch64::SBFMXri:
1518  case AArch64::UBFMXri:
1519    Opc = NOpc;
1520    Opd0 = N->getOperand(0);
1521    LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1522    MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1523    return true;
1524  }
1525  // Unreachable
1526  return false;
1527}
1528
1529SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
1530  unsigned Opc, LSB, MSB;
1531  SDValue Opd0;
1532  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
1533    return nullptr;
1534
1535  EVT VT = N->getValueType(0);
1536
1537  // If the bit extract operation is 64-bit but the original type is 32-bit, we
1538  // need to add an EXTRACT_SUBREG.
1539  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1540    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
1541                       CurDAG->getTargetConstant(MSB, MVT::i64)};
1542
1543    SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
1544    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
1545    MachineSDNode *Node =
1546        CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
1547                               SDValue(BFM, 0), SubReg);
1548    return Node;
1549  }
1550
1551  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
1552                   CurDAG->getTargetConstant(MSB, VT)};
1553  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1554}
1555
1556/// Does DstMask form a complementary pair with the mask provided by
1557/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1558/// this asks whether DstMask zeroes precisely those bits that will be set by
1559/// the other half.
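/// For example, with a 32-bit type, DstMask == 0xffff0000 pairs with bits to be
/// inserted at positions 0..15: the two masks do not overlap and together cover
/// all 32 bits.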
1560static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
1561                              unsigned NumberOfIgnoredHighBits, EVT VT) {
1562  assert((VT == MVT::i32 || VT == MVT::i64) &&
1563         "i32 or i64 mask type expected!");
1564  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1565
1566  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1567  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1568
1569  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1570         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1571}
1572
1573// Look for bits that will be useful for later uses.
1574// A bit is considered useless as soon as it is dropped and never used
1575// before it has been dropped.
1576// E.g., looking for the useful bits of x:
1577// 1. y = x & 0x7
1578// 2. z = y >> 2
1579// After #1, the useful bits of x are 0x7; these useful bits then live
1580// through y.
1581// After #2, the useful bits of x are 0x4.
1582// However, if x is used by an unpredictable instruction, then all its bits
1583// are useful.
1584// E.g.
1585// 1. y = x & 0x7
1586// 2. z = y >> 2
1587// 3. str x, [@x]
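// Here the store at #3 uses all of x, so every bit of x remains useful despite
// the AND at #1.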
1588static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1589
1590static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1591                                              unsigned Depth) {
1592  uint64_t Imm =
1593      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1594  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1595  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1596  getUsefulBits(Op, UsefulBits, Depth + 1);
1597}
1598
1599static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1600                                             uint64_t Imm, uint64_t MSB,
1601                                             unsigned Depth) {
1602  // inherit the bitwidth value
1603  APInt OpUsefulBits(UsefulBits);
1604  OpUsefulBits = 1;
1605
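  // E.g. (illustrative values), for the extract case below (MSB >= Imm) with
  // Imm == 8 and MSB == 15, result bits 0..7 come from source bits 8..15, so
  // at most source bits 8..15 (mask 0xff00) can be useful to this user.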
1606  if (MSB >= Imm) {
1607    OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1608    --OpUsefulBits;
1609    // The interesting part will be in the lower part of the result
1610    getUsefulBits(Op, OpUsefulBits, Depth + 1);
1611    // The interesting part started at Imm in the argument
1612    OpUsefulBits = OpUsefulBits.shl(Imm);
1613  } else {
1614    OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1615    --OpUsefulBits;
1616    // The interesting part will be shifted in the result
1617    OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
1618    getUsefulBits(Op, OpUsefulBits, Depth + 1);
1619    // The interesting part was at zero in the argument
1620    OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
1621  }
1622
1623  UsefulBits &= OpUsefulBits;
1624}
1625
1626static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1627                                  unsigned Depth) {
1628  uint64_t Imm =
1629      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1630  uint64_t MSB =
1631      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1632
1633  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1634}
1635
1636static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1637                                              unsigned Depth) {
1638  uint64_t ShiftTypeAndValue =
1639      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1640  APInt Mask(UsefulBits);
1641  Mask.clearAllBits();
1642  Mask.flipAllBits();
1643
1644  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1645    // Shift Left
1646    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1647    Mask = Mask.shl(ShiftAmt);
1648    getUsefulBits(Op, Mask, Depth + 1);
1649    Mask = Mask.lshr(ShiftAmt);
1650  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1651    // Shift Right
1652    // We do not handle AArch64_AM::ASR, because the sign will change the
1653    // number of useful bits
1654    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1655    Mask = Mask.lshr(ShiftAmt);
1656    getUsefulBits(Op, Mask, Depth + 1);
1657    Mask = Mask.shl(ShiftAmt);
1658  } else
1659    return;
1660
1661  UsefulBits &= Mask;
1662}
1663
1664static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1665                                 unsigned Depth) {
1666  uint64_t Imm =
1667      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1668  uint64_t MSB =
1669      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1670
1671  if (Op.getOperand(1) == Orig)
1672    return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1673
1674  APInt OpUsefulBits(UsefulBits);
1675  OpUsefulBits = 1;
1676
1677  if (MSB >= Imm) {
1678    OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1679    --OpUsefulBits;
1680    UsefulBits &= ~OpUsefulBits;
1681    getUsefulBits(Op, UsefulBits, Depth + 1);
1682  } else {
1683    OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1684    --OpUsefulBits;
1685    UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
1686    getUsefulBits(Op, UsefulBits, Depth + 1);
1687  }
1688}
1689
1690static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1691                                SDValue Orig, unsigned Depth) {
1692
1693  // Users of this node should have already been instruction selected
1694  // FIXME: Can we turn that into an assert?
1695  if (!UserNode->isMachineOpcode())
1696    return;
1697
1698  switch (UserNode->getMachineOpcode()) {
1699  default:
1700    return;
1701  case AArch64::ANDSWri:
1702  case AArch64::ANDSXri:
1703  case AArch64::ANDWri:
1704  case AArch64::ANDXri:
1705    // We increment Depth only when we call getUsefulBits
1706    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1707                                             Depth);
1708  case AArch64::UBFMWri:
1709  case AArch64::UBFMXri:
1710    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1711
1712  case AArch64::ORRWrs:
1713  case AArch64::ORRXrs:
1714    if (UserNode->getOperand(1) != Orig)
1715      return;
1716    return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
1717                                             Depth);
1718  case AArch64::BFMWri:
1719  case AArch64::BFMXri:
1720    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1721  }
1722}
1723
1724static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
1725  if (Depth >= 6)
1726    return;
1727  // Initialize UsefulBits
1728  if (!Depth) {
1729    unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
1730    // At the beginning, assume every produced bit is useful
1731    UsefulBits = APInt(Bitwidth, 0);
1732    UsefulBits.flipAllBits();
1733  }
1734  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
1735
1736  for (SDNode *Node : Op.getNode()->uses()) {
1737    // A use cannot produce useful bits
1738    APInt UsefulBitsForUse = APInt(UsefulBits);
1739    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
1740    UsersUsefulBits |= UsefulBitsForUse;
1741  }
1742  // UsefulBits contains the produced bits that are meaningful for the
1743  // current definition; thus a user cannot make a bit meaningful at
1744  // this point.
1745  UsefulBits &= UsersUsefulBits;
1746}
1747
1748/// Create a machine node performing a notional SHL of Op by ShlAmount. If
1749/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
1750/// 0, return Op unchanged.
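/// For example, on an i32 value, ShlAmount == 4 emits UBFMWri wD, wN, #28, #27
/// (an LSL by 4), while ShlAmount == -4 emits UBFMWri wD, wN, #4, #31 (an LSR
/// by 4).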
1751static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
1752  if (ShlAmount == 0)
1753    return Op;
1754
1755  EVT VT = Op.getValueType();
1756  unsigned BitWidth = VT.getSizeInBits();
1757  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1758
1759  SDNode *ShiftNode;
1760  if (ShlAmount > 0) {
1761    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
1762    ShiftNode = CurDAG->getMachineNode(
1763        UBFMOpc, SDLoc(Op), VT, Op,
1764        CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
1765        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
1766  } else {
1767    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
1768    assert(ShlAmount < 0 && "expected right shift");
1769    int ShrAmount = -ShlAmount;
1770    ShiftNode = CurDAG->getMachineNode(
1771        UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
1772        CurDAG->getTargetConstant(BitWidth - 1, VT));
1773  }
1774
1775  return SDValue(ShiftNode, 0);
1776}
1777
1778/// Does this tree qualify as an attempt to move a bitfield into position,
1779/// essentially "(and (shl VAL, N), Mask)".
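/// For example, with nothing further known about %x, (and (shl %x, 3), 0xf8)
/// positions a 5-bit field of %x at bits 3..7, giving Src == %x,
/// ShiftAmount == 3 and MaskWidth == 5.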
1780static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
1781                                    SDValue &Src, int &ShiftAmount,
1782                                    int &MaskWidth) {
1783  EVT VT = Op.getValueType();
1784  unsigned BitWidth = VT.getSizeInBits();
1785  (void)BitWidth;
1786  assert(BitWidth == 32 || BitWidth == 64);
1787
1788  APInt KnownZero, KnownOne;
1789  CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
1790
1791  // Non-zero in the sense that they're not provably zero, which is the key
1792  // point if we want to use this value
1793  uint64_t NonZeroBits = (~KnownZero).getZExtValue();
1794
1795  // Discard a constant AND mask if present. It's safe because the node will
1796  // already have been factored into the computeKnownBits calculation above.
1797  uint64_t AndImm;
1798  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
1799    assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
1800    Op = Op.getOperand(0);
1801  }
1802
1803  uint64_t ShlImm;
1804  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
1805    return false;
1806  Op = Op.getOperand(0);
1807
1808  if (!isShiftedMask_64(NonZeroBits))
1809    return false;
1810
1811  ShiftAmount = countTrailingZeros(NonZeroBits);
1812  MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
1813
1814  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
1815  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
1816  // amount.
1817  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
1818
1819  return true;
1820}
1821
1822// Given an OR operation, check if we have the following pattern:
1823// ubfm c, b, imm, imm2 (or something that does the same job, see
1824//                       isBitfieldExtractOp)
1825// d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
1826//                 countTrailingZeros(mask2) == imm2 - imm + 1
1827// f = d | c
1828// If so, the given reference arguments will be updated so that one can replace
1829// the OR instruction with:
1830// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
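// For example (i32, illustrative values):
//   c = UBFMWri b, #4, #11   ; bits 4..11 of b placed in bits 0..7 of c
//   d = e & 0xffffff00       ; clears exactly the bits that c can set
//   f = d | c
// is replaced with
//   f = BFMWri e, b, #4, #11 ; insert bits 4..11 of b into bits 0..7 of e;
//                            ; the AND is dropped because its mask is
//                            ; complementary to the inserted bits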
1831static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
1832                                     SDValue &Src, unsigned &ImmR,
1833                                     unsigned &ImmS, SelectionDAG *CurDAG) {
1834  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
1835
1836  // Set Opc
1837  EVT VT = N->getValueType(0);
1838  if (VT == MVT::i32)
1839    Opc = AArch64::BFMWri;
1840  else if (VT == MVT::i64)
1841    Opc = AArch64::BFMXri;
1842  else
1843    return false;
1844
1845  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
1846  // have the expected shape. Try to undo that.
1847  APInt UsefulBits;
1848  getUsefulBits(SDValue(N, 0), UsefulBits);
1849
1850  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
1851  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
1852
1853  // OR is commutative; check both possibilities (does llvm provide a
1854  // way to do that directly, e.g., via code matcher?)
1855  SDValue OrOpd1Val = N->getOperand(1);
1856  SDNode *OrOpd0 = N->getOperand(0).getNode();
1857  SDNode *OrOpd1 = N->getOperand(1).getNode();
1858  for (int i = 0; i < 2;
1859       ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
1860    unsigned BFXOpc;
1861    int DstLSB, Width;
1862    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
1863                            NumberOfIgnoredLowBits, true)) {
1864      // Check that the returned opcode is compatible with the pattern,
1865      // i.e., same type and zero extended (U and not S)
1866      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
1867          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
1868        continue;
1869
1870      // Compute the width of the bitfield insertion
1871      DstLSB = 0;
1872      Width = ImmS - ImmR + 1;
1873      // FIXME: This constraint is to catch bitfield insertion; we may
1874      // want to widen the pattern if we want to grab the general bitfield
1875      // move case.
1876      if (Width <= 0)
1877        continue;
1878
1879      // If the mask on the insertee is correct, we have a BFXIL operation. We
1880      // can share the ImmR and ImmS values from the already-computed UBFM.
1881    } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
1882                                       DstLSB, Width)) {
1883      ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
1884      ImmS = Width - 1;
1885    } else
1886      continue;
1887
1888    // Check the second part of the pattern
1889    EVT VT = OrOpd1->getValueType(0);
1890    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
1891
1892    // Compute the Known Zero for the candidate of the first operand.
1893    // This allows us to catch more general cases than just looking for
1894    // an AND with an immediate. Indeed, simplify-demanded-bits may have
1895    // removed the AND instruction because it proved it was useless.
1896    APInt KnownZero, KnownOne;
1897    CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
1898
1899    // Check if there is enough room for the second operand to appear
1900    // in the first one
1901    APInt BitsToBeInserted =
1902        APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
1903
1904    if ((BitsToBeInserted & ~KnownZero) != 0)
1905      continue;
1906
1907    // Set the first operand
1908    uint64_t Imm;
1909    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
1910        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
1911      // In that case, we can eliminate the AND
1912      Dst = OrOpd1->getOperand(0);
1913    else
1914      // Maybe the AND has been removed by simplify-demanded-bits
1915      // or is useful because it discards more bits
1916      Dst = OrOpd1Val;
1917
1918    // both parts match
1919    return true;
1920  }
1921
1922  return false;
1923}
1924
1925SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
1926  if (N->getOpcode() != ISD::OR)
1927    return nullptr;
1928
1929  unsigned Opc;
1930  unsigned LSB, MSB;
1931  SDValue Opd0, Opd1;
1932
1933  if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
1934    return nullptr;
1935
1936  EVT VT = N->getValueType(0);
1937  SDValue Ops[] = { Opd0,
1938                    Opd1,
1939                    CurDAG->getTargetConstant(LSB, VT),
1940                    CurDAG->getTargetConstant(MSB, VT) };
1941  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1942}
1943
1944SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
1945  EVT VT = N->getValueType(0);
1946  unsigned Variant;
1947  unsigned Opc;
1948  unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
1949
1950  if (VT == MVT::f32) {
1951    Variant = 0;
1952  } else if (VT == MVT::f64) {
1953    Variant = 1;
1954  } else
1955    return nullptr; // Unrecognized argument type. Fall back on default codegen.
1956
1957  // Pick the FRINTX variant needed to set the flags.
1958  unsigned FRINTXOpc = FRINTXOpcs[Variant];
1959
1960  switch (N->getOpcode()) {
1961  default:
1962    return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
1963  case ISD::FCEIL: {
1964    unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
1965    Opc = FRINTPOpcs[Variant];
1966    break;
1967  }
1968  case ISD::FFLOOR: {
1969    unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
1970    Opc = FRINTMOpcs[Variant];
1971    break;
1972  }
1973  case ISD::FTRUNC: {
1974    unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
1975    Opc = FRINTZOpcs[Variant];
1976    break;
1977  }
1978  case ISD::FROUND: {
1979    unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
1980    Opc = FRINTAOpcs[Variant];
1981    break;
1982  }
1983  }
1984
1985  SDLoc dl(N);
1986  SDValue In = N->getOperand(0);
1987  SmallVector<SDValue, 2> Ops;
1988  Ops.push_back(In);
1989
1990  if (!TM.Options.UnsafeFPMath) {
1991    SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
1992    Ops.push_back(SDValue(FRINTX, 1));
1993  }
1994
1995  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
1996}
1997
1998bool
1999AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2000                                              unsigned RegWidth) {
2001  APFloat FVal(0.0);
2002  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2003    FVal = CN->getValueAPF();
2004  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2005    // Some otherwise illegal constants are allowed in this case.
2006    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2007        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2008      return false;
2009
2010    ConstantPoolSDNode *CN =
2011        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2012    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2013  } else
2014    return false;
2015
2016  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2017  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2018  // x-register.
2019  //
2020  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2021  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2022  // integers.
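  //
  // For example (illustrative values), for a w-register destination the node
  // 16.0 corresponds to fbits == 4: 16.0 converts exactly to the integer 16,
  // which is 2^4, so FixedPos is set to 4 below.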
2023  bool IsExact;
2024
2025  // fbits is between 1 and 64 in the worst-case, which means the fmul
2026  // could have 2^64 as an actual operand. Need 65 bits of precision.
2027  APSInt IntVal(65, true);
2028  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2029
2030  // N.b. isPowerOf2 also checks for > 0.
2031  if (!IsExact || !IntVal.isPowerOf2()) return false;
2032  unsigned FBits = IntVal.logBase2();
2033
2034  // Checks above should have guaranteed that we haven't lost information in
2035  // finding FBits, but it must still be in range.
2036  if (FBits == 0 || FBits > RegWidth) return false;
2037
2038  FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
2039  return true;
2040}
2041
2042SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
2043  // Dump information about the Node being selected
2044  DEBUG(errs() << "Selecting: ");
2045  DEBUG(Node->dump(CurDAG));
2046  DEBUG(errs() << "\n");
2047
2048  // If we have a custom node, we already have selected!
2049  if (Node->isMachineOpcode()) {
2050    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2051    Node->setNodeId(-1);
2052    return nullptr;
2053  }
2054
2055  // A few custom selection cases.
2056  SDNode *ResNode = nullptr;
2057  EVT VT = Node->getValueType(0);
2058
2059  switch (Node->getOpcode()) {
2060  default:
2061    break;
2062
2063  case ISD::ADD:
2064    if (SDNode *I = SelectMLAV64LaneV128(Node))
2065      return I;
2066    break;
2067
2068  case ISD::LOAD: {
2069    // Try to select as an indexed load. Fall through to normal processing
2070    // if we can't.
2071    bool Done = false;
2072    SDNode *I = SelectIndexedLoad(Node, Done);
2073    if (Done)
2074      return I;
2075    break;
2076  }
2077
2078  case ISD::SRL:
2079  case ISD::AND:
2080  case ISD::SRA:
2081    if (SDNode *I = SelectBitfieldExtractOp(Node))
2082      return I;
2083    break;
2084
2085  case ISD::OR:
2086    if (SDNode *I = SelectBitfieldInsertOp(Node))
2087      return I;
2088    break;
2089
2090  case ISD::EXTRACT_VECTOR_ELT: {
2091    // Extracting lane zero is a special case where we can just use a plain
2092    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2093    // the rest of the compiler, especially the register allocator and copy
2094    // propagation, to reason about, so is preferred when it's possible to
2095    // use it.
2096    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2097    // Bail and use the default Select() for non-zero lanes.
2098    if (LaneNode->getZExtValue() != 0)
2099      break;
2100    // If the element type is not the same as the result type, likewise
2101    // bail and use the default Select(), as there's more to do than just
2102    // a cross-class COPY. This catches extracts of i8 and i16 elements
2103    // since they will need an explicit zext.
2104    if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2105      break;
2106    unsigned SubReg;
2107    switch (Node->getOperand(0)
2108                .getValueType()
2109                .getVectorElementType()
2110                .getSizeInBits()) {
2111    default:
2112      llvm_unreachable("Unexpected vector element type!");
2113    case 64:
2114      SubReg = AArch64::dsub;
2115      break;
2116    case 32:
2117      SubReg = AArch64::ssub;
2118      break;
2119    case 16: // FALLTHROUGH
2120    case 8:
2121      llvm_unreachable("unexpected zext-requiring extract element!");
2122    }
2123    SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2124                                                     Node->getOperand(0));
2125    DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2126    DEBUG(Extract->dumpr(CurDAG));
2127    DEBUG(dbgs() << "\n");
2128    return Extract.getNode();
2129  }
2130  case ISD::Constant: {
2131    // Materialize zero constants as copies from WZR/XZR.  This allows
2132    // the coalescer to propagate these into other instructions.
2133    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2134    if (ConstNode->isNullValue()) {
2135      if (VT == MVT::i32)
2136        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2137                                      AArch64::WZR, MVT::i32).getNode();
2138      else if (VT == MVT::i64)
2139        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2140                                      AArch64::XZR, MVT::i64).getNode();
2141    }
2142    break;
2143  }
2144
2145  case ISD::FrameIndex: {
2146    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2147    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2148    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2149    const TargetLowering *TLI = getTargetLowering();
2150    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
2151    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2152                      CurDAG->getTargetConstant(Shifter, MVT::i32) };
2153    return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2154  }
2155  case ISD::INTRINSIC_W_CHAIN: {
2156    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2157    switch (IntNo) {
2158    default:
2159      break;
2160    case Intrinsic::aarch64_ldaxp:
2161    case Intrinsic::aarch64_ldxp: {
2162      unsigned Op =
2163          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2164      SDValue MemAddr = Node->getOperand(2);
2165      SDLoc DL(Node);
2166      SDValue Chain = Node->getOperand(0);
2167
2168      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2169                                          MVT::Other, MemAddr, Chain);
2170
2171      // Transfer memoperands.
2172      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2173      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2174      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2175      return Ld;
2176    }
2177    case Intrinsic::aarch64_stlxp:
2178    case Intrinsic::aarch64_stxp: {
2179      unsigned Op =
2180          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2181      SDLoc DL(Node);
2182      SDValue Chain = Node->getOperand(0);
2183      SDValue ValLo = Node->getOperand(2);
2184      SDValue ValHi = Node->getOperand(3);
2185      SDValue MemAddr = Node->getOperand(4);
2186
2187      // Place arguments in the right order.
2188      SmallVector<SDValue, 7> Ops;
2189      Ops.push_back(ValLo);
2190      Ops.push_back(ValHi);
2191      Ops.push_back(MemAddr);
2192      Ops.push_back(Chain);
2193
2194      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2195      // Transfer memoperands.
2196      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2197      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2198      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2199
2200      return St;
2201    }
2202    case Intrinsic::aarch64_neon_ld1x2:
2203      if (VT == MVT::v8i8)
2204        return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2205      else if (VT == MVT::v16i8)
2206        return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2207      else if (VT == MVT::v4i16)
2208        return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2209      else if (VT == MVT::v8i16)
2210        return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2211      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2212        return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2213      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2214        return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2215      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2216        return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2217      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2218        return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2219      break;
2220    case Intrinsic::aarch64_neon_ld1x3:
2221      if (VT == MVT::v8i8)
2222        return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2223      else if (VT == MVT::v16i8)
2224        return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2225      else if (VT == MVT::v4i16)
2226        return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2227      else if (VT == MVT::v8i16)
2228        return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2229      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2230        return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2231      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2232        return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2233      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2234        return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2235      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2236        return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2237      break;
2238    case Intrinsic::aarch64_neon_ld1x4:
2239      if (VT == MVT::v8i8)
2240        return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2241      else if (VT == MVT::v16i8)
2242        return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2243      else if (VT == MVT::v4i16)
2244        return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2245      else if (VT == MVT::v8i16)
2246        return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2247      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2248        return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2249      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2250        return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2251      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2252        return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2253      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2254        return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2255      break;
2256    case Intrinsic::aarch64_neon_ld2:
2257      if (VT == MVT::v8i8)
2258        return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2259      else if (VT == MVT::v16i8)
2260        return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2261      else if (VT == MVT::v4i16)
2262        return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2263      else if (VT == MVT::v8i16)
2264        return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2265      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2266        return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2267      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2268        return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2269      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2270        return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2271      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2272        return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2273      break;
2274    case Intrinsic::aarch64_neon_ld3:
2275      if (VT == MVT::v8i8)
2276        return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2277      else if (VT == MVT::v16i8)
2278        return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2279      else if (VT == MVT::v4i16)
2280        return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2281      else if (VT == MVT::v8i16)
2282        return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2283      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2284        return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2285      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2286        return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2287      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2288        return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2289      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2290        return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2291      break;
2292    case Intrinsic::aarch64_neon_ld4:
2293      if (VT == MVT::v8i8)
2294        return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
2295      else if (VT == MVT::v16i8)
2296        return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
2297      else if (VT == MVT::v4i16)
2298        return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
2299      else if (VT == MVT::v8i16)
2300        return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
2301      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2302        return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
2303      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2304        return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
2305      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2306        return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2307      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2308        return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
2309      break;
2310    case Intrinsic::aarch64_neon_ld2r:
2311      if (VT == MVT::v8i8)
2312        return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
2313      else if (VT == MVT::v16i8)
2314        return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
2315      else if (VT == MVT::v4i16)
2316        return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
2317      else if (VT == MVT::v8i16)
2318        return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
2319      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2320        return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
2321      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2322        return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
2323      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2324        return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
2325      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2326        return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
2327      break;
2328    case Intrinsic::aarch64_neon_ld3r:
2329      if (VT == MVT::v8i8)
2330        return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
2331      else if (VT == MVT::v16i8)
2332        return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
2333      else if (VT == MVT::v4i16)
2334        return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
2335      else if (VT == MVT::v8i16)
2336        return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
2337      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2338        return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
2339      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2340        return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
2341      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2342        return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
2343      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2344        return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
2345      break;
2346    case Intrinsic::aarch64_neon_ld4r:
2347      if (VT == MVT::v8i8)
2348        return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
2349      else if (VT == MVT::v16i8)
2350        return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
2351      else if (VT == MVT::v4i16)
2352        return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
2353      else if (VT == MVT::v8i16)
2354        return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
2355      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2356        return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
2357      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2358        return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
2359      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2360        return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
2361      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2362        return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
2363      break;
2364    case Intrinsic::aarch64_neon_ld2lane:
2365      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2366        return SelectLoadLane(Node, 2, AArch64::LD2i8);
2367      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2368        return SelectLoadLane(Node, 2, AArch64::LD2i16);
2369      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2370               VT == MVT::v2f32)
2371        return SelectLoadLane(Node, 2, AArch64::LD2i32);
2372      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2373               VT == MVT::v1f64)
2374        return SelectLoadLane(Node, 2, AArch64::LD2i64);
2375      break;
2376    case Intrinsic::aarch64_neon_ld3lane:
2377      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2378        return SelectLoadLane(Node, 3, AArch64::LD3i8);
2379      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2380        return SelectLoadLane(Node, 3, AArch64::LD3i16);
2381      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2382               VT == MVT::v2f32)
2383        return SelectLoadLane(Node, 3, AArch64::LD3i32);
2384      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2385               VT == MVT::v1f64)
2386        return SelectLoadLane(Node, 3, AArch64::LD3i64);
2387      break;
2388    case Intrinsic::aarch64_neon_ld4lane:
2389      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2390        return SelectLoadLane(Node, 4, AArch64::LD4i8);
2391      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2392        return SelectLoadLane(Node, 4, AArch64::LD4i16);
2393      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2394               VT == MVT::v2f32)
2395        return SelectLoadLane(Node, 4, AArch64::LD4i32);
2396      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2397               VT == MVT::v1f64)
2398        return SelectLoadLane(Node, 4, AArch64::LD4i64);
2399      break;
2400    }
2401  } break;
2402  case ISD::INTRINSIC_WO_CHAIN: {
2403    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
2404    switch (IntNo) {
2405    default:
2406      break;
2407    case Intrinsic::aarch64_neon_tbl2:
2408      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
2409                                                  : AArch64::TBLv16i8Two,
2410                         false);
2411    case Intrinsic::aarch64_neon_tbl3:
2412      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
2413                                                  : AArch64::TBLv16i8Three,
2414                         false);
2415    case Intrinsic::aarch64_neon_tbl4:
2416      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
2417                                                  : AArch64::TBLv16i8Four,
2418                         false);
2419    case Intrinsic::aarch64_neon_tbx2:
2420      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
2421                                                  : AArch64::TBXv16i8Two,
2422                         true);
2423    case Intrinsic::aarch64_neon_tbx3:
2424      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
2425                                                  : AArch64::TBXv16i8Three,
2426                         true);
2427    case Intrinsic::aarch64_neon_tbx4:
2428      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
2429                                                  : AArch64::TBXv16i8Four,
2430                         true);
2431    case Intrinsic::aarch64_neon_smull:
2432    case Intrinsic::aarch64_neon_umull:
2433      if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
2434        return N;
2435      break;
2436    }
2437    break;
2438  }
2439  case ISD::INTRINSIC_VOID: {
2440    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2441    if (Node->getNumOperands() >= 3)
2442      VT = Node->getOperand(2)->getValueType(0);
2443    switch (IntNo) {
2444    default:
2445      break;
2446    case Intrinsic::aarch64_neon_st1x2: {
2447      if (VT == MVT::v8i8)
2448        return SelectStore(Node, 2, AArch64::ST1Twov8b);
2449      else if (VT == MVT::v16i8)
2450        return SelectStore(Node, 2, AArch64::ST1Twov16b);
2451      else if (VT == MVT::v4i16)
2452        return SelectStore(Node, 2, AArch64::ST1Twov4h);
2453      else if (VT == MVT::v8i16)
2454        return SelectStore(Node, 2, AArch64::ST1Twov8h);
2455      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2456        return SelectStore(Node, 2, AArch64::ST1Twov2s);
2457      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2458        return SelectStore(Node, 2, AArch64::ST1Twov4s);
2459      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2460        return SelectStore(Node, 2, AArch64::ST1Twov2d);
2461      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2462        return SelectStore(Node, 2, AArch64::ST1Twov1d);
2463      break;
2464    }
2465    case Intrinsic::aarch64_neon_st1x3: {
2466      if (VT == MVT::v8i8)
2467        return SelectStore(Node, 3, AArch64::ST1Threev8b);
2468      else if (VT == MVT::v16i8)
2469        return SelectStore(Node, 3, AArch64::ST1Threev16b);
2470      else if (VT == MVT::v4i16)
2471        return SelectStore(Node, 3, AArch64::ST1Threev4h);
2472      else if (VT == MVT::v8i16)
2473        return SelectStore(Node, 3, AArch64::ST1Threev8h);
2474      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2475        return SelectStore(Node, 3, AArch64::ST1Threev2s);
2476      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2477        return SelectStore(Node, 3, AArch64::ST1Threev4s);
2478      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2479        return SelectStore(Node, 3, AArch64::ST1Threev2d);
2480      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2481        return SelectStore(Node, 3, AArch64::ST1Threev1d);
2482      break;
2483    }
2484    case Intrinsic::aarch64_neon_st1x4: {
2485      if (VT == MVT::v8i8)
2486        return SelectStore(Node, 4, AArch64::ST1Fourv8b);
2487      else if (VT == MVT::v16i8)
2488        return SelectStore(Node, 4, AArch64::ST1Fourv16b);
2489      else if (VT == MVT::v4i16)
2490        return SelectStore(Node, 4, AArch64::ST1Fourv4h);
2491      else if (VT == MVT::v8i16)
2492        return SelectStore(Node, 4, AArch64::ST1Fourv8h);
2493      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2494        return SelectStore(Node, 4, AArch64::ST1Fourv2s);
2495      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2496        return SelectStore(Node, 4, AArch64::ST1Fourv4s);
2497      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2498        return SelectStore(Node, 4, AArch64::ST1Fourv2d);
2499      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2500        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2501      break;
2502    }
2503    case Intrinsic::aarch64_neon_st2: {
2504      if (VT == MVT::v8i8)
2505        return SelectStore(Node, 2, AArch64::ST2Twov8b);
2506      else if (VT == MVT::v16i8)
2507        return SelectStore(Node, 2, AArch64::ST2Twov16b);
2508      else if (VT == MVT::v4i16)
2509        return SelectStore(Node, 2, AArch64::ST2Twov4h);
2510      else if (VT == MVT::v8i16)
2511        return SelectStore(Node, 2, AArch64::ST2Twov8h);
2512      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2513        return SelectStore(Node, 2, AArch64::ST2Twov2s);
2514      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2515        return SelectStore(Node, 2, AArch64::ST2Twov4s);
2516      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2517        return SelectStore(Node, 2, AArch64::ST2Twov2d);
2518      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2519        return SelectStore(Node, 2, AArch64::ST1Twov1d);
2520      break;
2521    }
2522    case Intrinsic::aarch64_neon_st3: {
2523      if (VT == MVT::v8i8)
2524        return SelectStore(Node, 3, AArch64::ST3Threev8b);
2525      else if (VT == MVT::v16i8)
2526        return SelectStore(Node, 3, AArch64::ST3Threev16b);
2527      else if (VT == MVT::v4i16)
2528        return SelectStore(Node, 3, AArch64::ST3Threev4h);
2529      else if (VT == MVT::v8i16)
2530        return SelectStore(Node, 3, AArch64::ST3Threev8h);
2531      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2532        return SelectStore(Node, 3, AArch64::ST3Threev2s);
2533      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2534        return SelectStore(Node, 3, AArch64::ST3Threev4s);
2535      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2536        return SelectStore(Node, 3, AArch64::ST3Threev2d);
2537      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2538        return SelectStore(Node, 3, AArch64::ST1Threev1d);
2539      break;
2540    }
2541    case Intrinsic::aarch64_neon_st4: {
2542      if (VT == MVT::v8i8)
2543        return SelectStore(Node, 4, AArch64::ST4Fourv8b);
2544      else if (VT == MVT::v16i8)
2545        return SelectStore(Node, 4, AArch64::ST4Fourv16b);
2546      else if (VT == MVT::v4i16)
2547        return SelectStore(Node, 4, AArch64::ST4Fourv4h);
2548      else if (VT == MVT::v8i16)
2549        return SelectStore(Node, 4, AArch64::ST4Fourv8h);
2550      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2551        return SelectStore(Node, 4, AArch64::ST4Fourv2s);
2552      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2553        return SelectStore(Node, 4, AArch64::ST4Fourv4s);
2554      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2555        return SelectStore(Node, 4, AArch64::ST4Fourv2d);
2556      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2557        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2558      break;
2559    }
2560    case Intrinsic::aarch64_neon_st2lane: {
2561      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2562        return SelectStoreLane(Node, 2, AArch64::ST2i8);
2563      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2564        return SelectStoreLane(Node, 2, AArch64::ST2i16);
2565      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2566               VT == MVT::v2f32)
2567        return SelectStoreLane(Node, 2, AArch64::ST2i32);
2568      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2569               VT == MVT::v1f64)
2570        return SelectStoreLane(Node, 2, AArch64::ST2i64);
2571      break;
2572    }
2573    case Intrinsic::aarch64_neon_st3lane: {
2574      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2575        return SelectStoreLane(Node, 3, AArch64::ST3i8);
2576      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2577        return SelectStoreLane(Node, 3, AArch64::ST3i16);
2578      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2579               VT == MVT::v2f32)
2580        return SelectStoreLane(Node, 3, AArch64::ST3i32);
2581      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2582               VT == MVT::v1f64)
2583        return SelectStoreLane(Node, 3, AArch64::ST3i64);
2584      break;
2585    }
2586    case Intrinsic::aarch64_neon_st4lane: {
2587      if (VT == MVT::v16i8 || VT == MVT::v8i8)
2588        return SelectStoreLane(Node, 4, AArch64::ST4i8);
2589      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2590        return SelectStoreLane(Node, 4, AArch64::ST4i16);
2591      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2592               VT == MVT::v2f32)
2593        return SelectStoreLane(Node, 4, AArch64::ST4i32);
2594      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2595               VT == MVT::v1f64)
2596        return SelectStoreLane(Node, 4, AArch64::ST4i64);
2597      break;
2598    }
2599    }
2600  }
2601  case AArch64ISD::LD2post: {
2602    if (VT == MVT::v8i8)
2603      return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
2604    else if (VT == MVT::v16i8)
2605      return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
2606    else if (VT == MVT::v4i16)
2607      return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
2608    else if (VT == MVT::v8i16)
2609      return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
2610    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2611      return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
2612    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2613      return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
2614    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2615      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2616    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2617      return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
2618    break;
2619  }
2620  case AArch64ISD::LD3post: {
2621    if (VT == MVT::v8i8)
2622      return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
2623    else if (VT == MVT::v16i8)
2624      return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
2625    else if (VT == MVT::v4i16)
2626      return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
2627    else if (VT == MVT::v8i16)
2628      return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
2629    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2630      return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
2631    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2632      return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
2633    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2634      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2635    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2636      return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
2637    break;
2638  }
2639  case AArch64ISD::LD4post: {
2640    if (VT == MVT::v8i8)
2641      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
2642    else if (VT == MVT::v16i8)
2643      return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
2644    else if (VT == MVT::v4i16)
2645      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
2646    else if (VT == MVT::v8i16)
2647      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
2648    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2649      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
2650    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2651      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
2652    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2653      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2654    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2655      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
2656    break;
2657  }
2658  case AArch64ISD::LD1x2post: {
2659    if (VT == MVT::v8i8)
2660      return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
2661    else if (VT == MVT::v16i8)
2662      return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
2663    else if (VT == MVT::v4i16)
2664      return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
2665    else if (VT == MVT::v8i16)
2666      return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
2667    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2668      return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
2669    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2670      return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
2671    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2672      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2673    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2674      return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
2675    break;
2676  }
2677  case AArch64ISD::LD1x3post: {
2678    if (VT == MVT::v8i8)
2679      return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
2680    else if (VT == MVT::v16i8)
2681      return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
2682    else if (VT == MVT::v4i16)
2683      return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
2684    else if (VT == MVT::v8i16)
2685      return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
2686    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2687      return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
2688    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2689      return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
2690    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2691      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2692    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2693      return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
2694    break;
2695  }
2696  case AArch64ISD::LD1x4post: {
2697    if (VT == MVT::v8i8)
2698      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
2699    else if (VT == MVT::v16i8)
2700      return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
2701    else if (VT == MVT::v4i16)
2702      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
2703    else if (VT == MVT::v8i16)
2704      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
2705    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2706      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
2707    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2708      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
2709    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2710      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2711    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2712      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
2713    break;
2714  }
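  // Post-incremented duplicating loads (ld1r..ld4r): load one element per
  // destination register and replicate it across every lane of that register.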
2715  case AArch64ISD::LD1DUPpost: {
2716    if (VT == MVT::v8i8)
2717      return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
2718    else if (VT == MVT::v16i8)
2719      return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
2720    else if (VT == MVT::v4i16)
2721      return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
2722    else if (VT == MVT::v8i16)
2723      return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
2724    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2725      return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
2726    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2727      return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
2728    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2729      return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
2730    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2731      return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
2732    break;
2733  }
2734  case AArch64ISD::LD2DUPpost: {
2735    if (VT == MVT::v8i8)
2736      return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
2737    else if (VT == MVT::v16i8)
2738      return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
2739    else if (VT == MVT::v4i16)
2740      return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
2741    else if (VT == MVT::v8i16)
2742      return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
2743    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2744      return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
2745    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2746      return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
2747    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2748      return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
2749    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2750      return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
2751    break;
2752  }
2753  case AArch64ISD::LD3DUPpost: {
2754    if (VT == MVT::v8i8)
2755      return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
2756    else if (VT == MVT::v16i8)
2757      return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
2758    else if (VT == MVT::v4i16)
2759      return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
2760    else if (VT == MVT::v8i16)
2761      return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
2762    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2763      return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
2764    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2765      return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
2766    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2767      return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
2768    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2769      return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
2770    break;
2771  }
2772  case AArch64ISD::LD4DUPpost: {
2773    if (VT == MVT::v8i8)
2774      return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
2775    else if (VT == MVT::v16i8)
2776      return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
2777    else if (VT == MVT::v4i16)
2778      return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
2779    else if (VT == MVT::v8i16)
2780      return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
2781    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2782      return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
2783    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2784      return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
2785    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2786      return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
2787    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2788      return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
2789    break;
2790  }
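  // Post-incremented single-lane loads (ld1..ld4 {v}[lane]). Only the element
  // size selects the opcode, so the 64-bit and 128-bit forms of each element
  // type share an instruction.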
2791  case AArch64ISD::LD1LANEpost: {
2792    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2793      return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
2794    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2795      return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
2796    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2797             VT == MVT::v2f32)
2798      return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
2799    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2800             VT == MVT::v1f64)
2801      return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
2802    break;
2803  }
2804  case AArch64ISD::LD2LANEpost: {
2805    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2806      return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
2807    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2808      return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
2809    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2810             VT == MVT::v2f32)
2811      return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
2812    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2813             VT == MVT::v1f64)
2814      return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
2815    break;
2816  }
2817  case AArch64ISD::LD3LANEpost: {
2818    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2819      return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
2820    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2821      return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
2822    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2823             VT == MVT::v2f32)
2824      return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
2825    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2826             VT == MVT::v1f64)
2827      return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
2828    break;
2829  }
2830  case AArch64ISD::LD4LANEpost: {
2831    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2832      return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
2833    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2834      return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
2835    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2836             VT == MVT::v2f32)
2837      return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
2838    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2839             VT == MVT::v1f64)
2840      return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
2841    break;
2842  }
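  // Post-incremented interleaving stores (st2/st3/st4). Stores produce no
  // vector result, so the value type comes from the first stored operand.
  // v1i64/v1f64 have no stN form and fall back to the multi-register st1.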
2843  case AArch64ISD::ST2post: {
2844    VT = Node->getOperand(1).getValueType();
2845    if (VT == MVT::v8i8)
2846      return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
2847    else if (VT == MVT::v16i8)
2848      return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
2849    else if (VT == MVT::v4i16)
2850      return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
2851    else if (VT == MVT::v8i16)
2852      return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
2853    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2854      return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
2855    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2856      return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
2857    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2858      return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
2859    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2860      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
2861    break;
2862  }
2863  case AArch64ISD::ST3post: {
2864    VT = Node->getOperand(1).getValueType();
2865    if (VT == MVT::v8i8)
2866      return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
2867    else if (VT == MVT::v16i8)
2868      return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
2869    else if (VT == MVT::v4i16)
2870      return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
2871    else if (VT == MVT::v8i16)
2872      return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
2873    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2874      return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
2875    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2876      return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
2877    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2878      return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
2879    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2880      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
2881    break;
2882  }
2883  case AArch64ISD::ST4post: {
2884    VT = Node->getOperand(1).getValueType();
2885    if (VT == MVT::v8i8)
2886      return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
2887    else if (VT == MVT::v16i8)
2888      return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
2889    else if (VT == MVT::v4i16)
2890      return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
2891    else if (VT == MVT::v8i16)
2892      return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
2893    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2894      return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
2895    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2896      return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
2897    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2898      return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
2899    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2900      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
2901    break;
2902  }
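  // Post-incremented multi-register st1 (e.g. st1 {v0.4s, v1.4s}, [x0], #32).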
2903  case AArch64ISD::ST1x2post: {
2904    VT = Node->getOperand(1).getValueType();
2905    if (VT == MVT::v8i8)
2906      return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
2907    else if (VT == MVT::v16i8)
2908      return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
2909    else if (VT == MVT::v4i16)
2910      return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
2911    else if (VT == MVT::v8i16)
2912      return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
2913    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2914      return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
2915    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2916      return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
2917    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2918      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
2919    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2920      return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
2921    break;
2922  }
2923  case AArch64ISD::ST1x3post: {
2924    VT = Node->getOperand(1).getValueType();
2925    if (VT == MVT::v8i8)
2926      return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
2927    else if (VT == MVT::v16i8)
2928      return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
2929    else if (VT == MVT::v4i16)
2930      return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
2931    else if (VT == MVT::v8i16)
2932      return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
2933    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2934      return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
2935    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2936      return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
2937    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2938      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
2939    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2940      return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
2941    break;
2942  }
2943  case AArch64ISD::ST1x4post: {
2944    VT = Node->getOperand(1).getValueType();
2945    if (VT == MVT::v8i8)
2946      return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
2947    else if (VT == MVT::v16i8)
2948      return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
2949    else if (VT == MVT::v4i16)
2950      return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
2951    else if (VT == MVT::v8i16)
2952      return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
2953    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2954      return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
2955    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2956      return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
2957    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2958      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
2959    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2960      return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
2961    break;
2962  }
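  // Post-incremented single-lane stores (st2..st4 {v}[lane]), again keyed only
  // on the element size.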
2963  case AArch64ISD::ST2LANEpost: {
2964    VT = Node->getOperand(1).getValueType();
2965    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2966      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
2967    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2968      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
2969    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2970             VT == MVT::v2f32)
2971      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
2972    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2973             VT == MVT::v1f64)
2974      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
2975    break;
2976  }
2977  case AArch64ISD::ST3LANEpost: {
2978    VT = Node->getOperand(1).getValueType();
2979    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2980      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
2981    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2982      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
2983    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2984             VT == MVT::v2f32)
2985      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
2986    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2987             VT == MVT::v1f64)
2988      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
2989    break;
2990  }
2991  case AArch64ISD::ST4LANEpost: {
2992    VT = Node->getOperand(1).getValueType();
2993    if (VT == MVT::v16i8 || VT == MVT::v8i8)
2994      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
2995    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2996      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
2997    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2998             VT == MVT::v2f32)
2999      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
3000    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3001             VT == MVT::v1f64)
3002      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
3003    break;
3004  }
3005
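  // Floating-point rounding nodes. SelectLIBM tries to select these directly
  // to FRINT* instructions for scalar f32/f64; anything it cannot handle falls
  // through to the generated matcher below.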
3006  case ISD::FCEIL:
3007  case ISD::FFLOOR:
3008  case ISD::FTRUNC:
3009  case ISD::FROUND:
3010    if (SDNode *I = SelectLIBM(Node))
3011      return I;
3012    break;
3013  }
3014
3015  // Fall back to the TableGen-generated default instruction selection.
3016  ResNode = SelectCode(Node);
3017
3018  DEBUG(errs() << "=> ");
3019  if (ResNode == nullptr || ResNode == Node)
3020    DEBUG(Node->dump(CurDAG));
3021  else
3022    DEBUG(ResNode->dump(CurDAG));
3023  DEBUG(errs() << "\n");
3024
3025  return ResNode;
3026}
3027
3028/// createAArch64ISelDag - This pass converts a legalized DAG into an
3029/// AArch64-specific DAG, ready for instruction scheduling.
3030FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
3031                                         CodeGenOpt::Level OptLevel) {
3032  return new AArch64DAGToDAGISel(TM, OptLevel);
3033}
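
// A sketch of typical usage (assumed from the surrounding target code, not
// part of this file): the selector is registered from the target's pass
// configuration, roughly:
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     ...
//     return false;
//   }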
3034