AArch64ISelDAGToDAG.cpp revision f116f8a63fb26feb0f8c59ba4d69d7954393d7d1
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-isel"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

//===--------------------------------------------------------------------===//
/// AArch64 specific code to select AArch64 machine instructions for
/// SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;

  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm),
      Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  }

  virtual const char *getPassName() const {
    return "AArch64 Instruction Selection";
  }

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

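  /// Match an unsigned, scaled 12-bit immediate offset for a load/store of
  /// MemSize bytes, i.e. the LDR/STR "unsigned offset" addressing mode. For
  /// example (illustrative): with MemSize == 8, a byte offset of 0x7ff8 is
  /// accepted and encoded as the scaled immediate 0xfff.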
  template<unsigned MemSize>
  bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
    if (!CN || CN->getZExtValue() % MemSize != 0
        || CN->getZExtValue() / MemSize > 0xfff)
      return false;

    UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
    return true;
  }

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  /// Used for pre-lowered address-reference nodes, so we already know
  /// the fields match. This operand's job is simply to add an
  /// appropriate shift operand to the MOVZ/MOVK instruction.
  template<unsigned LogShift>
  bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
    Imm = N;
    Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
    return true;
  }

  bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                unsigned RegWidth);

  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    char ConstraintCode,
                                    std::vector<SDValue> &OutOps);

  bool SelectLogicalImm(SDValue N, SDValue &Imm);

  template<unsigned RegWidth>
  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
    return SelectTSTBOperand(N, FixedPos, RegWidth);
  }

  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);

  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
                       unsigned Op64);

  /// Put the given constant into a pool and return a DAG node that will give
  /// its address.
  SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);

  SDNode *TrySelectToMoveImm(SDNode *N);
  SDNode *LowerToFPLitPool(SDNode *Node);
  SDNode *SelectToLitPool(SDNode *N);

  SDNode *Select(SDNode *Node);
private:
  /// Get the opcode for a NEON table lookup instruction.
  unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);

  /// Select NEON table lookup intrinsics.  NumVecs should be 1, 2, 3 or 4.
  /// IsExt indicates whether the result will be extended with an extra
  /// argument (TBX rather than TBL).
  SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);

  /// Select NEON load intrinsics.  NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
                    const uint16_t *Opcodes);

  /// Select NEON store intrinsics.  NumVecs should be 1, 2, 3 or 4.
  SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating,
                    const uint16_t *Opcodes);

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                      unsigned SubRegs[]);
};
} // end anonymous namespace

bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                              unsigned RegWidth) {
  const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
  if (!CN) return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
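  //
  // For example (illustrative): (fp_to_sint (fmul Val, 65536.0)) with a
  // 32-bit destination gives FBits == 16; note that the operand emitted
  // below is 64 - FBits, the raw value of the instruction's scale field.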
  bool IsExact;

  // fbits is between 1 and 64 in the worst-case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
  return true;
}

bool
AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
                                                  char ConstraintCode,
                                                  std::vector<SDValue> &OutOps) {
  switch (ConstraintCode) {
  default: llvm_unreachable("Unrecognised AArch64 memory constraint");
  case 'm':
    // FIXME: more freedom is actually permitted for 'm'. We can go
    // hunting for a base and an offset if we want. Of course, since
    // we don't really know how the operand is going to be used we're
    // probably restricted to the load/store pair's simm7 as an offset
    // range anyway.
  case 'Q':
    OutOps.push_back(Op);
  }

  return false;
}

bool
AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
  ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
  if (!Imm || !Imm->getValueAPF().isPosZero())
    return false;

  // Doesn't actually carry any information, but keeps TableGen quiet.
  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

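// AArch64 logical immediates are bit patterns describable as a rotated run of
// set bits within a repeating element of 2, 4, 8, 16, 32 or 64 bits. For
// example (illustrative): 0x00ff00ff00ff00ff is encodable (a repeating 16-bit
// element containing 8 contiguous set bits), whereas 0x1234 is not.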
bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
  uint32_t Bits;
  uint32_t RegWidth = N.getValueType().getSizeInBits();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
    return false;

  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
  return true;
}

SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
  SDNode *ResNode;
  SDLoc dl(Node);
  EVT DestType = Node->getValueType(0);
  unsigned DestWidth = DestType.getSizeInBits();

  unsigned MOVOpcode;
  EVT MOVType;
  int UImm16, Shift;
  uint32_t LogicalBits;

  uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
  if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
  } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
  } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we
    // can use a 32-bit instruction: "movn w0, #0xedcb" (the bitwise NOT of
    // 0x1234).
    MOVType = MVT::i32;
    MOVOpcode = AArch64::MOVNwii;
  } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
    MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
    uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;

    return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
                              CurDAG->getRegister(ZR, DestType),
                              CurDAG->getTargetConstant(LogicalBits, MVT::i32));
  } else {
    // Can't handle it in one instruction. There's scope for permitting two (or
    // more) instructions, but that'll need more thought.
    return NULL;
  }

  ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
                                   CurDAG->getTargetConstant(UImm16, MVT::i32),
                                   CurDAG->getTargetConstant(Shift, MVT::i32));

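  // A write to a w-register implicitly zeroes the high 32 bits of the full
  // x-register, so when a 32-bit MOVN materialises a 64-bit value we only
  // need SUBREG_TO_REG to re-type the result, not an extra instruction.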
  if (MOVType != DestType) {
    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
                          MVT::i64, MVT::i32, MVT::Other,
                          CurDAG->getTargetConstant(0, MVT::i64),
                          SDValue(ResNode, 0),
                          CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
  }

  return ResNode;
}

SDValue
AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
                                                const Constant *CV) {
  EVT PtrVT = getTargetLowering()->getPointerTy();

  switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
  case CodeModel::Small: {
    unsigned Alignment =
      getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
    return CurDAG->getNode(
        AArch64ISD::WrapperSmall, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
        CurDAG->getConstant(Alignment, MVT::i32));
  }
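  // In the large code model the address is materialised with the usual
  // absolute-address sequence, e.g. (illustrative):
  //   movz x0, #:abs_g3:.LCPI0_0
  //   movk x0, #:abs_g2_nc:.LCPI0_0
  //   movk x0, #:abs_g1_nc:.LCPI0_0
  //   movk x0, #:abs_g0_nc:.LCPI0_0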
  case CodeModel::Large: {
    SDNode *LitAddr;
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVZxii, DL, PtrVT,
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
        CurDAG->getTargetConstant(3, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
        CurDAG->getTargetConstant(2, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
        CurDAG->getTargetConstant(1, MVT::i32));
    LitAddr = CurDAG->getMachineNode(
        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
        CurDAG->getTargetConstant(0, MVT::i32));
    return SDValue(LitAddr, 0);
  }
  default:
    llvm_unreachable("Only small and large code models supported now");
  }
}

SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
  SDLoc DL(Node);
  uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
  int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
  EVT DestType = Node->getValueType(0);

  // Since we may end up loading a 64-bit constant from a 32-bit entry, the
  // constant in the pool may have a different type from the eventual node.
  ISD::LoadExtType Extension;
  EVT MemType;

  assert((DestType == MVT::i64 || DestType == MVT::i32)
         && "Only expect integer constants at the moment");

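  // Pick the smallest pool entry that still reproduces the value. For example
  // (illustrative): 0x00000000deadbeef fits in 32 bits unsigned, so it becomes
  // a 4-byte entry loaded with a zero-extending load (ZEXTLOAD of i32).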
  if (DestType == MVT::i32) {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i32;
  } else if (UnsignedVal <= UINT32_MAX) {
    Extension = ISD::ZEXTLOAD;
    MemType = MVT::i32;
  } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
    Extension = ISD::SEXTLOAD;
    MemType = MVT::i32;
  } else {
    Extension = ISD::NON_EXTLOAD;
    MemType = MVT::i64;
  }

  Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
                                                  MemType.getSizeInBits()),
                                  UnsignedVal);
  SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());

  return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
                            PoolAddr,
                            MachinePointerInfo::getConstantPool(), MemType,
                            /* isVolatile = */ false,
                            /* isNonTemporal = */ false,
                            Alignment).getNode();
}

SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
  SDLoc DL(Node);
  const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
  EVT DestType = Node->getValueType(0);

  unsigned Alignment =
    getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
  SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);

  return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
                         MachinePointerInfo::getConstantPool(),
                         /* isVolatile = */ false,
                         /* isNonTemporal = */ false,
                         /* isInvariant = */ true,
                         Alignment).getNode();
}

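// Match a power-of-two constant whose set bit can be tested directly by
// TBZ/TBNZ. For example (illustrative): a test against 0x80 yields
// TestedBit == 7, i.e. a single-bit test of bit 7.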
bool
AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
                                       unsigned RegWidth) {
  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  uint64_t Val = CN->getZExtValue();

  if (!isPowerOf2_64(Val)) return false;

  unsigned TestedBit = Log2_64(Val);
  // Checks above should have guaranteed that we haven't lost information in
  // finding TestedBit, but it must still be in range.
  if (TestedBit >= RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
  return true;
}

SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
                                          unsigned Op16, unsigned Op32,
                                          unsigned Op64) {
  // Mostly direct translation to the given operations, except that we preserve
  // the AtomicOrdering for use later on.
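  // (These ATOMIC_* opcodes are pseudo-instructions; after selection they are
  // expected to be expanded into load/store-exclusive (LDXR/STXR) loops, with
  // the preserved ordering operand deciding which barriers or acquire/release
  // forms that expansion uses.)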
  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
  EVT VT = AN->getMemoryVT();

  unsigned Op;
  if (VT == MVT::i8)
    Op = Op8;
  else if (VT == MVT::i16)
    Op = Op16;
  else if (VT == MVT::i32)
    Op = Op32;
  else if (VT == MVT::i64)
    Op = Op64;
  else
    llvm_unreachable("Unexpected atomic operation");

  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
    Ops.push_back(AN->getOperand(i));

  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
  Ops.push_back(AN->getOperand(0)); // Chain moves to the end

  return CurDAG->SelectNodeTo(Node, Op,
                              AN->getValueType(0), MVT::Other,
                              &Ops[0], Ops.size());
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
                                    AArch64::DTripleRegClassID,
                                    AArch64::DQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
                                AArch64::dsub_2, AArch64::dsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
                                    AArch64::QTripleRegClassID,
                                    AArch64::QQuadRegClassID };
  static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
                                AArch64::qsub_2, AArch64::qsub_3 };

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         unsigned RegClassIDs[],
                                         unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0].getNode());

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}
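
// For example (illustrative): createQTuple({v0, v1}) produces
//   REG_SEQUENCE QPairRegClassID, v0, qsub_0, v1, qsub_1
// i.e. a single untyped QPair value that ld2/st2/tbl2 patterns can consume.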

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  switch (Opc) {
  default: break;
  case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
  case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
  case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
  case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
  case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
  case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
  case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
  case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;

  case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
  case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
  case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
  case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register;
  case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
  case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
  case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
  case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;

  case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
  case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
  case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
  case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register;
  case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
  case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
  case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
  case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;

  case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
  case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
  case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
  case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register;
  case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
  case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
  case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
  case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;

  case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
  case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
  case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
  case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
  case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
  case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
  case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
  case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;

  case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
  case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
  case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
  case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register;
  case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
  case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
  case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
  case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;

  case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
  case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
  case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
  case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register;
  case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
  case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
  case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
  case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;

  case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
  case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
  case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
  case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register;
  case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
  case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
  case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
  case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
                                       bool isUpdating,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");

  EVT VT = N->getValueType(0);
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vector load type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  case MVT::v16i8: OpcodeIndex = 4; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 5; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 6; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 7; break;
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  SmallVector<SDValue, 2> Ops;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }

  Ops.push_back(N->getOperand(0)); // Push back the Chain

  std::vector<EVT> ResTys;
  bool is64BitVector = VT.is64BitVector();

  if (NumVecs == 1)
    ResTys.push_back(VT);
  else if (NumVecs == 3)
    ResTys.push_back(MVT::Untyped);
  else {
    EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
                                 is64BitVector ? NumVecs : NumVecs * 2);
    ResTys.push_back(ResTy);
  }

  if (isUpdating)
    ResTys.push_back(MVT::i64); // Type of the updated register
  ResTys.push_back(MVT::Other); // Type of the Chain
  SDLoc dl(N);
  SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // If NumVecs > 1, the return result is a super register containing 2-4
  // consecutive vector registers.
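  // For example (illustrative): an ld2 of two v4i32 values yields one untyped
  // result that is split back into its halves below with qsub_0/qsub_1
  // subregister extracts.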
  SDValue SuperReg = SDValue(VLd, 0);

  unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Update users of the Chain
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));

  return NULL;
}

SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
                                       bool isUpdating,
                                       const uint16_t *Opcodes) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vector store type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  case MVT::v16i8: OpcodeIndex = 4; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 5; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 6; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 7; break;
  }
  unsigned Opc = Opcodes[OpcodeIndex];

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i64);
  ResTys.push_back(MVT::Other); // Type for the Chain

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address

  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
      Opc = getVLDSTRegisterUpdateOpcode(Opc);
    Ops.push_back(Inc);
  }
  bool is64BitVector = VT.is64BitVector();

  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
  Ops.push_back(SrcReg);

  // Push back the Chain
  Ops.push_back(N->getOperand(0));

  // Transfer memoperands.
  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

  return VSt;
}
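
// For example (illustrative): an arm_neon_vst3 of three v8i8 vectors packs
// them into a DTriple via createDTuple and emits ST3_8B with operands
// (address, tuple, chain).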

unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
                                        unsigned NumOfVec) {
  assert(NumOfVec >= 1 && NumOfVec <= 4 && "TBL NumOfVec out-of-range");

  unsigned Opc = 0;
  switch (NumOfVec) {
  default:
    break;
  case 1:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
    else
      Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
    break;
  case 2:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
    else
      Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
    break;
  case 3:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
    else
      Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
    break;
  case 4:
    if (IsExt)
      Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
    else
      Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
    break;
  }

  return Opc;
}

SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
                                        bool IsExt) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
  SDLoc dl(N);

  // The lookup table must be made of 128-bit vectors.
  unsigned Vec0Idx = IsExt ? 2 : 1;
  assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
         "The element of lookup table for vtbl and vtbx must be 128-bit");

  // Check whether the return value type is 64-bit or 128-bit.
  EVT ResVT = N->getValueType(0);
  bool is64BitRes = ResVT.is64BitVector();

  // Form an SDValue for the vector list.
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
                               N->op_begin() + Vec0Idx + NumVecs);
  SDValue TblReg = createQTuple(Regs);
  unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);

  SmallVector<SDValue, 3> Ops;
  if (IsExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(TblReg);
  Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
  return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
}
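
// For example (illustrative): aarch64_neon_vtbx2 with a v8i8 result selects
// TBX2_8b: the fall-back vector (operand 1) is passed through first, the two
// table vectors are packed into a QPair, and the index vector comes last.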

SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
  // Dump information about the Node being selected
  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");

  if (Node->isMachineOpcode()) {
    DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return NULL;
  }

  switch (Node->getOpcode()) {
  case ISD::ATOMIC_LOAD_ADD:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_ADD_I8,
                        AArch64::ATOMIC_LOAD_ADD_I16,
                        AArch64::ATOMIC_LOAD_ADD_I32,
                        AArch64::ATOMIC_LOAD_ADD_I64);
  case ISD::ATOMIC_LOAD_SUB:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_SUB_I8,
                        AArch64::ATOMIC_LOAD_SUB_I16,
                        AArch64::ATOMIC_LOAD_SUB_I32,
                        AArch64::ATOMIC_LOAD_SUB_I64);
  case ISD::ATOMIC_LOAD_AND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_AND_I8,
                        AArch64::ATOMIC_LOAD_AND_I16,
                        AArch64::ATOMIC_LOAD_AND_I32,
                        AArch64::ATOMIC_LOAD_AND_I64);
  case ISD::ATOMIC_LOAD_OR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_OR_I8,
                        AArch64::ATOMIC_LOAD_OR_I16,
                        AArch64::ATOMIC_LOAD_OR_I32,
                        AArch64::ATOMIC_LOAD_OR_I64);
  case ISD::ATOMIC_LOAD_XOR:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_XOR_I8,
                        AArch64::ATOMIC_LOAD_XOR_I16,
                        AArch64::ATOMIC_LOAD_XOR_I32,
                        AArch64::ATOMIC_LOAD_XOR_I64);
  case ISD::ATOMIC_LOAD_NAND:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_NAND_I8,
                        AArch64::ATOMIC_LOAD_NAND_I16,
                        AArch64::ATOMIC_LOAD_NAND_I32,
                        AArch64::ATOMIC_LOAD_NAND_I64);
  case ISD::ATOMIC_LOAD_MIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MIN_I8,
                        AArch64::ATOMIC_LOAD_MIN_I16,
                        AArch64::ATOMIC_LOAD_MIN_I32,
                        AArch64::ATOMIC_LOAD_MIN_I64);
  case ISD::ATOMIC_LOAD_MAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_MAX_I8,
                        AArch64::ATOMIC_LOAD_MAX_I16,
                        AArch64::ATOMIC_LOAD_MAX_I32,
                        AArch64::ATOMIC_LOAD_MAX_I64);
  case ISD::ATOMIC_LOAD_UMIN:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMIN_I8,
                        AArch64::ATOMIC_LOAD_UMIN_I16,
                        AArch64::ATOMIC_LOAD_UMIN_I32,
                        AArch64::ATOMIC_LOAD_UMIN_I64);
  case ISD::ATOMIC_LOAD_UMAX:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_LOAD_UMAX_I8,
                        AArch64::ATOMIC_LOAD_UMAX_I16,
                        AArch64::ATOMIC_LOAD_UMAX_I32,
                        AArch64::ATOMIC_LOAD_UMAX_I64);
  case ISD::ATOMIC_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_SWAP_I8,
                        AArch64::ATOMIC_SWAP_I16,
                        AArch64::ATOMIC_SWAP_I32,
                        AArch64::ATOMIC_SWAP_I64);
  case ISD::ATOMIC_CMP_SWAP:
    return SelectAtomic(Node,
                        AArch64::ATOMIC_CMP_SWAP_I8,
                        AArch64::ATOMIC_CMP_SWAP_I16,
                        AArch64::ATOMIC_CMP_SWAP_I32,
                        AArch64::ATOMIC_CMP_SWAP_I64);
  case ISD::FrameIndex: {
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    EVT PtrTy = getTargetLowering()->getPointerTy();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
    return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
                                TFI, CurDAG->getTargetConstant(0, PtrTy));
  }
848    // Constant pools are fine, just create a Target entry.
849    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
850    const Constant *C = CN->getConstVal();
851    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
852
853    ReplaceUses(SDValue(Node, 0), CP);
854    return NULL;
855  }
856  case ISD::Constant: {
857    SDNode *ResNode = 0;
858    if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
859      // XZR and WZR are probably even better than an actual move: most of the
860      // time they can be folded into another instruction with *no* cost.
861
862      EVT Ty = Node->getValueType(0);
863      assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
864      uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
865      ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
866                                       SDLoc(Node),
867                                       Register, Ty).getNode();
868    }
869
870    // Next best option is a move-immediate, see if we can do that.
871    if (!ResNode) {
872      ResNode = TrySelectToMoveImm(Node);
873    }
874
875    if (ResNode)
876      return ResNode;
877
878    // If even that fails we fall back to a lit-pool entry at the moment. Future
879    // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
880    ResNode = SelectToLitPool(Node);
881    assert(ResNode && "We need *some* way to materialise a constant");
882
883    // We want to continue selection at this point since the litpool access
884    // generated used generic nodes for simplicity.
885    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
886    Node = ResNode;
887    break;
888  }
889  case ISD::ConstantFP: {
890    if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
891      // FMOV will take care of it from TableGen
892      break;
893    }
894
895    SDNode *ResNode = LowerToFPLitPool(Node);
896    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
897
898    // We want to continue selection at this point since the litpool access
899    // generated used generic nodes for simplicity.
900    Node = ResNode;
901    break;
902  }
  case AArch64ISD::NEON_LD1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD1WB_8B_fixed,  AArch64::LD1WB_4H_fixed,
      AArch64::LD1WB_2S_fixed,  AArch64::LD1WB_1D_fixed,
      AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
      AArch64::LD1WB_4S_fixed,  AArch64::LD1WB_2D_fixed
    };
    return SelectVLD(Node, 1, true, Opcodes);
  }
  case AArch64ISD::NEON_LD2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD2WB_8B_fixed,  AArch64::LD2WB_4H_fixed,
      AArch64::LD2WB_2S_fixed,  AArch64::LD1WB2V_1D_fixed,
      AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
      AArch64::LD2WB_4S_fixed,  AArch64::LD2WB_2D_fixed
    };
    return SelectVLD(Node, 2, true, Opcodes);
  }
  case AArch64ISD::NEON_LD3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD3WB_8B_fixed,  AArch64::LD3WB_4H_fixed,
      AArch64::LD3WB_2S_fixed,  AArch64::LD1WB3V_1D_fixed,
      AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
      AArch64::LD3WB_4S_fixed,  AArch64::LD3WB_2D_fixed
    };
    return SelectVLD(Node, 3, true, Opcodes);
  }
  case AArch64ISD::NEON_LD4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::LD4WB_8B_fixed,  AArch64::LD4WB_4H_fixed,
      AArch64::LD4WB_2S_fixed,  AArch64::LD1WB4V_1D_fixed,
      AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
      AArch64::LD4WB_4S_fixed,  AArch64::LD4WB_2D_fixed
    };
    return SelectVLD(Node, 4, true, Opcodes);
  }
  case AArch64ISD::NEON_ST1_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST1WB_8B_fixed,  AArch64::ST1WB_4H_fixed,
      AArch64::ST1WB_2S_fixed,  AArch64::ST1WB_1D_fixed,
      AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
      AArch64::ST1WB_4S_fixed,  AArch64::ST1WB_2D_fixed
    };
    return SelectVST(Node, 1, true, Opcodes);
  }
  case AArch64ISD::NEON_ST2_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST2WB_8B_fixed,  AArch64::ST2WB_4H_fixed,
      AArch64::ST2WB_2S_fixed,  AArch64::ST1WB2V_1D_fixed,
      AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
      AArch64::ST2WB_4S_fixed,  AArch64::ST2WB_2D_fixed
    };
    return SelectVST(Node, 2, true, Opcodes);
  }
  case AArch64ISD::NEON_ST3_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST3WB_8B_fixed,  AArch64::ST3WB_4H_fixed,
      AArch64::ST3WB_2S_fixed,  AArch64::ST1WB3V_1D_fixed,
      AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
      AArch64::ST3WB_4S_fixed,  AArch64::ST3WB_2D_fixed
    };
    return SelectVST(Node, 3, true, Opcodes);
  }
  case AArch64ISD::NEON_ST4_UPD: {
    static const uint16_t Opcodes[] = {
      AArch64::ST4WB_8B_fixed,  AArch64::ST4WB_4H_fixed,
      AArch64::ST4WB_2S_fixed,  AArch64::ST1WB4V_1D_fixed,
      AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
      AArch64::ST4WB_4S_fixed,  AArch64::ST4WB_2D_fixed
    };
    return SelectVST(Node, 4, true, Opcodes);
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    bool IsExt = false;
    switch (IntNo) {
      default:
        break;
      case Intrinsic::aarch64_neon_vtbx1:
        IsExt = true; // Fall through to the vtbl case.
      case Intrinsic::aarch64_neon_vtbl1:
        return SelectVTBL(Node, 1, IsExt);
      case Intrinsic::aarch64_neon_vtbx2:
        IsExt = true; // Fall through.
      case Intrinsic::aarch64_neon_vtbl2:
        return SelectVTBL(Node, 2, IsExt);
      case Intrinsic::aarch64_neon_vtbx3:
        IsExt = true; // Fall through.
      case Intrinsic::aarch64_neon_vtbl3:
        return SelectVTBL(Node, 3, IsExt);
      case Intrinsic::aarch64_neon_vtbx4:
        IsExt = true; // Fall through.
      case Intrinsic::aarch64_neon_vtbl4:
        return SelectVTBL(Node, 4, IsExt);
    }
    break;
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t Opcodes[] = { AArch64::LD1_8B,  AArch64::LD1_4H,
                                          AArch64::LD1_2S,  AArch64::LD1_1D,
                                          AArch64::LD1_16B, AArch64::LD1_8H,
                                          AArch64::LD1_4S,  AArch64::LD1_2D };
      return SelectVLD(Node, 1, false, Opcodes);
    }
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t Opcodes[] = { AArch64::LD2_8B,  AArch64::LD2_4H,
                                          AArch64::LD2_2S,  AArch64::LD1_2V_1D,
                                          AArch64::LD2_16B, AArch64::LD2_8H,
                                          AArch64::LD2_4S,  AArch64::LD2_2D };
      return SelectVLD(Node, 2, false, Opcodes);
    }
    case Intrinsic::arm_neon_vld3: {
      static const uint16_t Opcodes[] = { AArch64::LD3_8B,  AArch64::LD3_4H,
                                          AArch64::LD3_2S,  AArch64::LD1_3V_1D,
                                          AArch64::LD3_16B, AArch64::LD3_8H,
                                          AArch64::LD3_4S,  AArch64::LD3_2D };
      return SelectVLD(Node, 3, false, Opcodes);
    }
    case Intrinsic::arm_neon_vld4: {
      static const uint16_t Opcodes[] = { AArch64::LD4_8B,  AArch64::LD4_4H,
                                          AArch64::LD4_2S,  AArch64::LD1_4V_1D,
                                          AArch64::LD4_16B, AArch64::LD4_8H,
                                          AArch64::LD4_4S,  AArch64::LD4_2D };
      return SelectVLD(Node, 4, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t Opcodes[] = { AArch64::ST1_8B,  AArch64::ST1_4H,
                                          AArch64::ST1_2S,  AArch64::ST1_1D,
                                          AArch64::ST1_16B, AArch64::ST1_8H,
                                          AArch64::ST1_4S,  AArch64::ST1_2D };
      return SelectVST(Node, 1, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t Opcodes[] = { AArch64::ST2_8B,  AArch64::ST2_4H,
                                          AArch64::ST2_2S,  AArch64::ST1_2V_1D,
                                          AArch64::ST2_16B, AArch64::ST2_8H,
                                          AArch64::ST2_4S,  AArch64::ST2_2D };
      return SelectVST(Node, 2, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst3: {
      static const uint16_t Opcodes[] = { AArch64::ST3_8B,  AArch64::ST3_4H,
                                          AArch64::ST3_2S,  AArch64::ST1_3V_1D,
                                          AArch64::ST3_16B, AArch64::ST3_8H,
                                          AArch64::ST3_4S,  AArch64::ST3_2D };
      return SelectVST(Node, 3, false, Opcodes);
    }
    case Intrinsic::arm_neon_vst4: {
      static const uint16_t Opcodes[] = { AArch64::ST4_8B,  AArch64::ST4_4H,
                                          AArch64::ST4_2S,  AArch64::ST1_4V_1D,
                                          AArch64::ST4_16B, AArch64::ST4_8H,
                                          AArch64::ST4_4S,  AArch64::ST4_2D };
      return SelectVST(Node, 4, false, Opcodes);
    }
    }
    break;
  }
  default:
    break; // Let generic code handle it
  }

  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << "\n");

  return ResNode;
}

/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
/// instruction scheduling.
FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}