1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the ARM target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARM.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMTargetMachine.h"
17#include "MCTargetDesc/ARMAddressingModes.h"
18#include "llvm/ADT/StringSwitch.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGISel.h"
25#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Compiler.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Target/TargetLowering.h"
36#include "llvm/Target/TargetOptions.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41
// Debug escape hatch: when set, isel will not form shifter operands
// (register shifted by immediate/register folded into a data-processing
// instruction).
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

// Controls whether hasNoVMLxHazardUse() is consulted when deciding between
// a fused VMLA/VMLS and a separate VMUL+VADD sequence on cores with VMLx
// read-after-write hazards.
static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  cl::desc("Check fp vmla / vmls hazard at isel time"),
  cl::init(true));
51
52//===--------------------------------------------------------------------===//
53/// ARMDAGToDAGISel - ARM specific code to select ARM machine
54/// instructions for SelectionDAG operations.
55///
56namespace {
57
// Result kind returned by SelectAddrMode2Worker, telling the caller which
// flavor of ARM addressing mode 2 was matched.
enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};
62
class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  const char *getPassName() const override {
    return "ARM Instruction Selection";
  }

  /// PreprocessISelDAG - Rewrite certain add/and/srl patterns before
  /// selection so they map onto UBFX plus a folded shifter operand.
  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// Select - Main instruction-selection hook for a single node.
  SDNode *Select(SDNode *N) override;


  // Shifter-operand matching helpers.
  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  // ARM Addressing Modes:
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // SelectAddrMode2Worker does the actual AM2 matching; the wrappers below
  // filter its AddrMode2Type result for the two pattern flavors.
  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
                                      SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
  }

  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
  }

  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc) {
    SelectAddrMode2Worker(N, Base, Offset, Opc);
//    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    // This always matches one way or another.
    return true;
  }

  // Pack a condition-code constant and the CPSR register into the operand
  // pair expected by CMOV patterns.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode5(SDValue N, SDValue &Base,
                       SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Immediate-encoding predicates used by the generated matcher: test
  // whether Imm (or its complement) is encodable as an ARM / Thumb2
  // shifter-operand immediate.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
  /// ARM.
  SDNode *SelectARMIndexedLoad(SDNode *N);
  /// SelectT2IndexedLoad - Thumb2 counterpart of SelectARMIndexedLoad.
  SDNode *SelectT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
                          bool isUpdating, unsigned NumVecs,
                          const uint16_t *DOpcodes, const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.  (Q registers are not supported.)
  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                       const uint16_t *Opcodes);

  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
  /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
  /// generated to force the table registers to be consecutive.
  SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);

  /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
  SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  SDNode *SelectABSOp(SDNode *N);

  // Selection for the read_register / write_register intrinsics.
  SDNode *SelectReadRegister(SDNode *N);
  SDNode *SelectWriteRegister(SDNode *N);

  SDNode *SelectInlineAsm(SDNode *N);

  SDNode *SelectConcatVector(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
291}
292
293/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
294/// operand. If so Imm will receive the 32-bit value.
295static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
296  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
297    Imm = cast<ConstantSDNode>(N)->getZExtValue();
298    return true;
299  }
300  return false;
301}
302
303// isInt32Immediate - This method tests to see if a constant operand.
304// If so Imm will receive the 32 bit value.
305static bool isInt32Immediate(SDValue N, unsigned &Imm) {
306  return isInt32Immediate(N.getNode(), Imm);
307}
308
309// isOpcWithIntImmediate - This method tests to see if the node is a specific
310// opcode and that it has a immediate integer right operand.
311// If so Imm will receive the 32 bit value.
312static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
313  return N->getOpcode() == Opc &&
314         isInt32Immediate(N->getOperand(1).getNode(), Imm);
315}
316
317/// \brief Check whether a particular node is a constant value representable as
318/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
319///
320/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
321static bool isScaledConstantInRange(SDValue Node, int Scale,
322                                    int RangeMin, int RangeMax,
323                                    int &ScaledConstant) {
324  assert(Scale > 0 && "Invalid scale!");
325
326  // Check that this is a constant.
327  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
328  if (!C)
329    return false;
330
331  ScaledConstant = (int) C->getZExtValue();
332  if ((ScaledConstant % Scale) != 0)
333    return false;
334
335  ScaledConstant /= Scale;
336  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
337}
338
/// PreprocessISelDAG - On V6T2+ targets, rewrite
/// (add X1, (and (srl X2, c1), mask)) so the and/srl pair becomes a bitfield
/// extract (UBFX) and the re-scaling left shift folds into the add as a
/// shifter operand.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // UBFX is only available from ARMv6T2 onwards.
  if (!Subtarget->hasV6T2Ops())
    return;

  // With V6T2 available, being in Thumb mode implies Thumb2.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    // Find the AND-with-immediate operand; canonicalize it into N1.
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros, the remaining mask must be
    // contiguous ones, i.e. of the form 2^n - 1.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    // Skip shift amounts <= 2 — NOTE(review): presumably those cases are
    // already handled profitably by plain shifter-operand folding.
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    // Fold the two right shifts: srl by (c1 + tz).
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    // Mask with the de-scaled constant, then shift left by tz so the overall
    // value is unchanged; the shl becomes the add's shifter operand.
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
421
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 we don't care about scheduling hazards.
  if (OptLevel == CodeGenOpt::None)
    return true;

  // Respect the command-line override.
  if (!CheckVMLxHazard)
    return true;

  // Only these cores are checked for the VMLx hazard here.
  if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
      !Subtarget->isCortexA9() && !Subtarget->isSwift())
    return true;

  // With multiple uses we can't reason about the single consumer, so be
  // conservative and assume a hazard.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  // A copy to a register doesn't create the RAW hazard of interest.
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
470
471bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
472                                            ARM_AM::ShiftOpc ShOpcVal,
473                                            unsigned ShAmt) {
474  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
475    return true;
476  if (Shift.hasOneUse())
477    return true;
478  // R << 2 is free.
479  return ShOpcVal == ARM_AM::lsl &&
480         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
481}
482
483unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
484  if (Subtarget->isThumb()) {
485    if (Val <= 255) return 1;                               // MOV
486    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
487    if (~Val <= 255) return 2;                              // MOV + MVN
488    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
489  } else {
490    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
491    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
492    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
493    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
494  }
495  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
496  return 3; // Literal pool load
497}
498
/// canExtractShiftFromMul - Checks if N is a multiplication by a constant
/// where a power of two can be extracted into a shift, but only if doing so
/// simplifies materialization of the constant. On success, PowerOfTwo gets
/// the extracted shift amount and NewMulConst the reduced constant.
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  // Multiply-by-zero gains nothing from this rewrite.
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of, starting
  // from MaxShift and working down; fails (returns false) for odd constants.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
  return NewCost < OldCost;
}
532
/// replaceDAGValue - Replace N with M in CurDAG, in a way that also ensures
/// that M gets selected when N would have been selected.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Reposition M relative to N in the DAG's node list so the selector still
  // visits it, then redirect all uses of N to M.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  CurDAG->ReplaceAllUsesWith(N, M);
}
537
/// SelectImmShifterOperand - Match N as a register shifted by an immediate
/// ("rN, <shift> #imm") shifter operand, returning the base register in
/// BaseReg and the encoded shift in Opc. Also handles multiply-by-constant
/// by extracting a power of two into an LSL when profitable.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Select the reduced multiply now; it becomes the base register and
      // the extracted power of two is emitted as the LSL shifter operand.
      BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
                                               N.getOperand(0), NewMulConst)
                                   .getNode()),
                        0);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
                                                          PowerOfTwo),
                                      SDLoc(N), MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Only a constant shift amount can be encoded here; register-controlled
  // shifts are handled by SelectRegShifterOperand.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
578
/// SelectRegShifterOperand - Match N as a register shifted by a register
/// ("rN, <shift> rM") shifter operand: BaseReg/ShReg receive the two
/// registers and Opc the encoded shift kind.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  // The shift amount comes from ShReg for this form, so the immediate field
  // in the encoding stays 0.
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to the immediate form, handled by
  // SelectImmShifterOperand instead.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
605
606
/// SelectAddrModeImm12 - Match a base register plus a signed 12-bit
/// immediate offset (the LDRi12/STRi12 form). Always succeeds, falling back
/// to "base + 0" when no offset can be folded.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through ARMISD::Wrapper, except when it wraps a global address
    // (those are matched elsewhere).
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // Normalize subtraction into a negative offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
655
656
657
/// SelectLdStSOReg - Match an addressing-mode-2 "register +/- (possibly
/// shifted) register" operand for loads/stores. Simple R +/- imm12 forms
/// are deliberately left for the LDRi12 patterns.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // On A9-like cores and Swift, only fold a multiply with a single use,
  // since shifter operands are not always free there.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Clear the low bit; X*(2^k+1) == X + (X << k).
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Use X as both the base and the (shifted) offset register.
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Not profitable to fold; treat the shift result as a plain register.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset register.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
763
764
765//-----
766
/// Select an ARM addrmode2 address and classify it as either base-only
/// (+-imm12) or base plus a possibly-shifted register offset. Base, Offset
/// and the encoded AM2 opcode word are written to the output operands.
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  // Fold an odd-constant multiply into a reg +/- (reg << n) shifter operand.
  // On A9-like/Swift cores only do this when the mul has a single use, so a
  // shared multiply is not duplicated.
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  // Not an add/sub (nor an OR acting as one): match the whole node as a
  // base register with a zero offset.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      SDLoc(N), MVT::i32);
      return AM2_BASE;
    }
  }

  // On A9-like/Swift cores, don't fold a multi-use add into a shifter
  // operand; keep the whole node as the base so its value can be reused.
  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R), i.e. the shift on the LHS instead.
  // Only done when the LHS has multiple uses and we are not on an
  // A9-like/Swift core (see hasOneUse condition inside the negation).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Shift folded from the LHS: the un-shifted RHS becomes the base.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return AM2_SHOP;
}
903
/// Select the register (possibly shifted) offset form of an addrmode2
/// pre/post-indexed load/store. Fails when the offset is a constant that
/// fits the imm12 encoding, so the immediate forms are preferred.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // Note the inverted test: a constant in [0, 0x1000) is matched by the
  // imm12 offset selectors instead of burning a register here.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
939
940bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
941                                            SDValue &Offset, SDValue &Opc) {
942  unsigned Opcode = Op->getOpcode();
943  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
944    ? cast<LoadSDNode>(Op)->getAddressingMode()
945    : cast<StoreSDNode>(Op)->getAddressingMode();
946  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
947    ? ARM_AM::add : ARM_AM::sub;
948  int Val;
949  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
950    if (AddSub == ARM_AM::sub) Val *= -1;
951    Offset = CurDAG->getRegister(0, MVT::i32);
952    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
953    return true;
954  }
955
956  return false;
957}
958
959
960bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
961                                            SDValue &Offset, SDValue &Opc) {
962  unsigned Opcode = Op->getOpcode();
963  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
964    ? cast<LoadSDNode>(Op)->getAddressingMode()
965    : cast<StoreSDNode>(Op)->getAddressingMode();
966  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
967    ? ARM_AM::add : ARM_AM::sub;
968  int Val;
969  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
970    Offset = CurDAG->getRegister(0, MVT::i32);
971    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
972                                                      ARM_AM::no_shift),
973                                    SDLoc(Op), MVT::i32);
974    return true;
975  }
976
977  return false;
978}
979
980bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
981  Base = N;
982  return true;
983}
984
/// Select an ARM addrmode3 address (used by halfword/signed-byte/dual
/// loads and stores): base register plus either a register offset or an
/// 8-bit immediate offset.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Not an add (nor an OR acting as one): base with zero offset.
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Otherwise fall back to a register + register form.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
1038
/// Select an offset operand for an addrmode3 pre/post-indexed load/store:
/// an 8-bit immediate if the offset is a small constant, otherwise a
/// register offset.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
1060
/// Select an ARM addrmode5 address (used by VFP load/store): base register
/// plus an 8-bit immediate scaled by 4 (word multiples).
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8 (in units of 4 bytes), fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // Offset doesn't fit: use the whole node as the base with zero offset.
  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     SDLoc(N), MVT::i32);
  return true;
}
1104
/// Select an ARM addrmode6 address (NEON load/store): the address itself
/// plus an alignment operand derived from the memory operand.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1133
1134bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1135                                            SDValue &Offset) {
1136  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1137  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1138  if (AM != ISD::POST_INC)
1139    return false;
1140  Offset = N;
1141  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1142    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1143      Offset = CurDAG->getRegister(0, MVT::i32);
1144  }
1145  return true;
1146}
1147
1148bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1149                                       SDValue &Offset, SDValue &Label) {
1150  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1151    Offset = N.getOperand(0);
1152    SDValue N1 = N.getOperand(1);
1153    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1154                                      SDLoc(N), MVT::i32);
1155    return true;
1156  }
1157
1158  return false;
1159}
1160
1161
1162//===----------------------------------------------------------------------===//
1163//                         Thumb Addressing Modes
1164//===----------------------------------------------------------------------===//
1165
1166bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1167                                            SDValue &Base, SDValue &Offset){
1168  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1169    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1170    if (!NC || !NC->isNullValue())
1171      return false;
1172
1173    Base = Offset = N;
1174    return true;
1175  }
1176
1177  Base = N.getOperand(0);
1178  Offset = N.getOperand(1);
1179  return true;
1180}
1181
/// Select a Thumb1 base + scaled-imm5 address. Scale is the access size in
/// bytes (1, 2 or 4); the immediate must be a non-negative multiple of
/// Scale below 32*Scale.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1210
1211bool
1212ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1213                                           SDValue &OffImm) {
1214  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1215}
1216
1217bool
1218ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1219                                           SDValue &OffImm) {
1220  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1221}
1222
1223bool
1224ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1225                                           SDValue &OffImm) {
1226  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1227}
1228
/// Select a Thumb1 SP-relative address: a frame index, or SP/frame-index
/// plus an imm8 scaled by 4. Note the side effect: matched frame objects
/// are forced to at least 4-byte alignment so the scaled offset is valid.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo *MFI = MF->getFrameInfo();
    if (MFI->getObjectAlignment(FI) < 4)
      MFI->setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        MachineFrameInfo *MFI = MF->getFrameInfo();
        if (MFI->getObjectAlignment(FI) < 4)
          MFI->setObjectAlignment(FI, 4);
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1271
1272
1273//===----------------------------------------------------------------------===//
1274//                        Thumb 2 Addressing Modes
1275//===----------------------------------------------------------------------===//
1276
1277
/// Select a Thumb2 base + imm12 address (t2LDRi12/t2STRi12 style).
/// Defers to other selectors for constant pools (t2LDRpci) and negative
/// offsets (t2LDRi8).
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1331
/// Select a Thumb2 base - imm8 address (t2LDRi8 style). Only strictly
/// negative offsets in [-255, 0) are accepted; non-negative offsets are
/// left to the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1358
1359bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1360                                                 SDValue &OffImm){
1361  unsigned Opcode = Op->getOpcode();
1362  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1363    ? cast<LoadSDNode>(Op)->getAddressingMode()
1364    : cast<StoreSDNode>(Op)->getAddressingMode();
1365  int RHSC;
1366  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1367    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1368      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1369      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1370    return true;
1371  }
1372
1373  return false;
1374}
1375
/// Select a Thumb2 register + shifted-register address:
/// (R + R) or (R + (R << [1,2,3])). Immediate offsets are deliberately
/// rejected so the imm12/imm8 forms win.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shift amounts 0-3 are encodable in the Thumb2 soreg form.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1433
/// Select the address for Thumb2 ldrex/strex: base + imm8 scaled by 4.
/// Always returns true; unsuitable offsets simply leave the whole node as
/// the base with a zero offset.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // The encoded immediate is offset/4, so it must be a word multiple
  // no larger than 255*4.
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1462
1463//===--------------------------------------------------------------------===//
1464
1465/// getAL - Returns a ARMCC::AL immediate node.
1466static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
1467  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1468}
1469
/// Select a pre/post-indexed ARM-mode load, choosing between the imm and
/// register offset LDR/LDRB/LDRH/LDRSB/LDRSH variants based on the loaded
/// type, extension kind and which offset selector matches. Returns the new
/// machine node, or null if the load is unindexed or nothing matched.
SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return nullptr;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // i32 loads: try imm-pre, then imm-post, then register offset.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3 (LDRH/LDRSH).
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Signed byte loads need addrmode3 (LDRSB); unsigned byte loads use
    // the addrmode2 LDRB variants.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    // The *_PRE_IMM forms take no separate offset-register operand.
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                    MVT::i32, MVT::Other, Ops);
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                    MVT::i32, MVT::Other, Ops);
    }
  }

  return nullptr;
}
1542
/// Select a pre/post-indexed Thumb2 load. All variants share the imm8
/// offset form; the opcode is picked from the loaded type and whether the
/// load sign-extends. Returns the new machine node, or null if the load is
/// unindexed or the offset/type doesn't match.
SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return nullptr;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return nullptr;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    // Operands: base, offset, predicate (AL + no pred-register), chain.
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                  MVT::Other, Ops);
  }

  return nullptr;
}
1590
1591/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1592SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1593  SDLoc dl(V0.getNode());
1594  SDValue RegClass =
1595    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1596  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1597  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1598  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1599  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1600}
1601
1602/// \brief Form a D register from a pair of S registers.
1603SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1604  SDLoc dl(V0.getNode());
1605  SDValue RegClass =
1606    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1607  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1608  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1609  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1610  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1611}
1612
1613/// \brief Form a quad register from a pair of D registers.
1614SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1615  SDLoc dl(V0.getNode());
1616  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1617                                               MVT::i32);
1618  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1619  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1620  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1621  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1622}
1623
1624/// \brief Form 4 consecutive D registers from a pair of Q registers.
1625SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1626  SDLoc dl(V0.getNode());
1627  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1628                                               MVT::i32);
1629  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1630  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1631  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1632  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1633}
1634
1635/// \brief Form 4 consecutive S registers.
1636SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1637                                   SDValue V2, SDValue V3) {
1638  SDLoc dl(V0.getNode());
1639  SDValue RegClass =
1640    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1641  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1642  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1643  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1644  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1645  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1646                                    V2, SubReg2, V3, SubReg3 };
1647  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1648}
1649
1650/// \brief Form 4 consecutive D registers.
1651SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1652                                   SDValue V2, SDValue V3) {
1653  SDLoc dl(V0.getNode());
1654  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1655                                               MVT::i32);
1656  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1657  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1658  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1659  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1660  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1661                                    V2, SubReg2, V3, SubReg3 };
1662  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1663}
1664
1665/// \brief Form 4 consecutive Q registers.
1666SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1667                                   SDValue V2, SDValue V3) {
1668  SDLoc dl(V0.getNode());
1669  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1670                                               MVT::i32);
1671  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1672  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1673  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1674  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1675  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1676                                    V2, SubReg2, V3, SubReg3 };
1677  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1678}
1679
1680/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1681/// of a NEON VLD or VST instruction.  The supported values depend on the
1682/// number of registers being loaded.
1683SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
1684                                       unsigned NumVecs, bool is64BitVector) {
1685  unsigned NumRegs = NumVecs;
1686  if (!is64BitVector && NumVecs < 3)
1687    NumRegs *= 2;
1688
1689  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1690  if (Alignment >= 32 && NumRegs == 4)
1691    Alignment = 32;
1692  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1693    Alignment = 16;
1694  else if (Alignment >= 8)
1695    Alignment = 8;
1696  else
1697    Alignment = 0;
1698
1699  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1700}
1701
1702static bool isVLDfixed(unsigned Opc)
1703{
1704  switch (Opc) {
1705  default: return false;
1706  case ARM::VLD1d8wb_fixed : return true;
1707  case ARM::VLD1d16wb_fixed : return true;
1708  case ARM::VLD1d64Qwb_fixed : return true;
1709  case ARM::VLD1d32wb_fixed : return true;
1710  case ARM::VLD1d64wb_fixed : return true;
1711  case ARM::VLD1d64TPseudoWB_fixed : return true;
1712  case ARM::VLD1d64QPseudoWB_fixed : return true;
1713  case ARM::VLD1q8wb_fixed : return true;
1714  case ARM::VLD1q16wb_fixed : return true;
1715  case ARM::VLD1q32wb_fixed : return true;
1716  case ARM::VLD1q64wb_fixed : return true;
1717  case ARM::VLD2d8wb_fixed : return true;
1718  case ARM::VLD2d16wb_fixed : return true;
1719  case ARM::VLD2d32wb_fixed : return true;
1720  case ARM::VLD2q8PseudoWB_fixed : return true;
1721  case ARM::VLD2q16PseudoWB_fixed : return true;
1722  case ARM::VLD2q32PseudoWB_fixed : return true;
1723  case ARM::VLD2DUPd8wb_fixed : return true;
1724  case ARM::VLD2DUPd16wb_fixed : return true;
1725  case ARM::VLD2DUPd32wb_fixed : return true;
1726  }
1727}
1728
1729static bool isVSTfixed(unsigned Opc)
1730{
1731  switch (Opc) {
1732  default: return false;
1733  case ARM::VST1d8wb_fixed : return true;
1734  case ARM::VST1d16wb_fixed : return true;
1735  case ARM::VST1d32wb_fixed : return true;
1736  case ARM::VST1d64wb_fixed : return true;
1737  case ARM::VST1q8wb_fixed : return true;
1738  case ARM::VST1q16wb_fixed : return true;
1739  case ARM::VST1q32wb_fixed : return true;
1740  case ARM::VST1q64wb_fixed : return true;
1741  case ARM::VST1d64TPseudoWB_fixed : return true;
1742  case ARM::VST1d64QPseudoWB_fixed : return true;
1743  case ARM::VST2d8wb_fixed : return true;
1744  case ARM::VST2d16wb_fixed : return true;
1745  case ARM::VST2d32wb_fixed : return true;
1746  case ARM::VST2q8PseudoWB_fixed : return true;
1747  case ARM::VST2q16PseudoWB_fixed : return true;
1748  case ARM::VST2q32PseudoWB_fixed : return true;
1749  }
1750}
1751
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The _register forms take an explicit increment-register operand where
// the _fixed forms post-increment by the access size implicitly.
// NOTE(review): ARM::VLD1d64Twb_fixed is mapped below but is not listed in
// isVLDfixed(), so passing it would trip the assert; from this file's
// visible callers that opcode never reaches here -- confirm if adding
// new call sites.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
    && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 with writeback.
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;

  // VST1 with writeback.
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 with writeback.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 with writeback.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP with writeback.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1803
/// Select a NEON VLD1/VLD2/VLD3/VLD4 intrinsic (optionally with address
/// writeback when \p isUpdating) into machine nodes.  \p NumVecs is the
/// number of vectors loaded (1-4).  The opcode tables are indexed by the
/// element-size OpcodeIndex computed below: \p DOpcodes for 64-bit
/// (D-register) vectors; \p QOpcodes0 for directly-supported Q-register
/// forms (VLD1/VLD2); for split VLD3/VLD4 of Q vectors, \p QOpcodes0 loads
/// the even D subregs and \p QOpcodes1 the odd ones.  Returns the new node
/// for NumVecs == 1, otherwise wires results up via ReplaceUses and
/// returns nullptr.
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Address operand index: 1 for the updating (post-inc) node form,
  // 2 otherwise.
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return nullptr;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Clamp alignment to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
    break;
  }

  // Multi-vector loads produce one wide result (a D/Q-register tuple,
  // modeled as a vector of i64); subregisters are extracted afterwards.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    // A 3-vector load still occupies a 4-register tuple (last part undef).
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;  // each Q vector spans two D registers
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // updated base address
  ResTys.push_back(MVT::Other);  // chain

  SDValue Pred = getAL(CurDAG, dl);
  // Reg0 stands in for "no register" (predicate reg / fixed-stride inc).
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    // VLdA results: 0 = data tuple, 1 = updated address, 2 = chain.
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    // Tie in the partially-filled tuple from the first (even) load.
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  // Result Vec of N maps to subreg Sub0+Vec of the loaded tuple; the
  // trailing results are the chain and, if updating, the new address.
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  return nullptr;
}
1936
/// Select a NEON VST1/VST2/VST3/VST4 intrinsic (optionally with address
/// writeback when \p isUpdating) into machine nodes.  \p NumVecs is the
/// number of vectors stored (1-4).  The opcode tables are indexed by the
/// element-size OpcodeIndex computed below: \p DOpcodes for 64-bit
/// (D-register) vectors; \p QOpcodes0 for directly-supported Q-register
/// forms (VST1/VST2); for split VST3/VST4 of Q vectors, \p QOpcodes0
/// stores the even D subregs and \p QOpcodes1 the odd ones.  Always
/// returns the (final) new store node.
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Address operand index: 1 for the updating (post-inc) node form,
  // 2 otherwise.  The first stored vector follows the address (and, when
  // updating, the increment operand).
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return nullptr;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Clamp alignment to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
    break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // updated base address
  ResTys.push_back(MVT::Other);  // chain

  SDValue Pred = getAL(CurDAG, dl);
  // Reg0 stands in for "no register" (predicate reg / fixed-stride inc).
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      if  (!isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(Inc);
      else if (NumVecs > 2 && !isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

    return VSt;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  // VStA results: 0 = updated address, 1 = chain.
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
  return VStB;
}
2083
/// Select a NEON single-lane VLD2/3/4-lane or VST2/3/4-lane intrinsic
/// (\p IsLoad selects which; \p isUpdating adds address writeback).
/// \p NumVecs vectors (2-4) are combined into one register tuple with a
/// lane-number operand; \p DOpcodes / \p QOpcodes are the opcode tables
/// for D- and Q-register vectors respectively, indexed by element size.
/// For loads, results are wired up via ReplaceUses and nullptr is
/// returned; for stores the new node is returned.
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
                                         bool isUpdating, unsigned NumVecs,
                                         const uint16_t *DOpcodes,
                                         const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Address operand index: 1 for the updating (post-inc) node form,
  // 2 otherwise.  The first vector operand follows the address (and, when
  // updating, the increment operand).
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return nullptr;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand following the vectors.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to what the lane access supports: at most the
  // total bytes accessed (NumVecs elements), at least 8 (else unaligned),
  // and a power of two.  3-element lane ops take no alignment operand.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    // Loads return one wide register tuple (modeled as a vector of i64);
    // a 3-vector op still occupies a 4-register tuple.
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;  // each Q vector spans two D registers
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);  // updated base address
  ResTys.push_back(MVT::Other);  // chain

  SDValue Pred = getAL(CurDAG, dl);
  // Reg0 stands in for "no register" (predicate reg / fixed-stride inc).
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
  }

  // Combine the incoming vectors into one tuple register via REG_SEQUENCE
  // (last part undef for the 3-vector case).
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  if (!IsLoad)
    return VLdLn;

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  // Result Vec of N maps to subreg Sub0+Vec of the loaded tuple; the
  // trailing results are the chain and, if updating, the new address.
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  return nullptr;
}
2202
/// Select a NEON VLD2DUP/VLD3DUP/VLD4DUP intrinsic (load one element and
/// duplicate it into all lanes of \p NumVecs D registers, 2-4), optionally
/// with address writeback when \p isUpdating.  \p Opcodes is the opcode
/// table indexed by element size.  Results are wired up via ReplaceUses
/// and nullptr is returned.
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *Opcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
    return nullptr;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Clamp the alignment to what the dup access supports: at most the total
  // bytes accessed (NumVecs elements), at least 8 (else unaligned), and a
  // power of two.  3-element dup ops take no alignment operand.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Only 64-bit vector element types appear here (dup loads produce
  // D registers).
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  }

  SDValue Pred = getAL(CurDAG, dl);
  // Reg0 stands in for "no register" (predicate reg / fixed-stride inc).
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDValue SuperReg;
  unsigned Opc = Opcodes[OpcodeIndex];
  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // fixed-stride update instructions don't have an explicit writeback
    // operand. It's implicit in the opcode itself.
    SDValue Inc = N->getOperand(2);
    if (!isa<ConstantSDNode>(Inc.getNode()))
      Ops.push_back(Inc);
    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
    else if (NumVecs > 2)
      Ops.push_back(Reg0);
  }
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  // One wide result tuple (a 3-vector op still occupies 4 registers),
  // then optionally the updated address, then the chain.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  std::vector<EVT> ResTys;
  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);
  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
  SuperReg = SDValue(VLdDup, 0);

  // Extract the subregisters.
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  unsigned SubIdx = ARM::dsub_0;
  // Result Vec of N maps to subreg dsub_0+Vec of the loaded tuple; the
  // trailing results are the chain and, if updating, the new address.
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  return nullptr;
}
2285
/// Select a NEON vtbl/vtbx intrinsic with \p NumVecs table registers (2-4)
/// into machine opcode \p Opc.  \p IsExt distinguishes vtbx, whose extra
/// operand 1 (the fall-back/destination vector) shifts the table registers
/// up by one.  The table registers are merged into one D-register tuple
/// via REG_SEQUENCE so the register allocator keeps them consecutive.
SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
                                    unsigned Opc) {
  assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  unsigned FirstTblReg = IsExt ? 2 : 1;

  // Form a REG_SEQUENCE to force register allocation.
  SDValue RegSeq;
  SDValue V0 = N->getOperand(FirstTblReg + 0);
  SDValue V1 = N->getOperand(FirstTblReg + 1);
  if (NumVecs == 2)
    RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
  else {
    SDValue V2 = N->getOperand(FirstTblReg + 2);
    // If it's a vtbl3, form a quad D-register and leave the last part as
    // an undef.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(FirstTblReg + 3);
    RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
  }

  // Operands: [fall-back vector (vtbx only),] table tuple, index vector,
  // predicate, predicate register.
  SmallVector<SDValue, 6> Ops;
  if (IsExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
  Ops.push_back(getAL(CurDAG, dl)); // predicate
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
2318
/// Try to select N as a single bitfield-extract instruction (SBFX/UBFX, or a
/// plain shift when the field reaches the top bit). Three DAG shapes are
/// recognized:
///   1. (and (srl x, lsb), mask)             -- unsigned extract
///   2. (srl/sra (shl x, c2), c1)            -- extract via double shift
///   3. (sext_inreg (srl/sra x, lsb), vt)    -- signed extract
/// Returns the selected machine node, or nullptr to let the auto-generated
/// patterns handle N. NOTE(review): the visible callers in this file pass
/// isSigned=false for SRL/AND and isSigned=true for SRA/SIGN_EXTEND_INREG.
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
                                                     bool isSigned) {
  // SBFX/UBFX only exist from ARMv6T2 onwards.
  if (!Subtarget->hasV6T2Ops())
    return nullptr;

  // Default opcode: signed or unsigned bitfield extract, in the Thumb2 or
  // ARM encoding. Overwritten below when a cheaper shift suffices.
  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return nullptr;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Note: The width operand is encoded as width-1.
        // NOTE(review): this wraps if And_imm == 0; presumably an 'and'
        // with 0 is folded away before selection -- confirm upstream.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        // Reg0: placeholder for the unused CPSR/predicate register operand.
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        // If the field extends to the MSB, a single right shift extracts it.
        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
        }

        // General case: UBFX/SBFX src, #LSB, #(Width+1).
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      }
    }
    // AND whose mask/shift did not match the extract pattern.
    return nullptr;
  }

  // Otherwise, we're looking for a shift of a shift
  // i.e. (srl/sra (shl x, Shl_imm), Srl_imm), which extracts the field
  // starting at bit (Srl_imm - Shl_imm) with width (32 - Srl_imm).
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB means the double shift is not a pure field extract.
      if (LSB < 0)
        return nullptr;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    }
  }

  // (sext_inreg (srl/sra x, LSB), vt): the extracted width is the number of
  // bits of vt, and the shift amount gives the field's starting bit.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return nullptr;

    // The field must fit entirely within the 32-bit register.
    if (LSB + Width > 32)
      return nullptr;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
  }

  // No bitfield-extract pattern matched; fall back to generated patterns.
  return nullptr;
}
2420
2421/// Target-specific DAG combining for ISD::XOR.
2422/// Target-independent combining lowers SELECT_CC nodes of the form
2423/// select_cc setg[ge] X,  0,  X, -X
2424/// select_cc setgt    X, -1,  X, -X
2425/// select_cc setl[te] X,  0, -X,  X
2426/// select_cc setlt    X,  1, -X,  X
2427/// which represent Integer ABS into:
2428/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2429/// ARM instruction selection detects the latter and matches it to
2430/// ARM::ABS or ARM::t2ABS machine node.
2431SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2432  SDValue XORSrc0 = N->getOperand(0);
2433  SDValue XORSrc1 = N->getOperand(1);
2434  EVT VT = N->getValueType(0);
2435
2436  if (Subtarget->isThumb1Only())
2437    return nullptr;
2438
2439  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2440    return nullptr;
2441
2442  SDValue ADDSrc0 = XORSrc0.getOperand(0);
2443  SDValue ADDSrc1 = XORSrc0.getOperand(1);
2444  SDValue SRASrc0 = XORSrc1.getOperand(0);
2445  SDValue SRASrc1 = XORSrc1.getOperand(1);
2446  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2447  EVT XType = SRASrc0.getValueType();
2448  unsigned Size = XType.getSizeInBits() - 1;
2449
2450  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2451      XType.isInteger() && SRAConstant != nullptr &&
2452      Size == SRAConstant->getZExtValue()) {
2453    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2454    return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2455  }
2456
2457  return nullptr;
2458}
2459
2460SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2461  // The only time a CONCAT_VECTORS operation can have legal types is when
2462  // two 64-bit vectors are concatenated to a 128-bit vector.
2463  EVT VT = N->getValueType(0);
2464  if (!VT.is128BitVector() || N->getNumOperands() != 2)
2465    llvm_unreachable("unexpected CONCAT_VECTORS");
2466  return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2467}
2468
2469SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2470  SDLoc dl(N);
2471
2472  if (N->isMachineOpcode()) {
2473    N->setNodeId(-1);
2474    return nullptr;   // Already selected.
2475  }
2476
2477  switch (N->getOpcode()) {
2478  default: break;
2479  case ISD::WRITE_REGISTER: {
2480    SDNode *ResNode = SelectWriteRegister(N);
2481    if (ResNode)
2482      return ResNode;
2483    break;
2484  }
2485  case ISD::READ_REGISTER: {
2486    SDNode *ResNode = SelectReadRegister(N);
2487    if (ResNode)
2488      return ResNode;
2489    break;
2490  }
2491  case ISD::INLINEASM: {
2492    SDNode *ResNode = SelectInlineAsm(N);
2493    if (ResNode)
2494      return ResNode;
2495    break;
2496  }
2497  case ISD::XOR: {
2498    // Select special operations if XOR node forms integer ABS pattern
2499    SDNode *ResNode = SelectABSOp(N);
2500    if (ResNode)
2501      return ResNode;
2502    // Other cases are autogenerated.
2503    break;
2504  }
2505  case ISD::Constant: {
2506    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2507    // If we can't materialize the constant we need to use a literal pool
2508    if (ConstantMaterializationCost(Val) > 2) {
2509      SDValue CPIdx = CurDAG->getTargetConstantPool(
2510          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2511          TLI->getPointerTy(CurDAG->getDataLayout()));
2512
2513      SDNode *ResNode;
2514      if (Subtarget->isThumb()) {
2515        SDValue Pred = getAL(CurDAG, dl);
2516        SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2517        SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2518        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2519                                         Ops);
2520      } else {
2521        SDValue Ops[] = {
2522          CPIdx,
2523          CurDAG->getTargetConstant(0, dl, MVT::i32),
2524          getAL(CurDAG, dl),
2525          CurDAG->getRegister(0, MVT::i32),
2526          CurDAG->getEntryNode()
2527        };
2528        ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2529                                       Ops);
2530      }
2531      ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2532      return nullptr;
2533    }
2534
2535    // Other cases are autogenerated.
2536    break;
2537  }
2538  case ISD::FrameIndex: {
2539    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2540    int FI = cast<FrameIndexSDNode>(N)->getIndex();
2541    SDValue TFI = CurDAG->getTargetFrameIndex(
2542        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2543    if (Subtarget->isThumb1Only()) {
2544      // Set the alignment of the frame object to 4, to avoid having to generate
2545      // more than one ADD
2546      MachineFrameInfo *MFI = MF->getFrameInfo();
2547      if (MFI->getObjectAlignment(FI) < 4)
2548        MFI->setObjectAlignment(FI, 4);
2549      return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2550                                  CurDAG->getTargetConstant(0, dl, MVT::i32));
2551    } else {
2552      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2553                      ARM::t2ADDri : ARM::ADDri);
2554      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2555                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2556                        CurDAG->getRegister(0, MVT::i32) };
2557      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2558    }
2559  }
2560  case ISD::SRL:
2561    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2562      return I;
2563    break;
2564  case ISD::SIGN_EXTEND_INREG:
2565  case ISD::SRA:
2566    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2567      return I;
2568    break;
2569  case ISD::MUL:
2570    if (Subtarget->isThumb1Only())
2571      break;
2572    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2573      unsigned RHSV = C->getZExtValue();
2574      if (!RHSV) break;
2575      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2576        unsigned ShImm = Log2_32(RHSV-1);
2577        if (ShImm >= 32)
2578          break;
2579        SDValue V = N->getOperand(0);
2580        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2581        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2582        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2583        if (Subtarget->isThumb()) {
2584          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2585          return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2586        } else {
2587          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2588                            Reg0 };
2589          return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2590        }
2591      }
2592      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2593        unsigned ShImm = Log2_32(RHSV+1);
2594        if (ShImm >= 32)
2595          break;
2596        SDValue V = N->getOperand(0);
2597        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2598        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2599        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2600        if (Subtarget->isThumb()) {
2601          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2602          return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2603        } else {
2604          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2605                            Reg0 };
2606          return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2607        }
2608      }
2609    }
2610    break;
2611  case ISD::AND: {
2612    // Check for unsigned bitfield extract
2613    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2614      return I;
2615
2616    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2617    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2618    // are entirely contributed by c2 and lower 16-bits are entirely contributed
2619    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2620    // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2621    EVT VT = N->getValueType(0);
2622    if (VT != MVT::i32)
2623      break;
2624    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2625      ? ARM::t2MOVTi16
2626      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2627    if (!Opc)
2628      break;
2629    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2630    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2631    if (!N1C)
2632      break;
2633    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2634      SDValue N2 = N0.getOperand(1);
2635      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2636      if (!N2C)
2637        break;
2638      unsigned N1CVal = N1C->getZExtValue();
2639      unsigned N2CVal = N2C->getZExtValue();
2640      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2641          (N1CVal & 0xffffU) == 0xffffU &&
2642          (N2CVal & 0xffffU) == 0x0U) {
2643        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2644                                                  dl, MVT::i32);
2645        SDValue Ops[] = { N0.getOperand(0), Imm16,
2646                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2647        return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2648      }
2649    }
2650    break;
2651  }
2652  case ARMISD::VMOVRRD:
2653    return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2654                                  N->getOperand(0), getAL(CurDAG, dl),
2655                                  CurDAG->getRegister(0, MVT::i32));
2656  case ISD::UMUL_LOHI: {
2657    if (Subtarget->isThumb1Only())
2658      break;
2659    if (Subtarget->isThumb()) {
2660      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2661                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2662      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2663    } else {
2664      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2665                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2666                        CurDAG->getRegister(0, MVT::i32) };
2667      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2668                                    ARM::UMULL : ARM::UMULLv5,
2669                                    dl, MVT::i32, MVT::i32, Ops);
2670    }
2671  }
2672  case ISD::SMUL_LOHI: {
2673    if (Subtarget->isThumb1Only())
2674      break;
2675    if (Subtarget->isThumb()) {
2676      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2677                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2678      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2679    } else {
2680      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2681                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2682                        CurDAG->getRegister(0, MVT::i32) };
2683      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2684                                    ARM::SMULL : ARM::SMULLv5,
2685                                    dl, MVT::i32, MVT::i32, Ops);
2686    }
2687  }
2688  case ARMISD::UMLAL:{
2689    if (Subtarget->isThumb()) {
2690      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2691                        N->getOperand(3), getAL(CurDAG, dl),
2692                        CurDAG->getRegister(0, MVT::i32)};
2693      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2694    }else{
2695      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2696                        N->getOperand(3), getAL(CurDAG, dl),
2697                        CurDAG->getRegister(0, MVT::i32),
2698                        CurDAG->getRegister(0, MVT::i32) };
2699      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2700                                      ARM::UMLAL : ARM::UMLALv5,
2701                                      dl, MVT::i32, MVT::i32, Ops);
2702    }
2703  }
2704  case ARMISD::SMLAL:{
2705    if (Subtarget->isThumb()) {
2706      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2707                        N->getOperand(3), getAL(CurDAG, dl),
2708                        CurDAG->getRegister(0, MVT::i32)};
2709      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2710    }else{
2711      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2712                        N->getOperand(3), getAL(CurDAG, dl),
2713                        CurDAG->getRegister(0, MVT::i32),
2714                        CurDAG->getRegister(0, MVT::i32) };
2715      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2716                                      ARM::SMLAL : ARM::SMLALv5,
2717                                      dl, MVT::i32, MVT::i32, Ops);
2718    }
2719  }
2720  case ISD::LOAD: {
2721    SDNode *ResNode = nullptr;
2722    if (Subtarget->isThumb() && Subtarget->hasThumb2())
2723      ResNode = SelectT2IndexedLoad(N);
2724    else
2725      ResNode = SelectARMIndexedLoad(N);
2726    if (ResNode)
2727      return ResNode;
2728    // Other cases are autogenerated.
2729    break;
2730  }
2731  case ARMISD::BRCOND: {
2732    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2733    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2734    // Pattern complexity = 6  cost = 1  size = 0
2735
2736    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2737    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2738    // Pattern complexity = 6  cost = 1  size = 0
2739
2740    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2741    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2742    // Pattern complexity = 6  cost = 1  size = 0
2743
2744    unsigned Opc = Subtarget->isThumb() ?
2745      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2746    SDValue Chain = N->getOperand(0);
2747    SDValue N1 = N->getOperand(1);
2748    SDValue N2 = N->getOperand(2);
2749    SDValue N3 = N->getOperand(3);
2750    SDValue InFlag = N->getOperand(4);
2751    assert(N1.getOpcode() == ISD::BasicBlock);
2752    assert(N2.getOpcode() == ISD::Constant);
2753    assert(N3.getOpcode() == ISD::Register);
2754
2755    SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2756                               cast<ConstantSDNode>(N2)->getZExtValue()), dl,
2757                               MVT::i32);
2758    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2759    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2760                                             MVT::Glue, Ops);
2761    Chain = SDValue(ResNode, 0);
2762    if (N->getNumValues() == 2) {
2763      InFlag = SDValue(ResNode, 1);
2764      ReplaceUses(SDValue(N, 1), InFlag);
2765    }
2766    ReplaceUses(SDValue(N, 0),
2767                SDValue(Chain.getNode(), Chain.getResNo()));
2768    return nullptr;
2769  }
2770  case ARMISD::VZIP: {
2771    unsigned Opc = 0;
2772    EVT VT = N->getValueType(0);
2773    switch (VT.getSimpleVT().SimpleTy) {
2774    default: return nullptr;
2775    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2776    case MVT::v4i16: Opc = ARM::VZIPd16; break;
2777    case MVT::v2f32:
2778    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2779    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2780    case MVT::v16i8: Opc = ARM::VZIPq8; break;
2781    case MVT::v8i16: Opc = ARM::VZIPq16; break;
2782    case MVT::v4f32:
2783    case MVT::v4i32: Opc = ARM::VZIPq32; break;
2784    }
2785    SDValue Pred = getAL(CurDAG, dl);
2786    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2787    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2788    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2789  }
2790  case ARMISD::VUZP: {
2791    unsigned Opc = 0;
2792    EVT VT = N->getValueType(0);
2793    switch (VT.getSimpleVT().SimpleTy) {
2794    default: return nullptr;
2795    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2796    case MVT::v4i16: Opc = ARM::VUZPd16; break;
2797    case MVT::v2f32:
2798    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2799    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2800    case MVT::v16i8: Opc = ARM::VUZPq8; break;
2801    case MVT::v8i16: Opc = ARM::VUZPq16; break;
2802    case MVT::v4f32:
2803    case MVT::v4i32: Opc = ARM::VUZPq32; break;
2804    }
2805    SDValue Pred = getAL(CurDAG, dl);
2806    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2807    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2808    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2809  }
2810  case ARMISD::VTRN: {
2811    unsigned Opc = 0;
2812    EVT VT = N->getValueType(0);
2813    switch (VT.getSimpleVT().SimpleTy) {
2814    default: return nullptr;
2815    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2816    case MVT::v4i16: Opc = ARM::VTRNd16; break;
2817    case MVT::v2f32:
2818    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2819    case MVT::v16i8: Opc = ARM::VTRNq8; break;
2820    case MVT::v8i16: Opc = ARM::VTRNq16; break;
2821    case MVT::v4f32:
2822    case MVT::v4i32: Opc = ARM::VTRNq32; break;
2823    }
2824    SDValue Pred = getAL(CurDAG, dl);
2825    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2826    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2827    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2828  }
2829  case ARMISD::BUILD_VECTOR: {
2830    EVT VecVT = N->getValueType(0);
2831    EVT EltVT = VecVT.getVectorElementType();
2832    unsigned NumElts = VecVT.getVectorNumElements();
2833    if (EltVT == MVT::f64) {
2834      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2835      return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2836    }
2837    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2838    if (NumElts == 2)
2839      return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2840    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2841    return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2842                     N->getOperand(2), N->getOperand(3));
2843  }
2844
2845  case ARMISD::VLD2DUP: {
2846    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2847                                        ARM::VLD2DUPd32 };
2848    return SelectVLDDup(N, false, 2, Opcodes);
2849  }
2850
2851  case ARMISD::VLD3DUP: {
2852    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2853                                        ARM::VLD3DUPd16Pseudo,
2854                                        ARM::VLD3DUPd32Pseudo };
2855    return SelectVLDDup(N, false, 3, Opcodes);
2856  }
2857
2858  case ARMISD::VLD4DUP: {
2859    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2860                                        ARM::VLD4DUPd16Pseudo,
2861                                        ARM::VLD4DUPd32Pseudo };
2862    return SelectVLDDup(N, false, 4, Opcodes);
2863  }
2864
2865  case ARMISD::VLD2DUP_UPD: {
2866    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2867                                        ARM::VLD2DUPd16wb_fixed,
2868                                        ARM::VLD2DUPd32wb_fixed };
2869    return SelectVLDDup(N, true, 2, Opcodes);
2870  }
2871
2872  case ARMISD::VLD3DUP_UPD: {
2873    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2874                                        ARM::VLD3DUPd16Pseudo_UPD,
2875                                        ARM::VLD3DUPd32Pseudo_UPD };
2876    return SelectVLDDup(N, true, 3, Opcodes);
2877  }
2878
2879  case ARMISD::VLD4DUP_UPD: {
2880    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2881                                        ARM::VLD4DUPd16Pseudo_UPD,
2882                                        ARM::VLD4DUPd32Pseudo_UPD };
2883    return SelectVLDDup(N, true, 4, Opcodes);
2884  }
2885
2886  case ARMISD::VLD1_UPD: {
2887    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2888                                         ARM::VLD1d16wb_fixed,
2889                                         ARM::VLD1d32wb_fixed,
2890                                         ARM::VLD1d64wb_fixed };
2891    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2892                                         ARM::VLD1q16wb_fixed,
2893                                         ARM::VLD1q32wb_fixed,
2894                                         ARM::VLD1q64wb_fixed };
2895    return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
2896  }
2897
2898  case ARMISD::VLD2_UPD: {
2899    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2900                                         ARM::VLD2d16wb_fixed,
2901                                         ARM::VLD2d32wb_fixed,
2902                                         ARM::VLD1q64wb_fixed};
2903    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2904                                         ARM::VLD2q16PseudoWB_fixed,
2905                                         ARM::VLD2q32PseudoWB_fixed };
2906    return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
2907  }
2908
2909  case ARMISD::VLD3_UPD: {
2910    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2911                                         ARM::VLD3d16Pseudo_UPD,
2912                                         ARM::VLD3d32Pseudo_UPD,
2913                                         ARM::VLD1d64TPseudoWB_fixed};
2914    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2915                                          ARM::VLD3q16Pseudo_UPD,
2916                                          ARM::VLD3q32Pseudo_UPD };
2917    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2918                                          ARM::VLD3q16oddPseudo_UPD,
2919                                          ARM::VLD3q32oddPseudo_UPD };
2920    return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2921  }
2922
2923  case ARMISD::VLD4_UPD: {
2924    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2925                                         ARM::VLD4d16Pseudo_UPD,
2926                                         ARM::VLD4d32Pseudo_UPD,
2927                                         ARM::VLD1d64QPseudoWB_fixed};
2928    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2929                                          ARM::VLD4q16Pseudo_UPD,
2930                                          ARM::VLD4q32Pseudo_UPD };
2931    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2932                                          ARM::VLD4q16oddPseudo_UPD,
2933                                          ARM::VLD4q32oddPseudo_UPD };
2934    return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2935  }
2936
2937  case ARMISD::VLD2LN_UPD: {
2938    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
2939                                         ARM::VLD2LNd16Pseudo_UPD,
2940                                         ARM::VLD2LNd32Pseudo_UPD };
2941    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2942                                         ARM::VLD2LNq32Pseudo_UPD };
2943    return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2944  }
2945
2946  case ARMISD::VLD3LN_UPD: {
2947    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
2948                                         ARM::VLD3LNd16Pseudo_UPD,
2949                                         ARM::VLD3LNd32Pseudo_UPD };
2950    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2951                                         ARM::VLD3LNq32Pseudo_UPD };
2952    return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2953  }
2954
2955  case ARMISD::VLD4LN_UPD: {
2956    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
2957                                         ARM::VLD4LNd16Pseudo_UPD,
2958                                         ARM::VLD4LNd32Pseudo_UPD };
2959    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2960                                         ARM::VLD4LNq32Pseudo_UPD };
2961    return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2962  }
2963
2964  case ARMISD::VST1_UPD: {
2965    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
2966                                         ARM::VST1d16wb_fixed,
2967                                         ARM::VST1d32wb_fixed,
2968                                         ARM::VST1d64wb_fixed };
2969    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
2970                                         ARM::VST1q16wb_fixed,
2971                                         ARM::VST1q32wb_fixed,
2972                                         ARM::VST1q64wb_fixed };
2973    return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
2974  }
2975
2976  case ARMISD::VST2_UPD: {
2977    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
2978                                         ARM::VST2d16wb_fixed,
2979                                         ARM::VST2d32wb_fixed,
2980                                         ARM::VST1q64wb_fixed};
2981    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
2982                                         ARM::VST2q16PseudoWB_fixed,
2983                                         ARM::VST2q32PseudoWB_fixed };
2984    return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
2985  }
2986
2987  case ARMISD::VST3_UPD: {
2988    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
2989                                         ARM::VST3d16Pseudo_UPD,
2990                                         ARM::VST3d32Pseudo_UPD,
2991                                         ARM::VST1d64TPseudoWB_fixed};
2992    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
2993                                          ARM::VST3q16Pseudo_UPD,
2994                                          ARM::VST3q32Pseudo_UPD };
2995    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
2996                                          ARM::VST3q16oddPseudo_UPD,
2997                                          ARM::VST3q32oddPseudo_UPD };
2998    return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2999  }
3000
3001  case ARMISD::VST4_UPD: {
3002    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3003                                         ARM::VST4d16Pseudo_UPD,
3004                                         ARM::VST4d32Pseudo_UPD,
3005                                         ARM::VST1d64QPseudoWB_fixed};
3006    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3007                                          ARM::VST4q16Pseudo_UPD,
3008                                          ARM::VST4q32Pseudo_UPD };
3009    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3010                                          ARM::VST4q16oddPseudo_UPD,
3011                                          ARM::VST4q32oddPseudo_UPD };
3012    return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3013  }
3014
3015  case ARMISD::VST2LN_UPD: {
3016    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3017                                         ARM::VST2LNd16Pseudo_UPD,
3018                                         ARM::VST2LNd32Pseudo_UPD };
3019    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3020                                         ARM::VST2LNq32Pseudo_UPD };
3021    return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3022  }
3023
3024  case ARMISD::VST3LN_UPD: {
3025    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3026                                         ARM::VST3LNd16Pseudo_UPD,
3027                                         ARM::VST3LNd32Pseudo_UPD };
3028    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3029                                         ARM::VST3LNq32Pseudo_UPD };
3030    return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3031  }
3032
3033  case ARMISD::VST4LN_UPD: {
3034    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3035                                         ARM::VST4LNd16Pseudo_UPD,
3036                                         ARM::VST4LNd32Pseudo_UPD };
3037    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3038                                         ARM::VST4LNq32Pseudo_UPD };
3039    return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3040  }
3041
3042  case ISD::INTRINSIC_VOID:
3043  case ISD::INTRINSIC_W_CHAIN: {
3044    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3045    switch (IntNo) {
3046    default:
3047      break;
3048
3049    case Intrinsic::arm_ldaexd:
3050    case Intrinsic::arm_ldrexd: {
3051      SDLoc dl(N);
3052      SDValue Chain = N->getOperand(0);
3053      SDValue MemAddr = N->getOperand(2);
3054      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3055
3056      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3057      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3058                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3059
3060      // arm_ldrexd returns a i64 value in {i32, i32}
3061      std::vector<EVT> ResTys;
3062      if (isThumb) {
3063        ResTys.push_back(MVT::i32);
3064        ResTys.push_back(MVT::i32);
3065      } else
3066        ResTys.push_back(MVT::Untyped);
3067      ResTys.push_back(MVT::Other);
3068
3069      // Place arguments in the right order.
3070      SmallVector<SDValue, 7> Ops;
3071      Ops.push_back(MemAddr);
3072      Ops.push_back(getAL(CurDAG, dl));
3073      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3074      Ops.push_back(Chain);
3075      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3076      // Transfer memoperands.
3077      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3078      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3079      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3080
3081      // Remap uses.
3082      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3083      if (!SDValue(N, 0).use_empty()) {
3084        SDValue Result;
3085        if (isThumb)
3086          Result = SDValue(Ld, 0);
3087        else {
3088          SDValue SubRegIdx =
3089            CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3090          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3091              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3092          Result = SDValue(ResNode,0);
3093        }
3094        ReplaceUses(SDValue(N, 0), Result);
3095      }
3096      if (!SDValue(N, 1).use_empty()) {
3097        SDValue Result;
3098        if (isThumb)
3099          Result = SDValue(Ld, 1);
3100        else {
3101          SDValue SubRegIdx =
3102            CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3103          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3104              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3105          Result = SDValue(ResNode,0);
3106        }
3107        ReplaceUses(SDValue(N, 1), Result);
3108      }
3109      ReplaceUses(SDValue(N, 2), OutChain);
3110      return nullptr;
3111    }
3112    case Intrinsic::arm_stlexd:
3113    case Intrinsic::arm_strexd: {
3114      SDLoc dl(N);
3115      SDValue Chain = N->getOperand(0);
3116      SDValue Val0 = N->getOperand(2);
3117      SDValue Val1 = N->getOperand(3);
3118      SDValue MemAddr = N->getOperand(4);
3119
3120      // Store exclusive double return a i32 value which is the return status
3121      // of the issued store.
3122      const EVT ResTys[] = {MVT::i32, MVT::Other};
3123
3124      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3125      // Place arguments in the right order.
3126      SmallVector<SDValue, 7> Ops;
3127      if (isThumb) {
3128        Ops.push_back(Val0);
3129        Ops.push_back(Val1);
3130      } else
3131        // arm_strexd uses GPRPair.
3132        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3133      Ops.push_back(MemAddr);
3134      Ops.push_back(getAL(CurDAG, dl));
3135      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3136      Ops.push_back(Chain);
3137
3138      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3139      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3140                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3141
3142      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3143      // Transfer memoperands.
3144      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3145      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3146      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3147
3148      return St;
3149    }
3150
3151    case Intrinsic::arm_neon_vld1: {
3152      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3153                                           ARM::VLD1d32, ARM::VLD1d64 };
3154      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3155                                           ARM::VLD1q32, ARM::VLD1q64};
3156      return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3157    }
3158
3159    case Intrinsic::arm_neon_vld2: {
3160      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3161                                           ARM::VLD2d32, ARM::VLD1q64 };
3162      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3163                                           ARM::VLD2q32Pseudo };
3164      return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3165    }
3166
3167    case Intrinsic::arm_neon_vld3: {
3168      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3169                                           ARM::VLD3d16Pseudo,
3170                                           ARM::VLD3d32Pseudo,
3171                                           ARM::VLD1d64TPseudo };
3172      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3173                                            ARM::VLD3q16Pseudo_UPD,
3174                                            ARM::VLD3q32Pseudo_UPD };
3175      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3176                                            ARM::VLD3q16oddPseudo,
3177                                            ARM::VLD3q32oddPseudo };
3178      return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3179    }
3180
3181    case Intrinsic::arm_neon_vld4: {
3182      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3183                                           ARM::VLD4d16Pseudo,
3184                                           ARM::VLD4d32Pseudo,
3185                                           ARM::VLD1d64QPseudo };
3186      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3187                                            ARM::VLD4q16Pseudo_UPD,
3188                                            ARM::VLD4q32Pseudo_UPD };
3189      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3190                                            ARM::VLD4q16oddPseudo,
3191                                            ARM::VLD4q32oddPseudo };
3192      return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3193    }
3194
3195    case Intrinsic::arm_neon_vld2lane: {
3196      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3197                                           ARM::VLD2LNd16Pseudo,
3198                                           ARM::VLD2LNd32Pseudo };
3199      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3200                                           ARM::VLD2LNq32Pseudo };
3201      return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3202    }
3203
3204    case Intrinsic::arm_neon_vld3lane: {
3205      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3206                                           ARM::VLD3LNd16Pseudo,
3207                                           ARM::VLD3LNd32Pseudo };
3208      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3209                                           ARM::VLD3LNq32Pseudo };
3210      return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3211    }
3212
3213    case Intrinsic::arm_neon_vld4lane: {
3214      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3215                                           ARM::VLD4LNd16Pseudo,
3216                                           ARM::VLD4LNd32Pseudo };
3217      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3218                                           ARM::VLD4LNq32Pseudo };
3219      return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3220    }
3221
3222    case Intrinsic::arm_neon_vst1: {
3223      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3224                                           ARM::VST1d32, ARM::VST1d64 };
3225      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3226                                           ARM::VST1q32, ARM::VST1q64 };
3227      return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3228    }
3229
3230    case Intrinsic::arm_neon_vst2: {
3231      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3232                                           ARM::VST2d32, ARM::VST1q64 };
3233      static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3234                                     ARM::VST2q32Pseudo };
3235      return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3236    }
3237
3238    case Intrinsic::arm_neon_vst3: {
3239      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3240                                           ARM::VST3d16Pseudo,
3241                                           ARM::VST3d32Pseudo,
3242                                           ARM::VST1d64TPseudo };
3243      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3244                                            ARM::VST3q16Pseudo_UPD,
3245                                            ARM::VST3q32Pseudo_UPD };
3246      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3247                                            ARM::VST3q16oddPseudo,
3248                                            ARM::VST3q32oddPseudo };
3249      return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3250    }
3251
3252    case Intrinsic::arm_neon_vst4: {
3253      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3254                                           ARM::VST4d16Pseudo,
3255                                           ARM::VST4d32Pseudo,
3256                                           ARM::VST1d64QPseudo };
3257      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3258                                            ARM::VST4q16Pseudo_UPD,
3259                                            ARM::VST4q32Pseudo_UPD };
3260      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3261                                            ARM::VST4q16oddPseudo,
3262                                            ARM::VST4q32oddPseudo };
3263      return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3264    }
3265
3266    case Intrinsic::arm_neon_vst2lane: {
3267      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3268                                           ARM::VST2LNd16Pseudo,
3269                                           ARM::VST2LNd32Pseudo };
3270      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3271                                           ARM::VST2LNq32Pseudo };
3272      return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3273    }
3274
3275    case Intrinsic::arm_neon_vst3lane: {
3276      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3277                                           ARM::VST3LNd16Pseudo,
3278                                           ARM::VST3LNd32Pseudo };
3279      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3280                                           ARM::VST3LNq32Pseudo };
3281      return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3282    }
3283
3284    case Intrinsic::arm_neon_vst4lane: {
3285      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3286                                           ARM::VST4LNd16Pseudo,
3287                                           ARM::VST4LNd32Pseudo };
3288      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3289                                           ARM::VST4LNq32Pseudo };
3290      return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3291    }
3292    }
3293    break;
3294  }
3295
3296  case ISD::INTRINSIC_WO_CHAIN: {
3297    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3298    switch (IntNo) {
3299    default:
3300      break;
3301
3302    case Intrinsic::arm_neon_vtbl2:
3303      return SelectVTBL(N, false, 2, ARM::VTBL2);
3304    case Intrinsic::arm_neon_vtbl3:
3305      return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3306    case Intrinsic::arm_neon_vtbl4:
3307      return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3308
3309    case Intrinsic::arm_neon_vtbx2:
3310      return SelectVTBL(N, true, 2, ARM::VTBX2);
3311    case Intrinsic::arm_neon_vtbx3:
3312      return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3313    case Intrinsic::arm_neon_vtbx4:
3314      return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3315    }
3316    break;
3317  }
3318
3319  case ARMISD::VTBL1: {
3320    SDLoc dl(N);
3321    EVT VT = N->getValueType(0);
3322    SmallVector<SDValue, 6> Ops;
3323
3324    Ops.push_back(N->getOperand(0));
3325    Ops.push_back(N->getOperand(1));
3326    Ops.push_back(getAL(CurDAG, dl));                // Predicate
3327    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3328    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3329  }
3330  case ARMISD::VTBL2: {
3331    SDLoc dl(N);
3332    EVT VT = N->getValueType(0);
3333
3334    // Form a REG_SEQUENCE to force register allocation.
3335    SDValue V0 = N->getOperand(0);
3336    SDValue V1 = N->getOperand(1);
3337    SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3338
3339    SmallVector<SDValue, 6> Ops;
3340    Ops.push_back(RegSeq);
3341    Ops.push_back(N->getOperand(2));
3342    Ops.push_back(getAL(CurDAG, dl));                // Predicate
3343    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3344    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3345  }
3346
3347  case ISD::CONCAT_VECTORS:
3348    return SelectConcatVector(N);
3349  }
3350
3351  return SelectCode(N);
3352}
3353
// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit), inspect the fields of the string
// to obtain the integer operands from them, and add these operands to the
// provided vector.
3359static void getIntOperandsFromRegisterString(StringRef RegString,
3360                                             SelectionDAG *CurDAG, SDLoc DL,
3361                                             std::vector<SDValue>& Ops) {
3362  SmallVector<StringRef, 5> Fields;
3363  RegString.split(Fields, ':');
3364
3365  if (Fields.size() > 1) {
3366    bool AllIntFields = true;
3367
3368    for (StringRef Field : Fields) {
3369      // Need to trim out leading 'cp' characters and get the integer field.
3370      unsigned IntField;
3371      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3372      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3373    }
3374
3375    assert(AllIntFields &&
3376            "Unexpected non-integer value in special register string.");
3377  }
3378}
3379
3380// Maps a Banked Register string to its mask value. The mask value returned is
3381// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3382// mask operand, which expresses which register is to be used, e.g. r8, and in
3383// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3384// was invalid.
3385static inline int getBankedRegisterMask(StringRef RegString) {
3386  return StringSwitch<int>(RegString.lower())
3387          .Case("r8_usr", 0x00)
3388          .Case("r9_usr", 0x01)
3389          .Case("r10_usr", 0x02)
3390          .Case("r11_usr", 0x03)
3391          .Case("r12_usr", 0x04)
3392          .Case("sp_usr", 0x05)
3393          .Case("lr_usr", 0x06)
3394          .Case("r8_fiq", 0x08)
3395          .Case("r9_fiq", 0x09)
3396          .Case("r10_fiq", 0x0a)
3397          .Case("r11_fiq", 0x0b)
3398          .Case("r12_fiq", 0x0c)
3399          .Case("sp_fiq", 0x0d)
3400          .Case("lr_fiq", 0x0e)
3401          .Case("lr_irq", 0x10)
3402          .Case("sp_irq", 0x11)
3403          .Case("lr_svc", 0x12)
3404          .Case("sp_svc", 0x13)
3405          .Case("lr_abt", 0x14)
3406          .Case("sp_abt", 0x15)
3407          .Case("lr_und", 0x16)
3408          .Case("sp_und", 0x17)
3409          .Case("lr_mon", 0x1c)
3410          .Case("sp_mon", 0x1d)
3411          .Case("elr_hyp", 0x1e)
3412          .Case("sp_hyp", 0x1f)
3413          .Case("spsr_fiq", 0x2e)
3414          .Case("spsr_irq", 0x30)
3415          .Case("spsr_svc", 0x32)
3416          .Case("spsr_abt", 0x34)
3417          .Case("spsr_und", 0x36)
3418          .Case("spsr_mon", 0x3c)
3419          .Case("spsr_hyp", 0x3e)
3420          .Default(-1);
3421}
3422
3423// Maps a MClass special register string to its value for use in the
3424// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3425// Returns -1 to signify that the string was invalid.
3426static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3427  return StringSwitch<int>(RegString.lower())
3428          .Case("apsr", 0x0)
3429          .Case("iapsr", 0x1)
3430          .Case("eapsr", 0x2)
3431          .Case("xpsr", 0x3)
3432          .Case("ipsr", 0x5)
3433          .Case("epsr", 0x6)
3434          .Case("iepsr", 0x7)
3435          .Case("msp", 0x8)
3436          .Case("psp", 0x9)
3437          .Case("primask", 0x10)
3438          .Case("basepri", 0x11)
3439          .Case("basepri_max", 0x12)
3440          .Case("faultmask", 0x13)
3441          .Case("control", 0x14)
3442          .Default(-1);
3443}
3444
3445// The flags here are common to those allowed for apsr in the A class cores and
3446// those allowed for the special registers in the M class cores. Returns a
3447// value representing which flags were present, -1 if invalid.
3448static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
3449  if (Flags.empty())
3450    return 0x2 | (int)hasDSP;
3451
3452  return StringSwitch<int>(Flags)
3453          .Case("g", 0x1)
3454          .Case("nzcvq", 0x2)
3455          .Case("nzcvqg", 0x3)
3456          .Default(-1);
3457}
3458
// Compute the immediate operand of a t2MRS_M / t2MSR_M node for the M class
// special register named by Reg, with optional write flags Flags ("g",
// "nzcvq" or "nzcvqg"; empty when absent). IsRead selects read (MRS) vs
// write (MSR) validation rules. Returns the encoded SYSm value - with the
// flags mask placed in bits 11-10 for the xPSR-family registers on writes -
// or -1 if the register/flags combination is invalid for this Subtarget.
static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
                                 const ARMSubtarget *Subtarget) {
  // Ensure that the register (without flags) was a valid M Class special
  // register.
  int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
  if (SYSmvalue == -1)
    return -1;

  // basepri, basepri_max and faultmask are only valid for V7m.
  if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
    return -1;

  // If it was a read then we won't be expecting flags and so at this point
  // we can return the mask.
  if (IsRead) {
    assert (Flags.empty() && "Unexpected flags for reading M class register.");
    return SYSmvalue;
  }

  // We know we are now handling a write so need to get the mask for the flags.
  int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());

  // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
  // shouldn't have flags present.
  // (SYSm values 0x0-0x3 are exactly apsr/iapsr/eapsr/xpsr, and 0x4 is never
  // produced by getMClassRegisterSYSmValueMask, so the two comparisons below
  // cover every valid register.)
  if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
    return -1;

  // The _g and _nzcvqg versions are only valid if the DSP extension is
  // available.
  if (!Subtarget->hasDSP() && (Mask & 0x1))
    return -1;

  // The register was valid so need to put the mask in the correct place
  // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
  // construct the operand for the instruction node.
  if (SYSmvalue < 0x4)
    return SYSmvalue | Mask << 10;

  return SYSmvalue;
}
3499
3500static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3501  // The mask operand contains the special register (R Bit) in bit 4, whether
3502  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3503  // bits 3-0 contains the fields to be accessed in the special register, set by
3504  // the flags provided with the register.
3505  int Mask = 0;
3506  if (Reg == "apsr") {
3507    // The flags permitted for apsr are the same flags that are allowed in
3508    // M class registers. We get the flag value and then shift the flags into
3509    // the correct place to combine with the mask.
3510    Mask = getMClassFlagsMask(Flags, true);
3511    if (Mask == -1)
3512      return -1;
3513    return Mask << 2;
3514  }
3515
3516  if (Reg != "cpsr" && Reg != "spsr") {
3517    return -1;
3518  }
3519
3520  // This is the same as if the flags were "fc"
3521  if (Flags.empty() || Flags == "all")
3522    return Mask | 0x9;
3523
3524  // Inspect the supplied flags string and set the bits in the mask for
3525  // the relevant and valid flags allowed for cpsr and spsr.
3526  for (char Flag : Flags) {
3527    int FlagVal;
3528    switch (Flag) {
3529      case 'c':
3530        FlagVal = 0x1;
3531        break;
3532      case 'x':
3533        FlagVal = 0x2;
3534        break;
3535      case 's':
3536        FlagVal = 0x4;
3537        break;
3538      case 'f':
3539        FlagVal = 0x8;
3540        break;
3541      default:
3542        FlagVal = 0;
3543    }
3544
3545    // This avoids allowing strings where the same flag bit appears twice.
3546    if (!FlagVal || (Mask & FlagVal))
3547      return -1;
3548    Mask |= FlagVal;
3549  }
3550
3551  // If the register is spsr then we need to set the R bit.
3552  if (Reg == "spsr")
3553    Mask |= 0x10;
3554
3555  return Mask;
3556}
3557
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns the selected machine node, or nullptr when the register string is
// not valid for this subtarget.
SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
  // Operand 1 carries the register name as metadata.
  // NOTE(review): both dyn_cast results are dereferenced unchecked below, so
  // this assumes well-formed read_register metadata - confirm with callers.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // First try to parse the string as a coprocessor-register description
  // ("cp<n>:..."); on success Ops holds the integer fields as constants.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the always-AL predicate operands and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
  }

  // All remaining register names are matched case-insensitively.
  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MRSbanked with a mask operand.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops);
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMRS)
                    .Case("fpexc", ARM::VMRS_FPEXC)
                    .Case("fpsid", ARM::VMRS_FPSID)
                    .Case("mvfr0", ARM::VMRS_MVFR0)
                    .Case("mvfr1", ARM::VMRS_MVFR1)
                    .Case("mvfr2", ARM::VMRS_MVFR2)
                    .Case("fpinst", ARM::VMRS_FPINST)
                    .Case("fpinst2", ARM::VMRS_FPINST2)
                    .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // Reject VFP registers the subtarget cannot actually read.
    if (!Subtarget->hasVFP2())
      return nullptr;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return nullptr;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
                                  MVT::i32, MVT::Other, Ops);
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops);
  }

  // Unrecognised register string: let selection fail.
  return nullptr;
}
3662
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
// Returns the selected machine node, or nullptr when the register string is
// not valid for this subtarget.
SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
  // Operand 1 carries the register name as metadata; operand 2 (and 3 for
  // 64-bit writes) carries the value(s) to be written.
  // NOTE(review): as in SelectReadRegister, the dyn_cast results are used
  // unchecked - assumes well-formed write_register metadata.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // First try to parse the string as a coprocessor-register description
  // ("cp<n>:..."); on success Ops holds the integer fields as constants.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the value to write after the first two integer fields.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: the value arrives as two separate i32 operands.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the always-AL predicate operands and the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }

  // All remaining register names are matched case-insensitively.
  std::string SpecialReg = RegString->getString().lower();
  // Banked registers (e.g. "r8_usr") lower to MSRbanked with a mask operand.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops);
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMSR)
                    .Case("fpexc", ARM::VMSR_FPEXC)
                    .Case("fpsid", ARM::VMSR_FPSID)
                    .Case("fpinst", ARM::VMSR_FPINST)
                    .Case("fpinst2", ARM::VMSR_FPINST2)
                    .Default(0);

  if (Opcode) {
    // VFP registers require VFP2 support.
    if (!Subtarget->hasVFP2())
      return nullptr;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }

  // Split "reg_flags" into the register name and an optional flags suffix
  // (one split at most, dropping empty fields).
  SmallVector<StringRef, 5> Fields;
  StringRef(SpecialReg).split(Fields, '_', 1, false);
  std::string Reg = Fields[0].str();
  StringRef Flags = Fields.size() == 2 ? Fields[1] : "";

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    // basepri_max gets split so need to correct Reg and Flags.
    if (SpecialReg == "basepri_max") {
      Reg = SpecialReg;
      Flags = "";
    }
    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                  DL, MVT::Other, Ops);
  }

  // Unrecognised register string: let selection fail.
  return nullptr;
}
3765
/// SelectInlineAsm - Rewrite an INLINEASM node so that each 64-bit "r"
/// operand, modeled as two consecutive i32 GPR register operands, is replaced
/// by a single GPRPair operand.  Returns the rebuilt node, or null when no
/// operand needed rewriting.
SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // If the node is glued, the glue value is always the last operand.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // OpChanged[k] records whether the k-th register operand group was converted
  // to a GPRPair; tied uses consult it through their matched-def index.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended later.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // The leading operands (chain, asm string, ...) carry no flag words.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Each operand group is introduced by a constant flag word; decode it.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Only register defs, uses, and early-clobber defs can need pairing.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    // Convert only two-register GPR groups (64-bit "r" operands), or uses
    // tied to a def that was itself converted.
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      // Thread the updated chain/glue so later operands see the copies.
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      // Mark this operand group as converted so later tied uses follow suit.
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Re-attach the (possibly updated) glue value as the last operand.
  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return nullptr;

  // Build the replacement INLINEASM node with the rewritten operand list.
  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  return New.getNode();
}
3917
3918
3919bool ARMDAGToDAGISel::
3920SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3921                             std::vector<SDValue> &OutOps) {
3922  switch(ConstraintID) {
3923  default:
3924    llvm_unreachable("Unexpected asm memory constraint");
3925  case InlineAsm::Constraint_i:
3926    // FIXME: It seems strange that 'i' is needed here since it's supposed to
3927    //        be an immediate and not a memory constraint.
3928    // Fallthrough.
3929  case InlineAsm::Constraint_m:
3930  case InlineAsm::Constraint_o:
3931  case InlineAsm::Constraint_Q:
3932  case InlineAsm::Constraint_Um:
3933  case InlineAsm::Constraint_Un:
3934  case InlineAsm::Constraint_Uq:
3935  case InlineAsm::Constraint_Us:
3936  case InlineAsm::Constraint_Ut:
3937  case InlineAsm::Constraint_Uv:
3938  case InlineAsm::Constraint_Uy:
3939    // Require the address to be in a register.  That is safe for all ARM
3940    // variants and it is hard to do anything much smarter without knowing
3941    // how the operand is used.
3942    OutOps.push_back(Op);
3943    return false;
3944  }
3945  return true;
3946}
3947
3948/// createARMISelDag - This pass converts a legalized DAG into a
3949/// ARM-specific DAG, ready for instruction scheduling.
3950///
3951FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3952                                     CodeGenOpt::Level OptLevel) {
3953  return new ARMDAGToDAGISel(TM, OptLevel);
3954}
3955