1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the ARM target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARM.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMTargetMachine.h"
17#include "MCTargetDesc/ARMAddressingModes.h"
18#include "llvm/ADT/StringSwitch.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGISel.h"
25#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Target/TargetLowering.h"
35#include "llvm/Target/TargetOptions.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "arm-isel"
40
41static cl::opt<bool>
42DisableShifterOp("disable-shifter-op", cl::Hidden,
43  cl::desc("Disable isel of shifter-op"),
44  cl::init(false));
45
46//===--------------------------------------------------------------------===//
47/// ARMDAGToDAGISel - ARM specific code to select ARM machine
48/// instructions for SelectionDAG operations.
49///
50namespace {
51
/// Kind of addressing-mode-2 match reported by SelectAddrMode2Worker.
enum AddrMode2Type {
  /// Simple AM2 (+-imm12).
  AM2_BASE,
  /// Shifter-op AM2.
  AM2_SHOP
};
56
57class ARMDAGToDAGISel : public SelectionDAGISel {
58  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
59  /// make the right decision when generating code for different targets.
60  const ARMSubtarget *Subtarget;
61
62public:
63  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
64      : SelectionDAGISel(tm, OptLevel) {}
65
66  bool runOnMachineFunction(MachineFunction &MF) override {
67    // Reset the subtarget each time through.
68    Subtarget = &MF.getSubtarget<ARMSubtarget>();
69    SelectionDAGISel::runOnMachineFunction(MF);
70    return true;
71  }
72
73  const char *getPassName() const override {
74    return "ARM Instruction Selection";
75  }
76
77  void PreprocessISelDAG() override;
78
79  /// getI32Imm - Return a target constant of type i32 with the specified
80  /// value.
81  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
82    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
83  }
84
85  void Select(SDNode *N) override;
86
87  bool hasNoVMLxHazardUse(SDNode *N) const;
88  bool isShifterOpProfitable(const SDValue &Shift,
89                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
90  bool SelectRegShifterOperand(SDValue N, SDValue &A,
91                               SDValue &B, SDValue &C,
92                               bool CheckProfitability = true);
93  bool SelectImmShifterOperand(SDValue N, SDValue &A,
94                               SDValue &B, bool CheckProfitability = true);
95  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
96                                    SDValue &B, SDValue &C) {
97    // Don't apply the profitability check
98    return SelectRegShifterOperand(N, A, B, C, false);
99  }
100  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
101                                    SDValue &B) {
102    // Don't apply the profitability check
103    return SelectImmShifterOperand(N, A, B, false);
104  }
105
106  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
107  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
108
109  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
110                                      SDValue &Offset, SDValue &Opc);
111  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
112                           SDValue &Opc) {
113    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
114  }
115
116  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
117                           SDValue &Opc) {
118    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
119  }
120
121  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
122                       SDValue &Opc) {
123    SelectAddrMode2Worker(N, Base, Offset, Opc);
124//    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
125    // This always matches one way or another.
126    return true;
127  }
128
129  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
130    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
131    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
132    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
133    return true;
134  }
135
136  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
137                             SDValue &Offset, SDValue &Opc);
138  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
139                             SDValue &Offset, SDValue &Opc);
140  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
141                             SDValue &Offset, SDValue &Opc);
142  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
143  bool SelectAddrMode3(SDValue N, SDValue &Base,
144                       SDValue &Offset, SDValue &Opc);
145  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
146                             SDValue &Offset, SDValue &Opc);
147  bool SelectAddrMode5(SDValue N, SDValue &Base,
148                       SDValue &Offset);
149  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
150  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
151
152  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
153
154  // Thumb Addressing Modes:
155  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
156  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
157                                SDValue &OffImm);
158  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
159                                 SDValue &OffImm);
160  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
161                                 SDValue &OffImm);
162  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
163                                 SDValue &OffImm);
164  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
165
166  // Thumb 2 Addressing Modes:
167  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
168  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
169                            SDValue &OffImm);
170  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
171                                 SDValue &OffImm);
172  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
173                             SDValue &OffReg, SDValue &ShImm);
174  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
175
176  inline bool is_so_imm(unsigned Imm) const {
177    return ARM_AM::getSOImmVal(Imm) != -1;
178  }
179
180  inline bool is_so_imm_not(unsigned Imm) const {
181    return ARM_AM::getSOImmVal(~Imm) != -1;
182  }
183
184  inline bool is_t2_so_imm(unsigned Imm) const {
185    return ARM_AM::getT2SOImmVal(Imm) != -1;
186  }
187
188  inline bool is_t2_so_imm_not(unsigned Imm) const {
189    return ARM_AM::getT2SOImmVal(~Imm) != -1;
190  }
191
192  // Include the pieces autogenerated from the target description.
193#include "ARMGenDAGISel.inc"
194
195private:
196  /// Indexed (pre/post inc/dec) load matching code for ARM.
197  bool tryARMIndexedLoad(SDNode *N);
198  bool tryT2IndexedLoad(SDNode *N);
199
200  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
201  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
202  /// loads of D registers and even subregs and odd subregs of Q registers.
203  /// For NumVecs <= 2, QOpcodes1 is not used.
204  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
205                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
206                 const uint16_t *QOpcodes1);
207
208  /// SelectVST - Select NEON store intrinsics.  NumVecs should
209  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
210  /// stores of D registers and even subregs and odd subregs of Q registers.
211  /// For NumVecs <= 2, QOpcodes1 is not used.
212  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
213                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
214                 const uint16_t *QOpcodes1);
215
216  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
217  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
218  /// load/store of D registers and Q registers.
219  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
220                       unsigned NumVecs, const uint16_t *DOpcodes,
221                       const uint16_t *QOpcodes);
222
223  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
224  /// should be 2, 3 or 4.  The opcode array specifies the instructions used
225  /// for loading D registers.  (Q registers are not supported.)
226  void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
227                    const uint16_t *Opcodes);
228
229  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
230  /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
231  /// generated to force the table registers to be consecutive.
232  void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
233
234  /// Try to select SBFX/UBFX instructions for ARM.
235  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
236
237  // Select special operations if node forms integer ABS pattern
238  bool tryABSOp(SDNode *N);
239
240  bool tryReadRegister(SDNode *N);
241  bool tryWriteRegister(SDNode *N);
242
243  bool tryInlineAsm(SDNode *N);
244
245  void SelectConcatVector(SDNode *N);
246
247  bool trySMLAWSMULW(SDNode *N);
248
249  void SelectCMP_SWAP(SDNode *N);
250
251  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
252  /// inline asm expressions.
253  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
254                                    std::vector<SDValue> &OutOps) override;
255
256  // Form pairs of consecutive R, S, D, or Q registers.
257  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
258  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
259  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
260  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
261
262  // Form sequences of 4 consecutive S, D, or Q registers.
263  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
264  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
265  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
266
267  // Get the alignment operand for a NEON VLD or VST instruction.
268  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
269                        bool is64BitVector);
270
271  /// Returns the number of instructions required to materialize the given
272  /// constant in a register, or 3 if a literal pool load is needed.
273  unsigned ConstantMaterializationCost(unsigned Val) const;
274
275  /// Checks if N is a multiplication by a constant where we can extract out a
276  /// power of two from the constant so that it can be used in a shift, but only
277  /// if it simplifies the materialization of the constant. Returns true if it
278  /// is, and assigns to PowerOfTwo the power of two that should be extracted
279  /// out and to NewMulConst the new constant to be multiplied by.
280  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
281                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;
282
283  /// Replace N with M in CurDAG, in a way that also ensures that M gets
284  /// selected when N would have been selected.
285  void replaceDAGValue(const SDValue &N, SDValue M);
286};
287}
288
289/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
290/// operand. If so Imm will receive the 32-bit value.
291static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
292  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
293    Imm = cast<ConstantSDNode>(N)->getZExtValue();
294    return true;
295  }
296  return false;
297}
298
299// isInt32Immediate - This method tests to see if a constant operand.
300// If so Imm will receive the 32 bit value.
301static bool isInt32Immediate(SDValue N, unsigned &Imm) {
302  return isInt32Immediate(N.getNode(), Imm);
303}
304
305// isOpcWithIntImmediate - This method tests to see if the node is a specific
306// opcode and that it has a immediate integer right operand.
307// If so Imm will receive the 32 bit value.
308static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
309  return N->getOpcode() == Opc &&
310         isInt32Immediate(N->getOperand(1).getNode(), Imm);
311}
312
313/// \brief Check whether a particular node is a constant value representable as
314/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
315///
316/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
317static bool isScaledConstantInRange(SDValue Node, int Scale,
318                                    int RangeMin, int RangeMax,
319                                    int &ScaledConstant) {
320  assert(Scale > 0 && "Invalid scale!");
321
322  // Check that this is a constant.
323  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
324  if (!C)
325    return false;
326
327  ScaledConstant = (int) C->getZExtValue();
328  if ((ScaledConstant % Scale) != 0)
329    return false;
330
331  ScaledConstant /= Scale;
332  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
333}
334
335void ARMDAGToDAGISel::PreprocessISelDAG() {
336  if (!Subtarget->hasV6T2Ops())
337    return;
338
339  bool isThumb2 = Subtarget->isThumb();
340  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
341       E = CurDAG->allnodes_end(); I != E; ) {
342    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
343
344    if (N->getOpcode() != ISD::ADD)
345      continue;
346
347    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
348    // leading zeros, followed by consecutive set bits, followed by 1 or 2
349    // trailing zeros, e.g. 1020.
350    // Transform the expression to
351    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
352    // of trailing zeros of c2. The left shift would be folded as an shifter
353    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
354    // node (UBFX).
355
356    SDValue N0 = N->getOperand(0);
357    SDValue N1 = N->getOperand(1);
358    unsigned And_imm = 0;
359    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
360      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
361        std::swap(N0, N1);
362    }
363    if (!And_imm)
364      continue;
365
366    // Check if the AND mask is an immediate of the form: 000.....1111111100
367    unsigned TZ = countTrailingZeros(And_imm);
368    if (TZ != 1 && TZ != 2)
369      // Be conservative here. Shifter operands aren't always free. e.g. On
370      // Swift, left shifter operand of 1 / 2 for free but others are not.
371      // e.g.
372      //  ubfx   r3, r1, #16, #8
373      //  ldr.w  r3, [r0, r3, lsl #2]
374      // vs.
375      //  mov.w  r9, #1020
376      //  and.w  r2, r9, r1, lsr #14
377      //  ldr    r2, [r0, r2]
378      continue;
379    And_imm >>= TZ;
380    if (And_imm & (And_imm + 1))
381      continue;
382
383    // Look for (and (srl X, c1), c2).
384    SDValue Srl = N1.getOperand(0);
385    unsigned Srl_imm = 0;
386    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
387        (Srl_imm <= 2))
388      continue;
389
390    // Make sure first operand is not a shifter operand which would prevent
391    // folding of the left shift.
392    SDValue CPTmp0;
393    SDValue CPTmp1;
394    SDValue CPTmp2;
395    if (isThumb2) {
396      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
397        continue;
398    } else {
399      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
400          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
401        continue;
402    }
403
404    // Now make the transformation.
405    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
406                          Srl.getOperand(0),
407                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
408                                              MVT::i32));
409    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
410                         Srl,
411                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
412    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
413                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
414    CurDAG->UpdateNodeOperands(N, N0, N1);
415  }
416}
417
418/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
419/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
420/// least on current ARM implementations) which should be avoidded.
421bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
422  if (OptLevel == CodeGenOpt::None)
423    return true;
424
425  if (!Subtarget->hasVMLxHazards())
426    return true;
427
428  if (!N->hasOneUse())
429    return false;
430
431  SDNode *Use = *N->use_begin();
432  if (Use->getOpcode() == ISD::CopyToReg)
433    return true;
434  if (Use->isMachineOpcode()) {
435    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
436        CurDAG->getSubtarget().getInstrInfo());
437
438    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
439    if (MCID.mayStore())
440      return true;
441    unsigned Opcode = MCID.getOpcode();
442    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
443      return true;
444    // vmlx feeding into another vmlx. We actually want to unfold
445    // the use later in the MLxExpansion pass. e.g.
446    // vmla
447    // vmla (stall 8 cycles)
448    //
449    // vmul (5 cycles)
450    // vadd (5 cycles)
451    // vmla
452    // This adds up to about 18 - 19 cycles.
453    //
454    // vmla
455    // vmul (stall 4 cycles)
456    // vadd adds up to about 14 cycles.
457    return TII->isFpMLxInstruction(Opcode);
458  }
459
460  return false;
461}
462
463bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
464                                            ARM_AM::ShiftOpc ShOpcVal,
465                                            unsigned ShAmt) {
466  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
467    return true;
468  if (Shift.hasOneUse())
469    return true;
470  // R << 2 is free.
471  return ShOpcVal == ARM_AM::lsl &&
472         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
473}
474
475unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
476  if (Subtarget->isThumb()) {
477    if (Val <= 255) return 1;                               // MOV
478    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
479    if (Val <= 510) return 2;                               // MOV + ADDi8
480    if (~Val <= 255) return 2;                              // MOV + MVN
481    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
482  } else {
483    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
484    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
485    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
486    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
487  }
488  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
489  return 3; // Literal pool load
490}
491
492bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
493                                             unsigned MaxShift,
494                                             unsigned &PowerOfTwo,
495                                             SDValue &NewMulConst) const {
496  assert(N.getOpcode() == ISD::MUL);
497  assert(MaxShift > 0);
498
499  // If the multiply is used in more than one place then changing the constant
500  // will make other uses incorrect, so don't.
501  if (!N.hasOneUse()) return false;
502  // Check if the multiply is by a constant
503  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
504  if (!MulConst) return false;
505  // If the constant is used in more than one place then modifying it will mean
506  // we need to materialize two constants instead of one, which is a bad idea.
507  if (!MulConst->hasOneUse()) return false;
508  unsigned MulConstVal = MulConst->getZExtValue();
509  if (MulConstVal == 0) return false;
510
511  // Find the largest power of 2 that MulConstVal is a multiple of
512  PowerOfTwo = MaxShift;
513  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
514    --PowerOfTwo;
515    if (PowerOfTwo == 0) return false;
516  }
517
518  // Only optimise if the new cost is better
519  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
520  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
521  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
522  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
523  return NewCost < OldCost;
524}
525
526void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
527  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
528  CurDAG->ReplaceAllUsesWith(N, M);
529}
530
531bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
532                                              SDValue &BaseReg,
533                                              SDValue &Opc,
534                                              bool CheckProfitability) {
535  if (DisableShifterOp)
536    return false;
537
538  // If N is a multiply-by-constant and it's profitable to extract a shift and
539  // use it in a shifted operand do so.
540  if (N.getOpcode() == ISD::MUL) {
541    unsigned PowerOfTwo = 0;
542    SDValue NewMulConst;
543    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
544      HandleSDNode Handle(N);
545      replaceDAGValue(N.getOperand(1), NewMulConst);
546      BaseReg = Handle.getValue();
547      Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
548                                                          PowerOfTwo),
549                                      SDLoc(N), MVT::i32);
550      return true;
551    }
552  }
553
554  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
555
556  // Don't match base register only case. That is matched to a separate
557  // lower complexity pattern with explicit register operand.
558  if (ShOpcVal == ARM_AM::no_shift) return false;
559
560  BaseReg = N.getOperand(0);
561  unsigned ShImmVal = 0;
562  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
563  if (!RHS) return false;
564  ShImmVal = RHS->getZExtValue() & 31;
565  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
566                                  SDLoc(N), MVT::i32);
567  return true;
568}
569
570bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
571                                              SDValue &BaseReg,
572                                              SDValue &ShReg,
573                                              SDValue &Opc,
574                                              bool CheckProfitability) {
575  if (DisableShifterOp)
576    return false;
577
578  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
579
580  // Don't match base register only case. That is matched to a separate
581  // lower complexity pattern with explicit register operand.
582  if (ShOpcVal == ARM_AM::no_shift) return false;
583
584  BaseReg = N.getOperand(0);
585  unsigned ShImmVal = 0;
586  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
587  if (RHS) return false;
588
589  ShReg = N.getOperand(1);
590  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
591    return false;
592  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
593                                  SDLoc(N), MVT::i32);
594  return true;
595}
596
597
598bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
599                                          SDValue &Base,
600                                          SDValue &OffImm) {
601  // Match simple R + imm12 operands.
602
603  // Base only.
604  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
605      !CurDAG->isBaseWithConstantOffset(N)) {
606    if (N.getOpcode() == ISD::FrameIndex) {
607      // Match frame index.
608      int FI = cast<FrameIndexSDNode>(N)->getIndex();
609      Base = CurDAG->getTargetFrameIndex(
610          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
611      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
612      return true;
613    }
614
615    if (N.getOpcode() == ARMISD::Wrapper &&
616        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
617        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
618        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
619      Base = N.getOperand(0);
620    } else
621      Base = N;
622    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
623    return true;
624  }
625
626  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
627    int RHSC = (int)RHS->getSExtValue();
628    if (N.getOpcode() == ISD::SUB)
629      RHSC = -RHSC;
630
631    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
632      Base   = N.getOperand(0);
633      if (Base.getOpcode() == ISD::FrameIndex) {
634        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
635        Base = CurDAG->getTargetFrameIndex(
636            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
637      }
638      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
639      return true;
640    }
641  }
642
643  // Base only.
644  Base = N;
645  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
646  return true;
647}
648
649
650
651bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
652                                      SDValue &Opc) {
653  if (N.getOpcode() == ISD::MUL &&
654      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
655    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
656      // X * [3,5,9] -> X + X * [2,4,8] etc.
657      int RHSC = (int)RHS->getZExtValue();
658      if (RHSC & 1) {
659        RHSC = RHSC & ~1;
660        ARM_AM::AddrOpc AddSub = ARM_AM::add;
661        if (RHSC < 0) {
662          AddSub = ARM_AM::sub;
663          RHSC = - RHSC;
664        }
665        if (isPowerOf2_32(RHSC)) {
666          unsigned ShAmt = Log2_32(RHSC);
667          Base = Offset = N.getOperand(0);
668          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
669                                                            ARM_AM::lsl),
670                                          SDLoc(N), MVT::i32);
671          return true;
672        }
673      }
674    }
675  }
676
677  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
678      // ISD::OR that is equivalent to an ISD::ADD.
679      !CurDAG->isBaseWithConstantOffset(N))
680    return false;
681
682  // Leave simple R +/- imm12 operands for LDRi12
683  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
684    int RHSC;
685    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
686                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
687      return false;
688  }
689
690  // Otherwise this is R +/- [possibly shifted] R.
691  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
692  ARM_AM::ShiftOpc ShOpcVal =
693    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
694  unsigned ShAmt = 0;
695
696  Base   = N.getOperand(0);
697  Offset = N.getOperand(1);
698
699  if (ShOpcVal != ARM_AM::no_shift) {
700    // Check to see if the RHS of the shift is a constant, if not, we can't fold
701    // it.
702    if (ConstantSDNode *Sh =
703           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
704      ShAmt = Sh->getZExtValue();
705      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
706        Offset = N.getOperand(1).getOperand(0);
707      else {
708        ShAmt = 0;
709        ShOpcVal = ARM_AM::no_shift;
710      }
711    } else {
712      ShOpcVal = ARM_AM::no_shift;
713    }
714  }
715
716  // Try matching (R shl C) + (R).
717  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
718      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
719        N.getOperand(0).hasOneUse())) {
720    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
721    if (ShOpcVal != ARM_AM::no_shift) {
722      // Check to see if the RHS of the shift is a constant, if not, we can't
723      // fold it.
724      if (ConstantSDNode *Sh =
725          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
726        ShAmt = Sh->getZExtValue();
727        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
728          Offset = N.getOperand(0).getOperand(0);
729          Base = N.getOperand(1);
730        } else {
731          ShAmt = 0;
732          ShOpcVal = ARM_AM::no_shift;
733        }
734      } else {
735        ShOpcVal = ARM_AM::no_shift;
736      }
737    }
738  }
739
740  // If Offset is a multiply-by-constant and it's profitable to extract a shift
741  // and use it in a shifted operand do so.
742  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
743    unsigned PowerOfTwo = 0;
744    SDValue NewMulConst;
745    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
746      replaceDAGValue(Offset.getOperand(1), NewMulConst);
747      ShAmt = PowerOfTwo;
748      ShOpcVal = ARM_AM::lsl;
749    }
750  }
751
752  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
753                                  SDLoc(N), MVT::i32);
754  return true;
755}
756
757
758//-----
759
/// SelectAddrMode2Worker - Try to match N as an ARM addressing-mode-2 operand,
/// filling in Base, Offset and the encoded AM2 opcode Opc.
///
/// Returns AM2_BASE when the match uses register 0 as Offset (base only, or
/// base +/- imm12 encoded in Opc), and AM2_SHOP when Offset is a real register
/// operand, optionally shifted by a constant amount encoded in Opc.
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  // Multiply by an odd constant. On A9-like and Swift subtargets this is only
  // attempted when the multiply has a single use.
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Clear the low bit; what remains must be +/- a power of two.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // The multiplicand serves as both the base and the shifted offset.
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  // Anything that is not an ADD/SUB (or an OR acting as an ADD) is used whole
  // as the base with no offset.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper unless it wraps a global address, an external
      // symbol or a TLS global address.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      // The opcode stores the magnitude; the sign goes into the add/sub flag.
      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      SDLoc(N), MVT::i32);
      return AM2_BASE;
    }
  }

  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Not profitable: keep the whole shift node as a plain register offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // Only attempted for an add with no shift folded so far, when the subtarget
  // is neither A9-like nor Swift and operand 0 does not have exactly one use.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted value becomes the offset and the other
          // operand becomes the base.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return AM2_SHOP;
}
898
899bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
900                                            SDValue &Offset, SDValue &Opc) {
901  unsigned Opcode = Op->getOpcode();
902  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
903    ? cast<LoadSDNode>(Op)->getAddressingMode()
904    : cast<StoreSDNode>(Op)->getAddressingMode();
905  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
906    ? ARM_AM::add : ARM_AM::sub;
907  int Val;
908  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
909    return false;
910
911  Offset = N;
912  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
913  unsigned ShAmt = 0;
914  if (ShOpcVal != ARM_AM::no_shift) {
915    // Check to see if the RHS of the shift is a constant, if not, we can't fold
916    // it.
917    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
918      ShAmt = Sh->getZExtValue();
919      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
920        Offset = N.getOperand(0);
921      else {
922        ShAmt = 0;
923        ShOpcVal = ARM_AM::no_shift;
924      }
925    } else {
926      ShOpcVal = ARM_AM::no_shift;
927    }
928  }
929
930  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
931                                  SDLoc(N), MVT::i32);
932  return true;
933}
934
935bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
936                                            SDValue &Offset, SDValue &Opc) {
937  unsigned Opcode = Op->getOpcode();
938  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
939    ? cast<LoadSDNode>(Op)->getAddressingMode()
940    : cast<StoreSDNode>(Op)->getAddressingMode();
941  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
942    ? ARM_AM::add : ARM_AM::sub;
943  int Val;
944  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
945    if (AddSub == ARM_AM::sub) Val *= -1;
946    Offset = CurDAG->getRegister(0, MVT::i32);
947    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
948    return true;
949  }
950
951  return false;
952}
953
954
955bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
956                                            SDValue &Offset, SDValue &Opc) {
957  unsigned Opcode = Op->getOpcode();
958  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
959    ? cast<LoadSDNode>(Op)->getAddressingMode()
960    : cast<StoreSDNode>(Op)->getAddressingMode();
961  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
962    ? ARM_AM::add : ARM_AM::sub;
963  int Val;
964  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
965    Offset = CurDAG->getRegister(0, MVT::i32);
966    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
967                                                      ARM_AM::no_shift),
968                                    SDLoc(Op), MVT::i32);
969    return true;
970  }
971
972  return false;
973}
974
/// SelectAddrOffsetNone - Trivial address selector: uses N unchanged as Base
/// and always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
979
980bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
981                                      SDValue &Base, SDValue &Offset,
982                                      SDValue &Opc) {
983  if (N.getOpcode() == ISD::SUB) {
984    // X - C  is canonicalize to X + -C, no need to handle it here.
985    Base = N.getOperand(0);
986    Offset = N.getOperand(1);
987    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
988                                    MVT::i32);
989    return true;
990  }
991
992  if (!CurDAG->isBaseWithConstantOffset(N)) {
993    Base = N;
994    if (N.getOpcode() == ISD::FrameIndex) {
995      int FI = cast<FrameIndexSDNode>(N)->getIndex();
996      Base = CurDAG->getTargetFrameIndex(
997          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
998    }
999    Offset = CurDAG->getRegister(0, MVT::i32);
1000    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1001                                    MVT::i32);
1002    return true;
1003  }
1004
1005  // If the RHS is +/- imm8, fold into addr mode.
1006  int RHSC;
1007  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1008                              -256 + 1, 256, RHSC)) { // 8 bits.
1009    Base = N.getOperand(0);
1010    if (Base.getOpcode() == ISD::FrameIndex) {
1011      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1012      Base = CurDAG->getTargetFrameIndex(
1013          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1014    }
1015    Offset = CurDAG->getRegister(0, MVT::i32);
1016
1017    ARM_AM::AddrOpc AddSub = ARM_AM::add;
1018    if (RHSC < 0) {
1019      AddSub = ARM_AM::sub;
1020      RHSC = -RHSC;
1021    }
1022    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1023                                    MVT::i32);
1024    return true;
1025  }
1026
1027  Base = N.getOperand(0);
1028  Offset = N.getOperand(1);
1029  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1030                                  MVT::i32);
1031  return true;
1032}
1033
1034bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1035                                            SDValue &Offset, SDValue &Opc) {
1036  unsigned Opcode = Op->getOpcode();
1037  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1038    ? cast<LoadSDNode>(Op)->getAddressingMode()
1039    : cast<StoreSDNode>(Op)->getAddressingMode();
1040  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1041    ? ARM_AM::add : ARM_AM::sub;
1042  int Val;
1043  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1044    Offset = CurDAG->getRegister(0, MVT::i32);
1045    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1046                                    MVT::i32);
1047    return true;
1048  }
1049
1050  Offset = N;
1051  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1052                                  MVT::i32);
1053  return true;
1054}
1055
1056bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1057                                      SDValue &Base, SDValue &Offset) {
1058  if (!CurDAG->isBaseWithConstantOffset(N)) {
1059    Base = N;
1060    if (N.getOpcode() == ISD::FrameIndex) {
1061      int FI = cast<FrameIndexSDNode>(N)->getIndex();
1062      Base = CurDAG->getTargetFrameIndex(
1063          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1064    } else if (N.getOpcode() == ARMISD::Wrapper &&
1065               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1066               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1067               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1068      Base = N.getOperand(0);
1069    }
1070    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1071                                       SDLoc(N), MVT::i32);
1072    return true;
1073  }
1074
1075  // If the RHS is +/- imm8, fold into addr mode.
1076  int RHSC;
1077  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1078                              -256 + 1, 256, RHSC)) {
1079    Base = N.getOperand(0);
1080    if (Base.getOpcode() == ISD::FrameIndex) {
1081      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1082      Base = CurDAG->getTargetFrameIndex(
1083          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1084    }
1085
1086    ARM_AM::AddrOpc AddSub = ARM_AM::add;
1087    if (RHSC < 0) {
1088      AddSub = ARM_AM::sub;
1089      RHSC = -RHSC;
1090    }
1091    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1092                                       SDLoc(N), MVT::i32);
1093    return true;
1094  }
1095
1096  Base = N;
1097  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1098                                     SDLoc(N), MVT::i32);
1099  return true;
1100}
1101
1102bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1103                                      SDValue &Align) {
1104  Addr = N;
1105
1106  unsigned Alignment = 0;
1107
1108  MemSDNode *MemN = cast<MemSDNode>(Parent);
1109
1110  if (isa<LSBaseSDNode>(MemN) ||
1111      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1112        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1113       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1114    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1115    // The maximum alignment is equal to the memory size being referenced.
1116    unsigned MMOAlign = MemN->getAlignment();
1117    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1118    if (MMOAlign >= MemSize && MemSize > 1)
1119      Alignment = MemSize;
1120  } else {
1121    // All other uses of addrmode6 are for intrinsics.  For now just record
1122    // the raw alignment value; it will be refined later based on the legal
1123    // alignment operands for the intrinsic.
1124    Alignment = MemN->getAlignment();
1125  }
1126
1127  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1128  return true;
1129}
1130
1131bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1132                                            SDValue &Offset) {
1133  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1134  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1135  if (AM != ISD::POST_INC)
1136    return false;
1137  Offset = N;
1138  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1139    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1140      Offset = CurDAG->getRegister(0, MVT::i32);
1141  }
1142  return true;
1143}
1144
1145bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1146                                       SDValue &Offset, SDValue &Label) {
1147  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1148    Offset = N.getOperand(0);
1149    SDValue N1 = N.getOperand(1);
1150    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1151                                      SDLoc(N), MVT::i32);
1152    return true;
1153  }
1154
1155  return false;
1156}
1157
1158
1159//===----------------------------------------------------------------------===//
1160//                         Thumb Addressing Modes
1161//===----------------------------------------------------------------------===//
1162
1163bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1164                                            SDValue &Base, SDValue &Offset){
1165  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1166    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1167    if (!NC || !NC->isNullValue())
1168      return false;
1169
1170    Base = Offset = N;
1171    return true;
1172  }
1173
1174  Base = N.getOperand(0);
1175  Offset = N.getOperand(1);
1176  return true;
1177}
1178
1179bool
1180ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1181                                          SDValue &Base, SDValue &OffImm) {
1182  if (!CurDAG->isBaseWithConstantOffset(N)) {
1183    if (N.getOpcode() == ISD::ADD) {
1184      return false; // We want to select register offset instead
1185    } else if (N.getOpcode() == ARMISD::Wrapper &&
1186        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1187        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1188        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1189      Base = N.getOperand(0);
1190    } else {
1191      Base = N;
1192    }
1193
1194    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1195    return true;
1196  }
1197
1198  // If the RHS is + imm5 * scale, fold into addr mode.
1199  int RHSC;
1200  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1201    Base = N.getOperand(0);
1202    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1203    return true;
1204  }
1205
1206  // Offset is too large, so use register offset instead.
1207  return false;
1208}
1209
/// SelectThumbAddrModeImm5S4 - Forward to SelectThumbAddrModeImm5S with a
/// scale of 4.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1215
/// SelectThumbAddrModeImm5S2 - Forward to SelectThumbAddrModeImm5S with a
/// scale of 2.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1221
/// SelectThumbAddrModeImm5S1 - Forward to SelectThumbAddrModeImm5S with a
/// scale of 1.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1227
1228bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1229                                            SDValue &Base, SDValue &OffImm) {
1230  if (N.getOpcode() == ISD::FrameIndex) {
1231    int FI = cast<FrameIndexSDNode>(N)->getIndex();
1232    // Only multiples of 4 are allowed for the offset, so the frame object
1233    // alignment must be at least 4.
1234    MachineFrameInfo *MFI = MF->getFrameInfo();
1235    if (MFI->getObjectAlignment(FI) < 4)
1236      MFI->setObjectAlignment(FI, 4);
1237    Base = CurDAG->getTargetFrameIndex(
1238        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1239    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1240    return true;
1241  }
1242
1243  if (!CurDAG->isBaseWithConstantOffset(N))
1244    return false;
1245
1246  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1247  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1248      (LHSR && LHSR->getReg() == ARM::SP)) {
1249    // If the RHS is + imm8 * scale, fold into addr mode.
1250    int RHSC;
1251    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1252      Base = N.getOperand(0);
1253      if (Base.getOpcode() == ISD::FrameIndex) {
1254        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1255        // For LHS+RHS to result in an offset that's a multiple of 4 the object
1256        // indexed by the LHS must be 4-byte aligned.
1257        MachineFrameInfo *MFI = MF->getFrameInfo();
1258        if (MFI->getObjectAlignment(FI) < 4)
1259          MFI->setObjectAlignment(FI, 4);
1260        Base = CurDAG->getTargetFrameIndex(
1261            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1262      }
1263      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1264      return true;
1265    }
1266  }
1267
1268  return false;
1269}
1270
1271
1272//===----------------------------------------------------------------------===//
1273//                        Thumb 2 Addressing Modes
1274//===----------------------------------------------------------------------===//
1275
1276
1277bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1278                                            SDValue &Base, SDValue &OffImm) {
1279  // Match simple R + imm12 operands.
1280
1281  // Base only.
1282  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1283      !CurDAG->isBaseWithConstantOffset(N)) {
1284    if (N.getOpcode() == ISD::FrameIndex) {
1285      // Match frame index.
1286      int FI = cast<FrameIndexSDNode>(N)->getIndex();
1287      Base = CurDAG->getTargetFrameIndex(
1288          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1289      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1290      return true;
1291    }
1292
1293    if (N.getOpcode() == ARMISD::Wrapper &&
1294        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1295        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1296        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1297      Base = N.getOperand(0);
1298      if (Base.getOpcode() == ISD::TargetConstantPool)
1299        return false;  // We want to select t2LDRpci instead.
1300    } else
1301      Base = N;
1302    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1303    return true;
1304  }
1305
1306  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1307    if (SelectT2AddrModeImm8(N, Base, OffImm))
1308      // Let t2LDRi8 handle (R - imm8).
1309      return false;
1310
1311    int RHSC = (int)RHS->getZExtValue();
1312    if (N.getOpcode() == ISD::SUB)
1313      RHSC = -RHSC;
1314
1315    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1316      Base   = N.getOperand(0);
1317      if (Base.getOpcode() == ISD::FrameIndex) {
1318        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1319        Base = CurDAG->getTargetFrameIndex(
1320            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1321      }
1322      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1323      return true;
1324    }
1325  }
1326
1327  // Base only.
1328  Base = N;
1329  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1330  return true;
1331}
1332
1333bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1334                                           SDValue &Base, SDValue &OffImm) {
1335  // Match simple R - imm8 operands.
1336  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1337      !CurDAG->isBaseWithConstantOffset(N))
1338    return false;
1339
1340  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1341    int RHSC = (int)RHS->getSExtValue();
1342    if (N.getOpcode() == ISD::SUB)
1343      RHSC = -RHSC;
1344
1345    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1346      Base = N.getOperand(0);
1347      if (Base.getOpcode() == ISD::FrameIndex) {
1348        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1349        Base = CurDAG->getTargetFrameIndex(
1350            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1351      }
1352      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1353      return true;
1354    }
1355  }
1356
1357  return false;
1358}
1359
1360bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1361                                                 SDValue &OffImm){
1362  unsigned Opcode = Op->getOpcode();
1363  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1364    ? cast<LoadSDNode>(Op)->getAddressingMode()
1365    : cast<StoreSDNode>(Op)->getAddressingMode();
1366  int RHSC;
1367  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1368    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1369      ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1370      : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1371    return true;
1372  }
1373
1374  return false;
1375}
1376
/// SelectT2AddrModeSoReg - Match a Thumb2 register-offset address:
/// Base + OffReg, or Base + (OffReg << ShImm) with a constant shift amount
/// below 4.
///
/// Fails for nodes that are neither an ADD nor a base-with-constant-offset,
/// and for a constant RHS in [0, 0xFFF] or [-255, -1], which are left to the
/// immediate forms named in the comments below.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Fold only shift amounts 0..3 that are also profitable; otherwise keep
      // the whole shift node as the offset register with no shift.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  // NOTE(review): the literal 3 is presumably the largest shift amount that may
  // be extracted, and replaceDAGValue presumably rewrites the multiply's
  // constant operand in the DAG — confirm against their definitions.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1434
1435bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1436                                                SDValue &OffImm) {
1437  // This *must* succeed since it's used for the irreplaceable ldrex and strex
1438  // instructions.
1439  Base = N;
1440  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1441
1442  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1443    return true;
1444
1445  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1446  if (!RHS)
1447    return true;
1448
1449  uint32_t RHSC = (int)RHS->getZExtValue();
1450  if (RHSC > 1020 || RHSC % 4 != 0)
1451    return true;
1452
1453  Base = N.getOperand(0);
1454  if (Base.getOpcode() == ISD::FrameIndex) {
1455    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1456    Base = CurDAG->getTargetFrameIndex(
1457        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1458  }
1459
1460  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1461  return true;
1462}
1463
1464//===--------------------------------------------------------------------===//
1465
1466/// getAL - Returns a ARMCC::AL immediate node.
1467static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1468  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1469}
1470
1471bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1472  LoadSDNode *LD = cast<LoadSDNode>(N);
1473  ISD::MemIndexedMode AM = LD->getAddressingMode();
1474  if (AM == ISD::UNINDEXED)
1475    return false;
1476
1477  EVT LoadedVT = LD->getMemoryVT();
1478  SDValue Offset, AMOpc;
1479  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1480  unsigned Opcode = 0;
1481  bool Match = false;
1482  if (LoadedVT == MVT::i32 && isPre &&
1483      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1484    Opcode = ARM::LDR_PRE_IMM;
1485    Match = true;
1486  } else if (LoadedVT == MVT::i32 && !isPre &&
1487      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1488    Opcode = ARM::LDR_POST_IMM;
1489    Match = true;
1490  } else if (LoadedVT == MVT::i32 &&
1491      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1492    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1493    Match = true;
1494
1495  } else if (LoadedVT == MVT::i16 &&
1496             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1497    Match = true;
1498    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1499      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1500      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1501  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1502    if (LD->getExtensionType() == ISD::SEXTLOAD) {
1503      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1504        Match = true;
1505        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1506      }
1507    } else {
1508      if (isPre &&
1509          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1510        Match = true;
1511        Opcode = ARM::LDRB_PRE_IMM;
1512      } else if (!isPre &&
1513                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1514        Match = true;
1515        Opcode = ARM::LDRB_POST_IMM;
1516      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1517        Match = true;
1518        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1519      }
1520    }
1521  }
1522
1523  if (Match) {
1524    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1525      SDValue Chain = LD->getChain();
1526      SDValue Base = LD->getBasePtr();
1527      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1528                       CurDAG->getRegister(0, MVT::i32), Chain };
1529      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1530                                            MVT::i32, MVT::Other, Ops));
1531      return true;
1532    } else {
1533      SDValue Chain = LD->getChain();
1534      SDValue Base = LD->getBasePtr();
1535      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1536                       CurDAG->getRegister(0, MVT::i32), Chain };
1537      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1538                                            MVT::i32, MVT::Other, Ops));
1539      return true;
1540    }
1541  }
1542
1543  return false;
1544}
1545
1546bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1547  LoadSDNode *LD = cast<LoadSDNode>(N);
1548  ISD::MemIndexedMode AM = LD->getAddressingMode();
1549  if (AM == ISD::UNINDEXED)
1550    return false;
1551
1552  EVT LoadedVT = LD->getMemoryVT();
1553  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1554  SDValue Offset;
1555  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1556  unsigned Opcode = 0;
1557  bool Match = false;
1558  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1559    switch (LoadedVT.getSimpleVT().SimpleTy) {
1560    case MVT::i32:
1561      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1562      break;
1563    case MVT::i16:
1564      if (isSExtLd)
1565        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1566      else
1567        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1568      break;
1569    case MVT::i8:
1570    case MVT::i1:
1571      if (isSExtLd)
1572        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1573      else
1574        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1575      break;
1576    default:
1577      return false;
1578    }
1579    Match = true;
1580  }
1581
1582  if (Match) {
1583    SDValue Chain = LD->getChain();
1584    SDValue Base = LD->getBasePtr();
1585    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1586                     CurDAG->getRegister(0, MVT::i32), Chain };
1587    ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1588                                          MVT::Other, Ops));
1589    return true;
1590  }
1591
1592  return false;
1593}
1594
1595/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1596SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1597  SDLoc dl(V0.getNode());
1598  SDValue RegClass =
1599    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1600  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1601  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1602  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1603  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1604}
1605
1606/// \brief Form a D register from a pair of S registers.
1607SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1608  SDLoc dl(V0.getNode());
1609  SDValue RegClass =
1610    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1611  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1612  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1613  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1614  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1615}
1616
1617/// \brief Form a quad register from a pair of D registers.
1618SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1619  SDLoc dl(V0.getNode());
1620  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1621                                               MVT::i32);
1622  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1623  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1624  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1625  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1626}
1627
1628/// \brief Form 4 consecutive D registers from a pair of Q registers.
1629SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1630  SDLoc dl(V0.getNode());
1631  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1632                                               MVT::i32);
1633  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1634  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1635  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1636  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1637}
1638
1639/// \brief Form 4 consecutive S registers.
1640SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1641                                   SDValue V2, SDValue V3) {
1642  SDLoc dl(V0.getNode());
1643  SDValue RegClass =
1644    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1645  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1646  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1647  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1648  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1649  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1650                                    V2, SubReg2, V3, SubReg3 };
1651  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1652}
1653
1654/// \brief Form 4 consecutive D registers.
1655SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1656                                   SDValue V2, SDValue V3) {
1657  SDLoc dl(V0.getNode());
1658  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1659                                               MVT::i32);
1660  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1661  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1662  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1663  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1664  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1665                                    V2, SubReg2, V3, SubReg3 };
1666  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1667}
1668
1669/// \brief Form 4 consecutive Q registers.
1670SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1671                                   SDValue V2, SDValue V3) {
1672  SDLoc dl(V0.getNode());
1673  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1674                                               MVT::i32);
1675  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1676  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1677  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1678  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1679  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1680                                    V2, SubReg2, V3, SubReg3 };
1681  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1682}
1683
1684/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1685/// of a NEON VLD or VST instruction.  The supported values depend on the
1686/// number of registers being loaded.
1687SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1688                                       unsigned NumVecs, bool is64BitVector) {
1689  unsigned NumRegs = NumVecs;
1690  if (!is64BitVector && NumVecs < 3)
1691    NumRegs *= 2;
1692
1693  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1694  if (Alignment >= 32 && NumRegs == 4)
1695    Alignment = 32;
1696  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1697    Alignment = 16;
1698  else if (Alignment >= 8)
1699    Alignment = 8;
1700  else
1701    Alignment = 0;
1702
1703  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1704}
1705
1706static bool isVLDfixed(unsigned Opc)
1707{
1708  switch (Opc) {
1709  default: return false;
1710  case ARM::VLD1d8wb_fixed : return true;
1711  case ARM::VLD1d16wb_fixed : return true;
1712  case ARM::VLD1d64Qwb_fixed : return true;
1713  case ARM::VLD1d32wb_fixed : return true;
1714  case ARM::VLD1d64wb_fixed : return true;
1715  case ARM::VLD1d64TPseudoWB_fixed : return true;
1716  case ARM::VLD1d64QPseudoWB_fixed : return true;
1717  case ARM::VLD1q8wb_fixed : return true;
1718  case ARM::VLD1q16wb_fixed : return true;
1719  case ARM::VLD1q32wb_fixed : return true;
1720  case ARM::VLD1q64wb_fixed : return true;
1721  case ARM::VLD2d8wb_fixed : return true;
1722  case ARM::VLD2d16wb_fixed : return true;
1723  case ARM::VLD2d32wb_fixed : return true;
1724  case ARM::VLD2q8PseudoWB_fixed : return true;
1725  case ARM::VLD2q16PseudoWB_fixed : return true;
1726  case ARM::VLD2q32PseudoWB_fixed : return true;
1727  case ARM::VLD2DUPd8wb_fixed : return true;
1728  case ARM::VLD2DUPd16wb_fixed : return true;
1729  case ARM::VLD2DUPd32wb_fixed : return true;
1730  }
1731}
1732
1733static bool isVSTfixed(unsigned Opc)
1734{
1735  switch (Opc) {
1736  default: return false;
1737  case ARM::VST1d8wb_fixed : return true;
1738  case ARM::VST1d16wb_fixed : return true;
1739  case ARM::VST1d32wb_fixed : return true;
1740  case ARM::VST1d64wb_fixed : return true;
1741  case ARM::VST1q8wb_fixed : return true;
1742  case ARM::VST1q16wb_fixed : return true;
1743  case ARM::VST1q32wb_fixed : return true;
1744  case ARM::VST1q64wb_fixed : return true;
1745  case ARM::VST1d64TPseudoWB_fixed : return true;
1746  case ARM::VST1d64QPseudoWB_fixed : return true;
1747  case ARM::VST2d8wb_fixed : return true;
1748  case ARM::VST2d16wb_fixed : return true;
1749  case ARM::VST2d32wb_fixed : return true;
1750  case ARM::VST2q8PseudoWB_fixed : return true;
1751  case ARM::VST2q16PseudoWB_fixed : return true;
1752  case ARM::VST2q32PseudoWB_fixed : return true;
1753  }
1754}
1755
1756// Get the register stride update opcode of a VLD/VST instruction that
1757// is otherwise equivalent to the given fixed stride updating instruction.
1758static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1759  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1760    && "Incorrect fixed stride updating instruction.");
1761  switch (Opc) {
1762  default: break;
1763  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1764  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1765  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1766  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1767  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1768  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1769  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1770  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1771  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1772  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1773  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1774  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1775
1776  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1777  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1778  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1779  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1780  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1781  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1782  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1783  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1784  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1785  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1786
1787  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1788  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1789  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1790  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1791  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1792  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1793
1794  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1795  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1796  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1797  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1798  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1799  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1800
1801  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1802  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1803  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1804  }
1805  return Opc; // If not one we handle, return it unchanged.
1806}
1807
1808void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1809                                const uint16_t *DOpcodes,
1810                                const uint16_t *QOpcodes0,
1811                                const uint16_t *QOpcodes1) {
1812  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1813  SDLoc dl(N);
1814
1815  SDValue MemAddr, Align;
1816  unsigned AddrOpIdx = isUpdating ? 1 : 2;
1817  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1818    return;
1819
1820  SDValue Chain = N->getOperand(0);
1821  EVT VT = N->getValueType(0);
1822  bool is64BitVector = VT.is64BitVector();
1823  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1824
1825  unsigned OpcodeIndex;
1826  switch (VT.getSimpleVT().SimpleTy) {
1827  default: llvm_unreachable("unhandled vld type");
1828    // Double-register operations:
1829  case MVT::v8i8:  OpcodeIndex = 0; break;
1830  case MVT::v4i16: OpcodeIndex = 1; break;
1831  case MVT::v2f32:
1832  case MVT::v2i32: OpcodeIndex = 2; break;
1833  case MVT::v1i64: OpcodeIndex = 3; break;
1834    // Quad-register operations:
1835  case MVT::v16i8: OpcodeIndex = 0; break;
1836  case MVT::v8i16: OpcodeIndex = 1; break;
1837  case MVT::v4f32:
1838  case MVT::v4i32: OpcodeIndex = 2; break;
1839  case MVT::v2f64:
1840  case MVT::v2i64: OpcodeIndex = 3;
1841    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1842    break;
1843  }
1844
1845  EVT ResTy;
1846  if (NumVecs == 1)
1847    ResTy = VT;
1848  else {
1849    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1850    if (!is64BitVector)
1851      ResTyElts *= 2;
1852    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1853  }
1854  std::vector<EVT> ResTys;
1855  ResTys.push_back(ResTy);
1856  if (isUpdating)
1857    ResTys.push_back(MVT::i32);
1858  ResTys.push_back(MVT::Other);
1859
1860  SDValue Pred = getAL(CurDAG, dl);
1861  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1862  SDNode *VLd;
1863  SmallVector<SDValue, 7> Ops;
1864
1865  // Double registers and VLD1/VLD2 quad registers are directly supported.
1866  if (is64BitVector || NumVecs <= 2) {
1867    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1868                    QOpcodes0[OpcodeIndex]);
1869    Ops.push_back(MemAddr);
1870    Ops.push_back(Align);
1871    if (isUpdating) {
1872      SDValue Inc = N->getOperand(AddrOpIdx + 1);
1873      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1874      // case entirely when the rest are updated to that form, too.
1875      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1876        Opc = getVLDSTRegisterUpdateOpcode(Opc);
1877      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1878      // check for that explicitly too. Horribly hacky, but temporary.
1879      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1880          !isa<ConstantSDNode>(Inc.getNode()))
1881        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1882    }
1883    Ops.push_back(Pred);
1884    Ops.push_back(Reg0);
1885    Ops.push_back(Chain);
1886    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1887
1888  } else {
1889    // Otherwise, quad registers are loaded with two separate instructions,
1890    // where one loads the even registers and the other loads the odd registers.
1891    EVT AddrTy = MemAddr.getValueType();
1892
1893    // Load the even subregs.  This is always an updating load, so that it
1894    // provides the address to the second load for the odd subregs.
1895    SDValue ImplDef =
1896      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1897    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1898    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1899                                          ResTy, AddrTy, MVT::Other, OpsA);
1900    Chain = SDValue(VLdA, 2);
1901
1902    // Load the odd subregs.
1903    Ops.push_back(SDValue(VLdA, 1));
1904    Ops.push_back(Align);
1905    if (isUpdating) {
1906      SDValue Inc = N->getOperand(AddrOpIdx + 1);
1907      assert(isa<ConstantSDNode>(Inc.getNode()) &&
1908             "only constant post-increment update allowed for VLD3/4");
1909      (void)Inc;
1910      Ops.push_back(Reg0);
1911    }
1912    Ops.push_back(SDValue(VLdA, 0));
1913    Ops.push_back(Pred);
1914    Ops.push_back(Reg0);
1915    Ops.push_back(Chain);
1916    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1917  }
1918
1919  // Transfer memoperands.
1920  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1921  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1922  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1923
1924  if (NumVecs == 1) {
1925    ReplaceNode(N, VLd);
1926    return;
1927  }
1928
1929  // Extract out the subregisters.
1930  SDValue SuperReg = SDValue(VLd, 0);
1931  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1932                    ARM::qsub_3 == ARM::qsub_0 + 3,
1933                "Unexpected subreg numbering");
1934  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1935  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1936    ReplaceUses(SDValue(N, Vec),
1937                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1938  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1939  if (isUpdating)
1940    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1941  CurDAG->RemoveDeadNode(N);
1942}
1943
1944void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1945                                const uint16_t *DOpcodes,
1946                                const uint16_t *QOpcodes0,
1947                                const uint16_t *QOpcodes1) {
1948  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1949  SDLoc dl(N);
1950
1951  SDValue MemAddr, Align;
1952  unsigned AddrOpIdx = isUpdating ? 1 : 2;
1953  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1954  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1955    return;
1956
1957  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1958  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1959
1960  SDValue Chain = N->getOperand(0);
1961  EVT VT = N->getOperand(Vec0Idx).getValueType();
1962  bool is64BitVector = VT.is64BitVector();
1963  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1964
1965  unsigned OpcodeIndex;
1966  switch (VT.getSimpleVT().SimpleTy) {
1967  default: llvm_unreachable("unhandled vst type");
1968    // Double-register operations:
1969  case MVT::v8i8:  OpcodeIndex = 0; break;
1970  case MVT::v4i16: OpcodeIndex = 1; break;
1971  case MVT::v2f32:
1972  case MVT::v2i32: OpcodeIndex = 2; break;
1973  case MVT::v1i64: OpcodeIndex = 3; break;
1974    // Quad-register operations:
1975  case MVT::v16i8: OpcodeIndex = 0; break;
1976  case MVT::v8i16: OpcodeIndex = 1; break;
1977  case MVT::v4f32:
1978  case MVT::v4i32: OpcodeIndex = 2; break;
1979  case MVT::v2f64:
1980  case MVT::v2i64: OpcodeIndex = 3;
1981    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1982    break;
1983  }
1984
1985  std::vector<EVT> ResTys;
1986  if (isUpdating)
1987    ResTys.push_back(MVT::i32);
1988  ResTys.push_back(MVT::Other);
1989
1990  SDValue Pred = getAL(CurDAG, dl);
1991  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1992  SmallVector<SDValue, 7> Ops;
1993
1994  // Double registers and VST1/VST2 quad registers are directly supported.
1995  if (is64BitVector || NumVecs <= 2) {
1996    SDValue SrcReg;
1997    if (NumVecs == 1) {
1998      SrcReg = N->getOperand(Vec0Idx);
1999    } else if (is64BitVector) {
2000      // Form a REG_SEQUENCE to force register allocation.
2001      SDValue V0 = N->getOperand(Vec0Idx + 0);
2002      SDValue V1 = N->getOperand(Vec0Idx + 1);
2003      if (NumVecs == 2)
2004        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2005      else {
2006        SDValue V2 = N->getOperand(Vec0Idx + 2);
2007        // If it's a vst3, form a quad D-register and leave the last part as
2008        // an undef.
2009        SDValue V3 = (NumVecs == 3)
2010          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2011          : N->getOperand(Vec0Idx + 3);
2012        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2013      }
2014    } else {
2015      // Form a QQ register.
2016      SDValue Q0 = N->getOperand(Vec0Idx);
2017      SDValue Q1 = N->getOperand(Vec0Idx + 1);
2018      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2019    }
2020
2021    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2022                    QOpcodes0[OpcodeIndex]);
2023    Ops.push_back(MemAddr);
2024    Ops.push_back(Align);
2025    if (isUpdating) {
2026      SDValue Inc = N->getOperand(AddrOpIdx + 1);
2027      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2028      // case entirely when the rest are updated to that form, too.
2029      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2030        Opc = getVLDSTRegisterUpdateOpcode(Opc);
2031      // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2032      // check for that explicitly too. Horribly hacky, but temporary.
2033      if  (!isa<ConstantSDNode>(Inc.getNode()))
2034        Ops.push_back(Inc);
2035      else if (NumVecs > 2 && !isVSTfixed(Opc))
2036        Ops.push_back(Reg0);
2037    }
2038    Ops.push_back(SrcReg);
2039    Ops.push_back(Pred);
2040    Ops.push_back(Reg0);
2041    Ops.push_back(Chain);
2042    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2043
2044    // Transfer memoperands.
2045    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2046
2047    ReplaceNode(N, VSt);
2048    return;
2049  }
2050
2051  // Otherwise, quad registers are stored with two separate instructions,
2052  // where one stores the even registers and the other stores the odd registers.
2053
2054  // Form the QQQQ REG_SEQUENCE.
2055  SDValue V0 = N->getOperand(Vec0Idx + 0);
2056  SDValue V1 = N->getOperand(Vec0Idx + 1);
2057  SDValue V2 = N->getOperand(Vec0Idx + 2);
2058  SDValue V3 = (NumVecs == 3)
2059    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2060    : N->getOperand(Vec0Idx + 3);
2061  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2062
2063  // Store the even D registers.  This is always an updating store, so that it
2064  // provides the address to the second store for the odd subregs.
2065  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2066  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2067                                        MemAddr.getValueType(),
2068                                        MVT::Other, OpsA);
2069  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2070  Chain = SDValue(VStA, 1);
2071
2072  // Store the odd D registers.
2073  Ops.push_back(SDValue(VStA, 0));
2074  Ops.push_back(Align);
2075  if (isUpdating) {
2076    SDValue Inc = N->getOperand(AddrOpIdx + 1);
2077    assert(isa<ConstantSDNode>(Inc.getNode()) &&
2078           "only constant post-increment update allowed for VST3/4");
2079    (void)Inc;
2080    Ops.push_back(Reg0);
2081  }
2082  Ops.push_back(RegSeq);
2083  Ops.push_back(Pred);
2084  Ops.push_back(Reg0);
2085  Ops.push_back(Chain);
2086  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2087                                        Ops);
2088  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2089  ReplaceNode(N, VStB);
2090}
2091
2092void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2093                                      unsigned NumVecs,
2094                                      const uint16_t *DOpcodes,
2095                                      const uint16_t *QOpcodes) {
2096  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2097  SDLoc dl(N);
2098
2099  SDValue MemAddr, Align;
2100  unsigned AddrOpIdx = isUpdating ? 1 : 2;
2101  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2102  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2103    return;
2104
2105  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2106  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2107
2108  SDValue Chain = N->getOperand(0);
2109  unsigned Lane =
2110    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2111  EVT VT = N->getOperand(Vec0Idx).getValueType();
2112  bool is64BitVector = VT.is64BitVector();
2113
2114  unsigned Alignment = 0;
2115  if (NumVecs != 3) {
2116    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2117    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2118    if (Alignment > NumBytes)
2119      Alignment = NumBytes;
2120    if (Alignment < 8 && Alignment < NumBytes)
2121      Alignment = 0;
2122    // Alignment must be a power of two; make sure of that.
2123    Alignment = (Alignment & -Alignment);
2124    if (Alignment == 1)
2125      Alignment = 0;
2126  }
2127  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2128
2129  unsigned OpcodeIndex;
2130  switch (VT.getSimpleVT().SimpleTy) {
2131  default: llvm_unreachable("unhandled vld/vst lane type");
2132    // Double-register operations:
2133  case MVT::v8i8:  OpcodeIndex = 0; break;
2134  case MVT::v4i16: OpcodeIndex = 1; break;
2135  case MVT::v2f32:
2136  case MVT::v2i32: OpcodeIndex = 2; break;
2137    // Quad-register operations:
2138  case MVT::v8i16: OpcodeIndex = 0; break;
2139  case MVT::v4f32:
2140  case MVT::v4i32: OpcodeIndex = 1; break;
2141  }
2142
2143  std::vector<EVT> ResTys;
2144  if (IsLoad) {
2145    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2146    if (!is64BitVector)
2147      ResTyElts *= 2;
2148    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2149                                      MVT::i64, ResTyElts));
2150  }
2151  if (isUpdating)
2152    ResTys.push_back(MVT::i32);
2153  ResTys.push_back(MVT::Other);
2154
2155  SDValue Pred = getAL(CurDAG, dl);
2156  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2157
2158  SmallVector<SDValue, 8> Ops;
2159  Ops.push_back(MemAddr);
2160  Ops.push_back(Align);
2161  if (isUpdating) {
2162    SDValue Inc = N->getOperand(AddrOpIdx + 1);
2163    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2164  }
2165
2166  SDValue SuperReg;
2167  SDValue V0 = N->getOperand(Vec0Idx + 0);
2168  SDValue V1 = N->getOperand(Vec0Idx + 1);
2169  if (NumVecs == 2) {
2170    if (is64BitVector)
2171      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2172    else
2173      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2174  } else {
2175    SDValue V2 = N->getOperand(Vec0Idx + 2);
2176    SDValue V3 = (NumVecs == 3)
2177      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2178      : N->getOperand(Vec0Idx + 3);
2179    if (is64BitVector)
2180      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2181    else
2182      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2183  }
2184  Ops.push_back(SuperReg);
2185  Ops.push_back(getI32Imm(Lane, dl));
2186  Ops.push_back(Pred);
2187  Ops.push_back(Reg0);
2188  Ops.push_back(Chain);
2189
2190  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2191                                  QOpcodes[OpcodeIndex]);
2192  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2193  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2194  if (!IsLoad) {
2195    ReplaceNode(N, VLdLn);
2196    return;
2197  }
2198
2199  // Extract the subregisters.
2200  SuperReg = SDValue(VLdLn, 0);
2201  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2202                    ARM::qsub_3 == ARM::qsub_0 + 3,
2203                "Unexpected subreg numbering");
2204  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2205  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2206    ReplaceUses(SDValue(N, Vec),
2207                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2208  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2209  if (isUpdating)
2210    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2211  CurDAG->RemoveDeadNode(N);
2212}
2213
2214void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2215                                   const uint16_t *Opcodes) {
2216  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2217  SDLoc dl(N);
2218
2219  SDValue MemAddr, Align;
2220  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2221    return;
2222
2223  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2224  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2225
2226  SDValue Chain = N->getOperand(0);
2227  EVT VT = N->getValueType(0);
2228
2229  unsigned Alignment = 0;
2230  if (NumVecs != 3) {
2231    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2232    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2233    if (Alignment > NumBytes)
2234      Alignment = NumBytes;
2235    if (Alignment < 8 && Alignment < NumBytes)
2236      Alignment = 0;
2237    // Alignment must be a power of two; make sure of that.
2238    Alignment = (Alignment & -Alignment);
2239    if (Alignment == 1)
2240      Alignment = 0;
2241  }
2242  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2243
2244  unsigned OpcodeIndex;
2245  switch (VT.getSimpleVT().SimpleTy) {
2246  default: llvm_unreachable("unhandled vld-dup type");
2247  case MVT::v8i8:  OpcodeIndex = 0; break;
2248  case MVT::v4i16: OpcodeIndex = 1; break;
2249  case MVT::v2f32:
2250  case MVT::v2i32: OpcodeIndex = 2; break;
2251  }
2252
2253  SDValue Pred = getAL(CurDAG, dl);
2254  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2255  SDValue SuperReg;
2256  unsigned Opc = Opcodes[OpcodeIndex];
2257  SmallVector<SDValue, 6> Ops;
2258  Ops.push_back(MemAddr);
2259  Ops.push_back(Align);
2260  if (isUpdating) {
2261    // fixed-stride update instructions don't have an explicit writeback
2262    // operand. It's implicit in the opcode itself.
2263    SDValue Inc = N->getOperand(2);
2264    if (!isa<ConstantSDNode>(Inc.getNode()))
2265      Ops.push_back(Inc);
2266    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2267    else if (NumVecs > 2)
2268      Ops.push_back(Reg0);
2269  }
2270  Ops.push_back(Pred);
2271  Ops.push_back(Reg0);
2272  Ops.push_back(Chain);
2273
2274  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2275  std::vector<EVT> ResTys;
2276  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2277  if (isUpdating)
2278    ResTys.push_back(MVT::i32);
2279  ResTys.push_back(MVT::Other);
2280  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2281  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2282  SuperReg = SDValue(VLdDup, 0);
2283
2284  // Extract the subregisters.
2285  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2286  unsigned SubIdx = ARM::dsub_0;
2287  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2288    ReplaceUses(SDValue(N, Vec),
2289                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2290  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2291  if (isUpdating)
2292    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2293  CurDAG->RemoveDeadNode(N);
2294}
2295
2296void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2297                                 unsigned Opc) {
2298  assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2299  SDLoc dl(N);
2300  EVT VT = N->getValueType(0);
2301  unsigned FirstTblReg = IsExt ? 2 : 1;
2302
2303  // Form a REG_SEQUENCE to force register allocation.
2304  SDValue RegSeq;
2305  SDValue V0 = N->getOperand(FirstTblReg + 0);
2306  SDValue V1 = N->getOperand(FirstTblReg + 1);
2307  if (NumVecs == 2)
2308    RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2309  else {
2310    SDValue V2 = N->getOperand(FirstTblReg + 2);
2311    // If it's a vtbl3, form a quad D-register and leave the last part as
2312    // an undef.
2313    SDValue V3 = (NumVecs == 3)
2314      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2315      : N->getOperand(FirstTblReg + 3);
2316    RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2317  }
2318
2319  SmallVector<SDValue, 6> Ops;
2320  if (IsExt)
2321    Ops.push_back(N->getOperand(1));
2322  Ops.push_back(RegSeq);
2323  Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2324  Ops.push_back(getAL(CurDAG, dl)); // predicate
2325  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2326  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2327}
2328
2329bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2330  if (!Subtarget->hasV6T2Ops())
2331    return false;
2332
2333  unsigned Opc = isSigned
2334    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2335    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2336  SDLoc dl(N);
2337
2338  // For unsigned extracts, check for a shift right and mask
2339  unsigned And_imm = 0;
2340  if (N->getOpcode() == ISD::AND) {
2341    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2342
2343      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2344      if (And_imm & (And_imm + 1))
2345        return false;
2346
2347      unsigned Srl_imm = 0;
2348      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2349                                Srl_imm)) {
2350        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2351
2352        // Note: The width operand is encoded as width-1.
2353        unsigned Width = countTrailingOnes(And_imm) - 1;
2354        unsigned LSB = Srl_imm;
2355
2356        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2357
2358        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2359          // It's cheaper to use a right shift to extract the top bits.
2360          if (Subtarget->isThumb()) {
2361            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2362            SDValue Ops[] = { N->getOperand(0).getOperand(0),
2363                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2364                              getAL(CurDAG, dl), Reg0, Reg0 };
2365            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2366            return true;
2367          }
2368
2369          // ARM models shift instructions as MOVsi with shifter operand.
2370          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2371          SDValue ShOpc =
2372            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2373                                      MVT::i32);
2374          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2375                            getAL(CurDAG, dl), Reg0, Reg0 };
2376          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2377          return true;
2378        }
2379
2380        SDValue Ops[] = { N->getOperand(0).getOperand(0),
2381                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2382                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
2383                          getAL(CurDAG, dl), Reg0 };
2384        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2385        return true;
2386      }
2387    }
2388    return false;
2389  }
2390
2391  // Otherwise, we're looking for a shift of a shift
2392  unsigned Shl_imm = 0;
2393  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2394    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2395    unsigned Srl_imm = 0;
2396    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2397      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2398      // Note: The width operand is encoded as width-1.
2399      unsigned Width = 32 - Srl_imm - 1;
2400      int LSB = Srl_imm - Shl_imm;
2401      if (LSB < 0)
2402        return false;
2403      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2404      SDValue Ops[] = { N->getOperand(0).getOperand(0),
2405                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2406                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
2407                        getAL(CurDAG, dl), Reg0 };
2408      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2409      return true;
2410    }
2411  }
2412
2413  // Or we are looking for a shift of an and, with a mask operand
2414  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2415      isShiftedMask_32(And_imm)) {
2416    unsigned Srl_imm = 0;
2417    unsigned LSB = countTrailingZeros(And_imm);
2418    // Shift must be the same as the ands lsb
2419    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2420      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2421      unsigned MSB = 31 - countLeadingZeros(And_imm);
2422      // Note: The width operand is encoded as width-1.
2423      unsigned Width = MSB - LSB;
2424      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2425      SDValue Ops[] = { N->getOperand(0).getOperand(0),
2426                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2427                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
2428                        getAL(CurDAG, dl), Reg0 };
2429      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2430      return true;
2431    }
2432  }
2433
2434  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2435    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2436    unsigned LSB = 0;
2437    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2438        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2439      return false;
2440
2441    if (LSB + Width > 32)
2442      return false;
2443
2444    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2445    SDValue Ops[] = { N->getOperand(0).getOperand(0),
2446                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2447                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2448                      getAL(CurDAG, dl), Reg0 };
2449    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2450    return true;
2451  }
2452
2453  return false;
2454}
2455
2456/// Target-specific DAG combining for ISD::XOR.
2457/// Target-independent combining lowers SELECT_CC nodes of the form
2458/// select_cc setg[ge] X,  0,  X, -X
2459/// select_cc setgt    X, -1,  X, -X
2460/// select_cc setl[te] X,  0, -X,  X
2461/// select_cc setlt    X,  1, -X,  X
2462/// which represent Integer ABS into:
2463/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2464/// ARM instruction selection detects the latter and matches it to
2465/// ARM::ABS or ARM::t2ABS machine node.
2466bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2467  SDValue XORSrc0 = N->getOperand(0);
2468  SDValue XORSrc1 = N->getOperand(1);
2469  EVT VT = N->getValueType(0);
2470
2471  if (Subtarget->isThumb1Only())
2472    return false;
2473
2474  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2475    return false;
2476
2477  SDValue ADDSrc0 = XORSrc0.getOperand(0);
2478  SDValue ADDSrc1 = XORSrc0.getOperand(1);
2479  SDValue SRASrc0 = XORSrc1.getOperand(0);
2480  SDValue SRASrc1 = XORSrc1.getOperand(1);
2481  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2482  EVT XType = SRASrc0.getValueType();
2483  unsigned Size = XType.getSizeInBits() - 1;
2484
2485  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2486      XType.isInteger() && SRAConstant != nullptr &&
2487      Size == SRAConstant->getZExtValue()) {
2488    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2489    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2490    return true;
2491  }
2492
2493  return false;
2494}
2495
2496static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
2497                                 bool Accumulate) {
2498  // For SM*WB, we need to some form of sext.
2499  // For SM*WT, we need to search for (sra X, 16)
2500  // Src1 then gets set to X.
2501  if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
2502       SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
2503       SignExt.getOpcode() == ISD::AssertSext) &&
2504       SignExt.getValueType() == MVT::i32) {
2505
2506    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2507    Src1 = SignExt.getOperand(0);
2508    return true;
2509  }
2510
2511  if (SignExt.getOpcode() != ISD::SRA)
2512    return false;
2513
2514  ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
2515  if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
2516    return false;
2517
2518  SDValue Op0 = SignExt.getOperand(0);
2519
2520  // The sign extend operand for SM*WB could be generated by a shl and ashr.
2521  if (Op0.getOpcode() == ISD::SHL) {
2522    SDValue SHL = Op0;
2523    ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2524    if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
2525      return false;
2526
2527    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2528    Src1 = Op0.getOperand(0);
2529    return true;
2530  }
2531  *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
2532  Src1 = SignExt.getOperand(0);
2533  return true;
2534}
2535
2536static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
2537                                SDValue &Src1, bool Accumulate) {
2538  // First we look for:
2539  // (add (or (srl ?, 16), (shl ?, 16)))
2540  if (OR.getOpcode() != ISD::OR)
2541    return false;
2542
2543  SDValue SRL = OR.getOperand(0);
2544  SDValue SHL = OR.getOperand(1);
2545
2546  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
2547    SRL = OR.getOperand(1);
2548    SHL = OR.getOperand(0);
2549    if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
2550      return false;
2551  }
2552
2553  ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
2554  ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2555  if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
2556      SHLSrc1->getZExtValue() != 16)
2557    return false;
2558
2559  // The first operands to the shifts need to be the two results from the
2560  // same smul_lohi node.
2561  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
2562       SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
2563    return false;
2564
2565  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
2566  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
2567      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
2568    return false;
2569
2570  // Now we have:
2571  // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
2572  // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
2573  // For SMLAWB the 16-bit value will signed extended somehow.
2574  // For SMLAWT only the SRA is required.
2575
2576  // Check both sides of SMUL_LOHI
2577  if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
2578    Src0 = SMULLOHI->getOperand(1);
2579  } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
2580                                  Accumulate)) {
2581    Src0 = SMULLOHI->getOperand(0);
2582  } else {
2583    return false;
2584  }
2585  return true;
2586}
2587
2588bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
2589  SDLoc dl(N);
2590  SDValue Src0 = N->getOperand(0);
2591  SDValue Src1 = N->getOperand(1);
2592  SDValue A, B;
2593  unsigned Opc = 0;
2594
2595  if (N->getOpcode() == ISD::ADD) {
2596    if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
2597      return false;
2598
2599    SDValue Acc;
2600    if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
2601      Acc = Src1;
2602    } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
2603      Acc = Src0;
2604    } else {
2605      return false;
2606    }
2607    if (Opc == 0)
2608      return false;
2609
2610    SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
2611                      CurDAG->getRegister(0, MVT::i32) };
2612    CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
2613    return true;
2614  } else if (N->getOpcode() == ISD::OR &&
2615             SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
2616    if (Opc == 0)
2617      return false;
2618
2619    SDValue Ops[] = { A, B, getAL(CurDAG, dl),
2620                      CurDAG->getRegister(0, MVT::i32)};
2621    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2622    return true;
2623  }
2624  return false;
2625}
2626
2627/// We've got special pseudo-instructions for these
2628void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2629  unsigned Opcode;
2630  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2631  if (MemTy == MVT::i8)
2632    Opcode = ARM::CMP_SWAP_8;
2633  else if (MemTy == MVT::i16)
2634    Opcode = ARM::CMP_SWAP_16;
2635  else if (MemTy == MVT::i32)
2636    Opcode = ARM::CMP_SWAP_32;
2637  else
2638    llvm_unreachable("Unknown AtomicCmpSwap type");
2639
2640  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2641                   N->getOperand(0)};
2642  SDNode *CmpSwap = CurDAG->getMachineNode(
2643      Opcode, SDLoc(N),
2644      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2645
2646  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2647  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2648  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2649
2650  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2651  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2652  CurDAG->RemoveDeadNode(N);
2653}
2654
2655void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2656  // The only time a CONCAT_VECTORS operation can have legal types is when
2657  // two 64-bit vectors are concatenated to a 128-bit vector.
2658  EVT VT = N->getValueType(0);
2659  if (!VT.is128BitVector() || N->getNumOperands() != 2)
2660    llvm_unreachable("unexpected CONCAT_VECTORS");
2661  ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
2662}
2663
2664void ARMDAGToDAGISel::Select(SDNode *N) {
2665  SDLoc dl(N);
2666
2667  if (N->isMachineOpcode()) {
2668    N->setNodeId(-1);
2669    return;   // Already selected.
2670  }
2671
2672  switch (N->getOpcode()) {
2673  default: break;
2674  case ISD::ADD:
2675  case ISD::OR:
2676    if (trySMLAWSMULW(N))
2677      return;
2678    break;
2679  case ISD::WRITE_REGISTER:
2680    if (tryWriteRegister(N))
2681      return;
2682    break;
2683  case ISD::READ_REGISTER:
2684    if (tryReadRegister(N))
2685      return;
2686    break;
2687  case ISD::INLINEASM:
2688    if (tryInlineAsm(N))
2689      return;
2690    break;
2691  case ISD::XOR:
2692    // Select special operations if XOR node forms integer ABS pattern
2693    if (tryABSOp(N))
2694      return;
2695    // Other cases are autogenerated.
2696    break;
2697  case ISD::Constant: {
2698    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2699    // If we can't materialize the constant we need to use a literal pool
2700    if (ConstantMaterializationCost(Val) > 2) {
2701      SDValue CPIdx = CurDAG->getTargetConstantPool(
2702          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2703          TLI->getPointerTy(CurDAG->getDataLayout()));
2704
2705      SDNode *ResNode;
2706      if (Subtarget->isThumb()) {
2707        SDValue Pred = getAL(CurDAG, dl);
2708        SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2709        SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2710        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2711                                         Ops);
2712      } else {
2713        SDValue Ops[] = {
2714          CPIdx,
2715          CurDAG->getTargetConstant(0, dl, MVT::i32),
2716          getAL(CurDAG, dl),
2717          CurDAG->getRegister(0, MVT::i32),
2718          CurDAG->getEntryNode()
2719        };
2720        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2721                                         Ops);
2722      }
2723      ReplaceNode(N, ResNode);
2724      return;
2725    }
2726
2727    // Other cases are autogenerated.
2728    break;
2729  }
2730  case ISD::FrameIndex: {
2731    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2732    int FI = cast<FrameIndexSDNode>(N)->getIndex();
2733    SDValue TFI = CurDAG->getTargetFrameIndex(
2734        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2735    if (Subtarget->isThumb1Only()) {
2736      // Set the alignment of the frame object to 4, to avoid having to generate
2737      // more than one ADD
2738      MachineFrameInfo *MFI = MF->getFrameInfo();
2739      if (MFI->getObjectAlignment(FI) < 4)
2740        MFI->setObjectAlignment(FI, 4);
2741      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2742                           CurDAG->getTargetConstant(0, dl, MVT::i32));
2743      return;
2744    } else {
2745      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2746                      ARM::t2ADDri : ARM::ADDri);
2747      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2748                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2749                        CurDAG->getRegister(0, MVT::i32) };
2750      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2751      return;
2752    }
2753  }
2754  case ISD::SRL:
2755    if (tryV6T2BitfieldExtractOp(N, false))
2756      return;
2757    break;
2758  case ISD::SIGN_EXTEND_INREG:
2759  case ISD::SRA:
2760    if (tryV6T2BitfieldExtractOp(N, true))
2761      return;
2762    break;
2763  case ISD::MUL:
2764    if (Subtarget->isThumb1Only())
2765      break;
2766    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2767      unsigned RHSV = C->getZExtValue();
2768      if (!RHSV) break;
2769      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2770        unsigned ShImm = Log2_32(RHSV-1);
2771        if (ShImm >= 32)
2772          break;
2773        SDValue V = N->getOperand(0);
2774        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2775        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2776        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2777        if (Subtarget->isThumb()) {
2778          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2779          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2780          return;
2781        } else {
2782          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2783                            Reg0 };
2784          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2785          return;
2786        }
2787      }
2788      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2789        unsigned ShImm = Log2_32(RHSV+1);
2790        if (ShImm >= 32)
2791          break;
2792        SDValue V = N->getOperand(0);
2793        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2794        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2795        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2796        if (Subtarget->isThumb()) {
2797          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2798          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2799          return;
2800        } else {
2801          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2802                            Reg0 };
2803          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2804          return;
2805        }
2806      }
2807    }
2808    break;
2809  case ISD::AND: {
2810    // Check for unsigned bitfield extract
2811    if (tryV6T2BitfieldExtractOp(N, false))
2812      return;
2813
2814    // If an immediate is used in an AND node, it is possible that the immediate
2815    // can be more optimally materialized when negated. If this is the case we
2816    // can negate the immediate and use a BIC instead.
2817    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2818    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2819      uint32_t Imm = (uint32_t) N1C->getZExtValue();
2820
2821      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2822      // immediate can be negated and fit in the immediate operand of
2823      // a t2BIC, don't do any manual transform here as this can be
2824      // handled by the generic ISel machinery.
2825      bool PreferImmediateEncoding =
2826        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2827      if (!PreferImmediateEncoding &&
2828          ConstantMaterializationCost(Imm) >
2829              ConstantMaterializationCost(~Imm)) {
2830        // The current immediate costs more to materialize than a negated
2831        // immediate, so negate the immediate and use a BIC.
2832        SDValue NewImm =
2833          CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2834        // If the new constant didn't exist before, reposition it in the topological
2835        // ordering so it is just before N. Otherwise, don't touch its location.
2836        if (NewImm->getNodeId() == -1)
2837          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2838
2839        if (!Subtarget->hasThumb2()) {
2840          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2841                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
2842                           CurDAG->getRegister(0, MVT::i32)};
2843          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2844          return;
2845        } else {
2846          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2847                           CurDAG->getRegister(0, MVT::i32),
2848                           CurDAG->getRegister(0, MVT::i32)};
2849          ReplaceNode(N,
2850                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2851          return;
2852        }
2853      }
2854    }
2855
2856    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2857    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2858    // are entirely contributed by c2 and lower 16-bits are entirely contributed
2859    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
    // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
2861    EVT VT = N->getValueType(0);
2862    if (VT != MVT::i32)
2863      break;
2864    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2865      ? ARM::t2MOVTi16
2866      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2867    if (!Opc)
2868      break;
2869    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2870    N1C = dyn_cast<ConstantSDNode>(N1);
2871    if (!N1C)
2872      break;
2873    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2874      SDValue N2 = N0.getOperand(1);
2875      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2876      if (!N2C)
2877        break;
2878      unsigned N1CVal = N1C->getZExtValue();
2879      unsigned N2CVal = N2C->getZExtValue();
2880      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2881          (N1CVal & 0xffffU) == 0xffffU &&
2882          (N2CVal & 0xffffU) == 0x0U) {
2883        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2884                                                  dl, MVT::i32);
2885        SDValue Ops[] = { N0.getOperand(0), Imm16,
2886                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2887        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2888        return;
2889      }
2890    }
2891    break;
2892  }
2893  case ARMISD::VMOVRRD:
2894    ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2895                                          N->getOperand(0), getAL(CurDAG, dl),
2896                                          CurDAG->getRegister(0, MVT::i32)));
2897    return;
2898  case ISD::UMUL_LOHI: {
2899    if (Subtarget->isThumb1Only())
2900      break;
2901    if (Subtarget->isThumb()) {
2902      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2903                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2904      ReplaceNode(
2905          N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
2906      return;
2907    } else {
2908      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2909                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2910                        CurDAG->getRegister(0, MVT::i32) };
2911      ReplaceNode(N, CurDAG->getMachineNode(
2912                         Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
2913                         MVT::i32, MVT::i32, Ops));
2914      return;
2915    }
2916  }
2917  case ISD::SMUL_LOHI: {
2918    if (Subtarget->isThumb1Only())
2919      break;
2920    if (Subtarget->isThumb()) {
2921      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2922                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2923      ReplaceNode(
2924          N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
2925      return;
2926    } else {
2927      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2928                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2929                        CurDAG->getRegister(0, MVT::i32) };
2930      ReplaceNode(N, CurDAG->getMachineNode(
2931                         Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
2932                         MVT::i32, MVT::i32, Ops));
2933      return;
2934    }
2935  }
2936  case ARMISD::UMAAL: {
2937    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2938    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2939                      N->getOperand(2), N->getOperand(3),
2940                      getAL(CurDAG, dl),
2941                      CurDAG->getRegister(0, MVT::i32) };
2942    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2943    return;
2944  }
2945  case ARMISD::UMLAL:{
2946    // UMAAL is similar to UMLAL but it adds two 32-bit values to the
2947    // 64-bit multiplication result.
2948    if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
2949        N->getOperand(3).getOpcode() == ARMISD::ADDE) {
2950
2951      SDValue Addc = N->getOperand(2);
2952      SDValue Adde = N->getOperand(3);
2953
2954      if (Adde.getOperand(2).getNode() == Addc.getNode()) {
2955
2956        ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
2957        ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
2958
2959        if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
2960        {
2961          // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
2962          // RdLo = one operand to be added, lower 32-bits of res
2963          // RdHi = other operand to be added, upper 32-bits of res
2964          // Rn = first multiply operand
2965          // Rm = second multiply operand
2966          SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2967                            Addc.getOperand(0), Addc.getOperand(1),
2968                            getAL(CurDAG, dl),
2969                            CurDAG->getRegister(0, MVT::i32) };
2970          unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2971          CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
2972          return;
2973        }
2974      }
2975    }
2976
2977    if (Subtarget->isThumb()) {
2978      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2979                        N->getOperand(3), getAL(CurDAG, dl),
2980                        CurDAG->getRegister(0, MVT::i32)};
2981      ReplaceNode(
2982          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2983      return;
2984    }else{
2985      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2986                        N->getOperand(3), getAL(CurDAG, dl),
2987                        CurDAG->getRegister(0, MVT::i32),
2988                        CurDAG->getRegister(0, MVT::i32) };
2989      ReplaceNode(N, CurDAG->getMachineNode(
2990                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2991                         MVT::i32, MVT::i32, Ops));
2992      return;
2993    }
2994  }
2995  case ARMISD::SMLAL:{
2996    if (Subtarget->isThumb()) {
2997      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2998                        N->getOperand(3), getAL(CurDAG, dl),
2999                        CurDAG->getRegister(0, MVT::i32)};
3000      ReplaceNode(
3001          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3002      return;
3003    }else{
3004      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3005                        N->getOperand(3), getAL(CurDAG, dl),
3006                        CurDAG->getRegister(0, MVT::i32),
3007                        CurDAG->getRegister(0, MVT::i32) };
3008      ReplaceNode(N, CurDAG->getMachineNode(
3009                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3010                         MVT::i32, MVT::i32, Ops));
3011      return;
3012    }
3013  }
3014  case ISD::LOAD: {
3015    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3016      if (tryT2IndexedLoad(N))
3017        return;
3018    } else if (tryARMIndexedLoad(N))
3019      return;
3020    // Other cases are autogenerated.
3021    break;
3022  }
3023  case ARMISD::BRCOND: {
3024    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3025    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3026    // Pattern complexity = 6  cost = 1  size = 0
3027
3028    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3029    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3030    // Pattern complexity = 6  cost = 1  size = 0
3031
3032    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3033    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3034    // Pattern complexity = 6  cost = 1  size = 0
3035
3036    unsigned Opc = Subtarget->isThumb() ?
3037      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3038    SDValue Chain = N->getOperand(0);
3039    SDValue N1 = N->getOperand(1);
3040    SDValue N2 = N->getOperand(2);
3041    SDValue N3 = N->getOperand(3);
3042    SDValue InFlag = N->getOperand(4);
3043    assert(N1.getOpcode() == ISD::BasicBlock);
3044    assert(N2.getOpcode() == ISD::Constant);
3045    assert(N3.getOpcode() == ISD::Register);
3046
3047    SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
3048                               cast<ConstantSDNode>(N2)->getZExtValue()), dl,
3049                               MVT::i32);
3050    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3051    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3052                                             MVT::Glue, Ops);
3053    Chain = SDValue(ResNode, 0);
3054    if (N->getNumValues() == 2) {
3055      InFlag = SDValue(ResNode, 1);
3056      ReplaceUses(SDValue(N, 1), InFlag);
3057    }
3058    ReplaceUses(SDValue(N, 0),
3059                SDValue(Chain.getNode(), Chain.getResNo()));
3060    CurDAG->RemoveDeadNode(N);
3061    return;
3062  }
3063  case ARMISD::VZIP: {
3064    unsigned Opc = 0;
3065    EVT VT = N->getValueType(0);
3066    switch (VT.getSimpleVT().SimpleTy) {
3067    default: return;
3068    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3069    case MVT::v4i16: Opc = ARM::VZIPd16; break;
3070    case MVT::v2f32:
3071    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3072    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3073    case MVT::v16i8: Opc = ARM::VZIPq8; break;
3074    case MVT::v8i16: Opc = ARM::VZIPq16; break;
3075    case MVT::v4f32:
3076    case MVT::v4i32: Opc = ARM::VZIPq32; break;
3077    }
3078    SDValue Pred = getAL(CurDAG, dl);
3079    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3080    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3081    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3082    return;
3083  }
3084  case ARMISD::VUZP: {
3085    unsigned Opc = 0;
3086    EVT VT = N->getValueType(0);
3087    switch (VT.getSimpleVT().SimpleTy) {
3088    default: return;
3089    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3090    case MVT::v4i16: Opc = ARM::VUZPd16; break;
3091    case MVT::v2f32:
3092    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3093    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3094    case MVT::v16i8: Opc = ARM::VUZPq8; break;
3095    case MVT::v8i16: Opc = ARM::VUZPq16; break;
3096    case MVT::v4f32:
3097    case MVT::v4i32: Opc = ARM::VUZPq32; break;
3098    }
3099    SDValue Pred = getAL(CurDAG, dl);
3100    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3101    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3102    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3103    return;
3104  }
3105  case ARMISD::VTRN: {
3106    unsigned Opc = 0;
3107    EVT VT = N->getValueType(0);
3108    switch (VT.getSimpleVT().SimpleTy) {
3109    default: return;
3110    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3111    case MVT::v4i16: Opc = ARM::VTRNd16; break;
3112    case MVT::v2f32:
3113    case MVT::v2i32: Opc = ARM::VTRNd32; break;
3114    case MVT::v16i8: Opc = ARM::VTRNq8; break;
3115    case MVT::v8i16: Opc = ARM::VTRNq16; break;
3116    case MVT::v4f32:
3117    case MVT::v4i32: Opc = ARM::VTRNq32; break;
3118    }
3119    SDValue Pred = getAL(CurDAG, dl);
3120    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3121    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3122    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3123    return;
3124  }
3125  case ARMISD::BUILD_VECTOR: {
3126    EVT VecVT = N->getValueType(0);
3127    EVT EltVT = VecVT.getVectorElementType();
3128    unsigned NumElts = VecVT.getVectorNumElements();
3129    if (EltVT == MVT::f64) {
3130      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3131      ReplaceNode(
3132          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3133      return;
3134    }
3135    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3136    if (NumElts == 2) {
3137      ReplaceNode(
3138          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3139      return;
3140    }
3141    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3142    ReplaceNode(N,
3143                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3144                                    N->getOperand(2), N->getOperand(3)));
3145    return;
3146  }
3147
3148  case ARMISD::VLD2DUP: {
3149    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3150                                        ARM::VLD2DUPd32 };
3151    SelectVLDDup(N, false, 2, Opcodes);
3152    return;
3153  }
3154
3155  case ARMISD::VLD3DUP: {
3156    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3157                                        ARM::VLD3DUPd16Pseudo,
3158                                        ARM::VLD3DUPd32Pseudo };
3159    SelectVLDDup(N, false, 3, Opcodes);
3160    return;
3161  }
3162
3163  case ARMISD::VLD4DUP: {
3164    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3165                                        ARM::VLD4DUPd16Pseudo,
3166                                        ARM::VLD4DUPd32Pseudo };
3167    SelectVLDDup(N, false, 4, Opcodes);
3168    return;
3169  }
3170
3171  case ARMISD::VLD2DUP_UPD: {
3172    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3173                                        ARM::VLD2DUPd16wb_fixed,
3174                                        ARM::VLD2DUPd32wb_fixed };
3175    SelectVLDDup(N, true, 2, Opcodes);
3176    return;
3177  }
3178
3179  case ARMISD::VLD3DUP_UPD: {
3180    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3181                                        ARM::VLD3DUPd16Pseudo_UPD,
3182                                        ARM::VLD3DUPd32Pseudo_UPD };
3183    SelectVLDDup(N, true, 3, Opcodes);
3184    return;
3185  }
3186
3187  case ARMISD::VLD4DUP_UPD: {
3188    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3189                                        ARM::VLD4DUPd16Pseudo_UPD,
3190                                        ARM::VLD4DUPd32Pseudo_UPD };
3191    SelectVLDDup(N, true, 4, Opcodes);
3192    return;
3193  }
3194
3195  case ARMISD::VLD1_UPD: {
3196    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3197                                         ARM::VLD1d16wb_fixed,
3198                                         ARM::VLD1d32wb_fixed,
3199                                         ARM::VLD1d64wb_fixed };
3200    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3201                                         ARM::VLD1q16wb_fixed,
3202                                         ARM::VLD1q32wb_fixed,
3203                                         ARM::VLD1q64wb_fixed };
3204    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3205    return;
3206  }
3207
3208  case ARMISD::VLD2_UPD: {
3209    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3210                                         ARM::VLD2d16wb_fixed,
3211                                         ARM::VLD2d32wb_fixed,
3212                                         ARM::VLD1q64wb_fixed};
3213    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3214                                         ARM::VLD2q16PseudoWB_fixed,
3215                                         ARM::VLD2q32PseudoWB_fixed };
3216    SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3217    return;
3218  }
3219
3220  case ARMISD::VLD3_UPD: {
3221    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3222                                         ARM::VLD3d16Pseudo_UPD,
3223                                         ARM::VLD3d32Pseudo_UPD,
3224                                         ARM::VLD1d64TPseudoWB_fixed};
3225    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3226                                          ARM::VLD3q16Pseudo_UPD,
3227                                          ARM::VLD3q32Pseudo_UPD };
3228    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3229                                          ARM::VLD3q16oddPseudo_UPD,
3230                                          ARM::VLD3q32oddPseudo_UPD };
3231    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3232    return;
3233  }
3234
3235  case ARMISD::VLD4_UPD: {
3236    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3237                                         ARM::VLD4d16Pseudo_UPD,
3238                                         ARM::VLD4d32Pseudo_UPD,
3239                                         ARM::VLD1d64QPseudoWB_fixed};
3240    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3241                                          ARM::VLD4q16Pseudo_UPD,
3242                                          ARM::VLD4q32Pseudo_UPD };
3243    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3244                                          ARM::VLD4q16oddPseudo_UPD,
3245                                          ARM::VLD4q32oddPseudo_UPD };
3246    SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3247    return;
3248  }
3249
3250  case ARMISD::VLD2LN_UPD: {
3251    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3252                                         ARM::VLD2LNd16Pseudo_UPD,
3253                                         ARM::VLD2LNd32Pseudo_UPD };
3254    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3255                                         ARM::VLD2LNq32Pseudo_UPD };
3256    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3257    return;
3258  }
3259
3260  case ARMISD::VLD3LN_UPD: {
3261    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3262                                         ARM::VLD3LNd16Pseudo_UPD,
3263                                         ARM::VLD3LNd32Pseudo_UPD };
3264    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3265                                         ARM::VLD3LNq32Pseudo_UPD };
3266    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3267    return;
3268  }
3269
3270  case ARMISD::VLD4LN_UPD: {
3271    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3272                                         ARM::VLD4LNd16Pseudo_UPD,
3273                                         ARM::VLD4LNd32Pseudo_UPD };
3274    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3275                                         ARM::VLD4LNq32Pseudo_UPD };
3276    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3277    return;
3278  }
3279
3280  case ARMISD::VST1_UPD: {
3281    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3282                                         ARM::VST1d16wb_fixed,
3283                                         ARM::VST1d32wb_fixed,
3284                                         ARM::VST1d64wb_fixed };
3285    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3286                                         ARM::VST1q16wb_fixed,
3287                                         ARM::VST1q32wb_fixed,
3288                                         ARM::VST1q64wb_fixed };
3289    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3290    return;
3291  }
3292
3293  case ARMISD::VST2_UPD: {
3294    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3295                                         ARM::VST2d16wb_fixed,
3296                                         ARM::VST2d32wb_fixed,
3297                                         ARM::VST1q64wb_fixed};
3298    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3299                                         ARM::VST2q16PseudoWB_fixed,
3300                                         ARM::VST2q32PseudoWB_fixed };
3301    SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3302    return;
3303  }
3304
3305  case ARMISD::VST3_UPD: {
3306    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3307                                         ARM::VST3d16Pseudo_UPD,
3308                                         ARM::VST3d32Pseudo_UPD,
3309                                         ARM::VST1d64TPseudoWB_fixed};
3310    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3311                                          ARM::VST3q16Pseudo_UPD,
3312                                          ARM::VST3q32Pseudo_UPD };
3313    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3314                                          ARM::VST3q16oddPseudo_UPD,
3315                                          ARM::VST3q32oddPseudo_UPD };
3316    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3317    return;
3318  }
3319
3320  case ARMISD::VST4_UPD: {
3321    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3322                                         ARM::VST4d16Pseudo_UPD,
3323                                         ARM::VST4d32Pseudo_UPD,
3324                                         ARM::VST1d64QPseudoWB_fixed};
3325    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3326                                          ARM::VST4q16Pseudo_UPD,
3327                                          ARM::VST4q32Pseudo_UPD };
3328    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3329                                          ARM::VST4q16oddPseudo_UPD,
3330                                          ARM::VST4q32oddPseudo_UPD };
3331    SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3332    return;
3333  }
3334
3335  case ARMISD::VST2LN_UPD: {
3336    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3337                                         ARM::VST2LNd16Pseudo_UPD,
3338                                         ARM::VST2LNd32Pseudo_UPD };
3339    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3340                                         ARM::VST2LNq32Pseudo_UPD };
3341    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3342    return;
3343  }
3344
3345  case ARMISD::VST3LN_UPD: {
3346    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3347                                         ARM::VST3LNd16Pseudo_UPD,
3348                                         ARM::VST3LNd32Pseudo_UPD };
3349    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3350                                         ARM::VST3LNq32Pseudo_UPD };
3351    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3352    return;
3353  }
3354
3355  case ARMISD::VST4LN_UPD: {
3356    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3357                                         ARM::VST4LNd16Pseudo_UPD,
3358                                         ARM::VST4LNd32Pseudo_UPD };
3359    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3360                                         ARM::VST4LNq32Pseudo_UPD };
3361    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3362    return;
3363  }
3364
3365  case ISD::INTRINSIC_VOID:
3366  case ISD::INTRINSIC_W_CHAIN: {
3367    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3368    switch (IntNo) {
3369    default:
3370      break;
3371
3372    case Intrinsic::arm_mrrc:
3373    case Intrinsic::arm_mrrc2: {
3374      SDLoc dl(N);
3375      SDValue Chain = N->getOperand(0);
3376      unsigned Opc;
3377
3378      if (Subtarget->isThumb())
3379        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3380      else
3381        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3382
3383      SmallVector<SDValue, 5> Ops;
3384      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3385      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3386      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3387
3388      // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3389      // instruction will always be '1111' but it is possible in assembly language to specify
3390      // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3391      if (Opc != ARM::MRRC2) {
3392        Ops.push_back(getAL(CurDAG, dl));
3393        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3394      }
3395
3396      Ops.push_back(Chain);
3397
3398      // Writes to two registers.
3399      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3400
3401      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3402      return;
3403    }
3404    case Intrinsic::arm_ldaexd:
3405    case Intrinsic::arm_ldrexd: {
3406      SDLoc dl(N);
3407      SDValue Chain = N->getOperand(0);
3408      SDValue MemAddr = N->getOperand(2);
3409      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3410
3411      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3412      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3413                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3414
3415      // arm_ldrexd returns a i64 value in {i32, i32}
3416      std::vector<EVT> ResTys;
3417      if (isThumb) {
3418        ResTys.push_back(MVT::i32);
3419        ResTys.push_back(MVT::i32);
3420      } else
3421        ResTys.push_back(MVT::Untyped);
3422      ResTys.push_back(MVT::Other);
3423
3424      // Place arguments in the right order.
3425      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3426                       CurDAG->getRegister(0, MVT::i32), Chain};
3427      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3428      // Transfer memoperands.
3429      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3430      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3431      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3432
3433      // Remap uses.
3434      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3435      if (!SDValue(N, 0).use_empty()) {
3436        SDValue Result;
3437        if (isThumb)
3438          Result = SDValue(Ld, 0);
3439        else {
3440          SDValue SubRegIdx =
3441            CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3442          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3443              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3444          Result = SDValue(ResNode,0);
3445        }
3446        ReplaceUses(SDValue(N, 0), Result);
3447      }
3448      if (!SDValue(N, 1).use_empty()) {
3449        SDValue Result;
3450        if (isThumb)
3451          Result = SDValue(Ld, 1);
3452        else {
3453          SDValue SubRegIdx =
3454            CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3455          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3456              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3457          Result = SDValue(ResNode,0);
3458        }
3459        ReplaceUses(SDValue(N, 1), Result);
3460      }
3461      ReplaceUses(SDValue(N, 2), OutChain);
3462      CurDAG->RemoveDeadNode(N);
3463      return;
3464    }
3465    case Intrinsic::arm_stlexd:
3466    case Intrinsic::arm_strexd: {
3467      SDLoc dl(N);
3468      SDValue Chain = N->getOperand(0);
3469      SDValue Val0 = N->getOperand(2);
3470      SDValue Val1 = N->getOperand(3);
3471      SDValue MemAddr = N->getOperand(4);
3472
3473      // Store exclusive double return a i32 value which is the return status
3474      // of the issued store.
3475      const EVT ResTys[] = {MVT::i32, MVT::Other};
3476
3477      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3478      // Place arguments in the right order.
3479      SmallVector<SDValue, 7> Ops;
3480      if (isThumb) {
3481        Ops.push_back(Val0);
3482        Ops.push_back(Val1);
3483      } else
3484        // arm_strexd uses GPRPair.
3485        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3486      Ops.push_back(MemAddr);
3487      Ops.push_back(getAL(CurDAG, dl));
3488      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3489      Ops.push_back(Chain);
3490
3491      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3492      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3493                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3494
3495      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3496      // Transfer memoperands.
3497      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3498      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3499      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3500
3501      ReplaceNode(N, St);
3502      return;
3503    }
3504
3505    case Intrinsic::arm_neon_vld1: {
3506      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3507                                           ARM::VLD1d32, ARM::VLD1d64 };
3508      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3509                                           ARM::VLD1q32, ARM::VLD1q64};
3510      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3511      return;
3512    }
3513
3514    case Intrinsic::arm_neon_vld2: {
3515      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3516                                           ARM::VLD2d32, ARM::VLD1q64 };
3517      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3518                                           ARM::VLD2q32Pseudo };
3519      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3520      return;
3521    }
3522
3523    case Intrinsic::arm_neon_vld3: {
3524      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3525                                           ARM::VLD3d16Pseudo,
3526                                           ARM::VLD3d32Pseudo,
3527                                           ARM::VLD1d64TPseudo };
3528      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3529                                            ARM::VLD3q16Pseudo_UPD,
3530                                            ARM::VLD3q32Pseudo_UPD };
3531      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3532                                            ARM::VLD3q16oddPseudo,
3533                                            ARM::VLD3q32oddPseudo };
3534      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3535      return;
3536    }
3537
3538    case Intrinsic::arm_neon_vld4: {
3539      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3540                                           ARM::VLD4d16Pseudo,
3541                                           ARM::VLD4d32Pseudo,
3542                                           ARM::VLD1d64QPseudo };
3543      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3544                                            ARM::VLD4q16Pseudo_UPD,
3545                                            ARM::VLD4q32Pseudo_UPD };
3546      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3547                                            ARM::VLD4q16oddPseudo,
3548                                            ARM::VLD4q32oddPseudo };
3549      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3550      return;
3551    }
3552
3553    case Intrinsic::arm_neon_vld2lane: {
3554      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3555                                           ARM::VLD2LNd16Pseudo,
3556                                           ARM::VLD2LNd32Pseudo };
3557      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3558                                           ARM::VLD2LNq32Pseudo };
3559      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3560      return;
3561    }
3562
3563    case Intrinsic::arm_neon_vld3lane: {
3564      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3565                                           ARM::VLD3LNd16Pseudo,
3566                                           ARM::VLD3LNd32Pseudo };
3567      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3568                                           ARM::VLD3LNq32Pseudo };
3569      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3570      return;
3571    }
3572
3573    case Intrinsic::arm_neon_vld4lane: {
3574      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3575                                           ARM::VLD4LNd16Pseudo,
3576                                           ARM::VLD4LNd32Pseudo };
3577      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3578                                           ARM::VLD4LNq32Pseudo };
3579      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3580      return;
3581    }
3582
3583    case Intrinsic::arm_neon_vst1: {
3584      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3585                                           ARM::VST1d32, ARM::VST1d64 };
3586      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3587                                           ARM::VST1q32, ARM::VST1q64 };
3588      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3589      return;
3590    }
3591
3592    case Intrinsic::arm_neon_vst2: {
3593      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3594                                           ARM::VST2d32, ARM::VST1q64 };
3595      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3596                                           ARM::VST2q32Pseudo };
3597      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3598      return;
3599    }
3600
3601    case Intrinsic::arm_neon_vst3: {
3602      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3603                                           ARM::VST3d16Pseudo,
3604                                           ARM::VST3d32Pseudo,
3605                                           ARM::VST1d64TPseudo };
3606      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3607                                            ARM::VST3q16Pseudo_UPD,
3608                                            ARM::VST3q32Pseudo_UPD };
3609      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3610                                            ARM::VST3q16oddPseudo,
3611                                            ARM::VST3q32oddPseudo };
3612      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3613      return;
3614    }
3615
3616    case Intrinsic::arm_neon_vst4: {
3617      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3618                                           ARM::VST4d16Pseudo,
3619                                           ARM::VST4d32Pseudo,
3620                                           ARM::VST1d64QPseudo };
3621      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3622                                            ARM::VST4q16Pseudo_UPD,
3623                                            ARM::VST4q32Pseudo_UPD };
3624      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3625                                            ARM::VST4q16oddPseudo,
3626                                            ARM::VST4q32oddPseudo };
3627      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3628      return;
3629    }
3630
3631    case Intrinsic::arm_neon_vst2lane: {
3632      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3633                                           ARM::VST2LNd16Pseudo,
3634                                           ARM::VST2LNd32Pseudo };
3635      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3636                                           ARM::VST2LNq32Pseudo };
3637      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3638      return;
3639    }
3640
3641    case Intrinsic::arm_neon_vst3lane: {
3642      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3643                                           ARM::VST3LNd16Pseudo,
3644                                           ARM::VST3LNd32Pseudo };
3645      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3646                                           ARM::VST3LNq32Pseudo };
3647      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3648      return;
3649    }
3650
3651    case Intrinsic::arm_neon_vst4lane: {
3652      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3653                                           ARM::VST4LNd16Pseudo,
3654                                           ARM::VST4LNd32Pseudo };
3655      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3656                                           ARM::VST4LNq32Pseudo };
3657      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3658      return;
3659    }
3660    }
3661    break;
3662  }
3663
3664  case ISD::INTRINSIC_WO_CHAIN: {
3665    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3666    switch (IntNo) {
3667    default:
3668      break;
3669
3670    case Intrinsic::arm_neon_vtbl2:
3671      SelectVTBL(N, false, 2, ARM::VTBL2);
3672      return;
3673    case Intrinsic::arm_neon_vtbl3:
3674      SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3675      return;
3676    case Intrinsic::arm_neon_vtbl4:
3677      SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3678      return;
3679
3680    case Intrinsic::arm_neon_vtbx2:
3681      SelectVTBL(N, true, 2, ARM::VTBX2);
3682      return;
3683    case Intrinsic::arm_neon_vtbx3:
3684      SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3685      return;
3686    case Intrinsic::arm_neon_vtbx4:
3687      SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3688      return;
3689    }
3690    break;
3691  }
3692
3693  case ARMISD::VTBL1: {
3694    SDLoc dl(N);
3695    EVT VT = N->getValueType(0);
3696    SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
3697                     getAL(CurDAG, dl),                 // Predicate
3698                     CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3699    ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
3700    return;
3701  }
3702  case ARMISD::VTBL2: {
3703    SDLoc dl(N);
3704    EVT VT = N->getValueType(0);
3705
3706    // Form a REG_SEQUENCE to force register allocation.
3707    SDValue V0 = N->getOperand(0);
3708    SDValue V1 = N->getOperand(1);
3709    SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3710
3711    SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
3712                     CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3713    ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
3714    return;
3715  }
3716
3717  case ISD::CONCAT_VECTORS:
3718    SelectConcatVector(N);
3719    return;
3720
3721  case ISD::ATOMIC_CMP_SWAP:
3722    SelectCMP_SWAP(N);
3723    return;
3724  }
3725
3726  SelectCode(N);
3727}
3728
3729// Inspect a register string of the form
3730// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3731// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3732// and obtain the integer operands from them, adding these operands to the
3733// provided vector.
3734static void getIntOperandsFromRegisterString(StringRef RegString,
3735                                             SelectionDAG *CurDAG,
3736                                             const SDLoc &DL,
3737                                             std::vector<SDValue> &Ops) {
3738  SmallVector<StringRef, 5> Fields;
3739  RegString.split(Fields, ':');
3740
3741  if (Fields.size() > 1) {
3742    bool AllIntFields = true;
3743
3744    for (StringRef Field : Fields) {
3745      // Need to trim out leading 'cp' characters and get the integer field.
3746      unsigned IntField;
3747      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3748      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3749    }
3750
3751    assert(AllIntFields &&
3752            "Unexpected non-integer value in special register string.");
3753  }
3754}
3755
3756// Maps a Banked Register string to its mask value. The mask value returned is
3757// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3758// mask operand, which expresses which register is to be used, e.g. r8, and in
3759// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3760// was invalid.
3761static inline int getBankedRegisterMask(StringRef RegString) {
3762  return StringSwitch<int>(RegString.lower())
3763          .Case("r8_usr", 0x00)
3764          .Case("r9_usr", 0x01)
3765          .Case("r10_usr", 0x02)
3766          .Case("r11_usr", 0x03)
3767          .Case("r12_usr", 0x04)
3768          .Case("sp_usr", 0x05)
3769          .Case("lr_usr", 0x06)
3770          .Case("r8_fiq", 0x08)
3771          .Case("r9_fiq", 0x09)
3772          .Case("r10_fiq", 0x0a)
3773          .Case("r11_fiq", 0x0b)
3774          .Case("r12_fiq", 0x0c)
3775          .Case("sp_fiq", 0x0d)
3776          .Case("lr_fiq", 0x0e)
3777          .Case("lr_irq", 0x10)
3778          .Case("sp_irq", 0x11)
3779          .Case("lr_svc", 0x12)
3780          .Case("sp_svc", 0x13)
3781          .Case("lr_abt", 0x14)
3782          .Case("sp_abt", 0x15)
3783          .Case("lr_und", 0x16)
3784          .Case("sp_und", 0x17)
3785          .Case("lr_mon", 0x1c)
3786          .Case("sp_mon", 0x1d)
3787          .Case("elr_hyp", 0x1e)
3788          .Case("sp_hyp", 0x1f)
3789          .Case("spsr_fiq", 0x2e)
3790          .Case("spsr_irq", 0x30)
3791          .Case("spsr_svc", 0x32)
3792          .Case("spsr_abt", 0x34)
3793          .Case("spsr_und", 0x36)
3794          .Case("spsr_mon", 0x3c)
3795          .Case("spsr_hyp", 0x3e)
3796          .Default(-1);
3797}
3798
3799// Maps a MClass special register string to its value for use in the
3800// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3801// Returns -1 to signify that the string was invalid.
3802static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3803  return StringSwitch<int>(RegString.lower())
3804          .Case("apsr", 0x0)
3805          .Case("iapsr", 0x1)
3806          .Case("eapsr", 0x2)
3807          .Case("xpsr", 0x3)
3808          .Case("ipsr", 0x5)
3809          .Case("epsr", 0x6)
3810          .Case("iepsr", 0x7)
3811          .Case("msp", 0x8)
3812          .Case("psp", 0x9)
3813          .Case("primask", 0x10)
3814          .Case("basepri", 0x11)
3815          .Case("basepri_max", 0x12)
3816          .Case("faultmask", 0x13)
3817          .Case("control", 0x14)
3818          .Case("msplim", 0x0a)
3819          .Case("psplim", 0x0b)
3820          .Case("sp", 0x18)
3821          .Default(-1);
3822}
3823
3824// The flags here are common to those allowed for apsr in the A class cores and
3825// those allowed for the special registers in the M class cores. Returns a
3826// value representing which flags were present, -1 if invalid.
3827static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
3828  if (Flags.empty())
3829    return 0x2 | (int)hasDSP;
3830
3831  return StringSwitch<int>(Flags)
3832          .Case("g", 0x1)
3833          .Case("nzcvq", 0x2)
3834          .Case("nzcvqg", 0x3)
3835          .Default(-1);
3836}
3837
3838static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3839                                 const ARMSubtarget *Subtarget) {
3840  // Ensure that the register (without flags) was a valid M Class special
3841  // register.
3842  int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3843  if (SYSmvalue == -1)
3844    return -1;
3845
3846  // basepri, basepri_max and faultmask are only valid for V7m.
3847  if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3848    return -1;
3849
3850  if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3851    Flags = "";
3852    SYSmvalue |= 0x80;
3853  }
3854
3855  if (!Subtarget->has8MSecExt() &&
3856      (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3857    return -1;
3858
3859  if (!Subtarget->hasV8MMainlineOps() &&
3860      (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3861       SYSmvalue == 0x93))
3862    return -1;
3863
3864  // If it was a read then we won't be expecting flags and so at this point
3865  // we can return the mask.
3866  if (IsRead) {
3867    if (Flags.empty())
3868      return SYSmvalue;
3869    else
3870      return -1;
3871  }
3872
3873  // We know we are now handling a write so need to get the mask for the flags.
3874  int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
3875
3876  // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3877  // shouldn't have flags present.
3878  if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3879    return -1;
3880
3881  // The _g and _nzcvqg versions are only valid if the DSP extension is
3882  // available.
3883  if (!Subtarget->hasDSP() && (Mask & 0x1))
3884    return -1;
3885
3886  // The register was valid so need to put the mask in the correct place
3887  // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3888  // construct the operand for the instruction node.
3889  if (SYSmvalue < 0x4)
3890    return SYSmvalue | Mask << 10;
3891
3892  return SYSmvalue;
3893}
3894
3895static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3896  // The mask operand contains the special register (R Bit) in bit 4, whether
3897  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3898  // bits 3-0 contains the fields to be accessed in the special register, set by
3899  // the flags provided with the register.
3900  int Mask = 0;
3901  if (Reg == "apsr") {
3902    // The flags permitted for apsr are the same flags that are allowed in
3903    // M class registers. We get the flag value and then shift the flags into
3904    // the correct place to combine with the mask.
3905    Mask = getMClassFlagsMask(Flags, true);
3906    if (Mask == -1)
3907      return -1;
3908    return Mask << 2;
3909  }
3910
3911  if (Reg != "cpsr" && Reg != "spsr") {
3912    return -1;
3913  }
3914
3915  // This is the same as if the flags were "fc"
3916  if (Flags.empty() || Flags == "all")
3917    return Mask | 0x9;
3918
3919  // Inspect the supplied flags string and set the bits in the mask for
3920  // the relevant and valid flags allowed for cpsr and spsr.
3921  for (char Flag : Flags) {
3922    int FlagVal;
3923    switch (Flag) {
3924      case 'c':
3925        FlagVal = 0x1;
3926        break;
3927      case 'x':
3928        FlagVal = 0x2;
3929        break;
3930      case 's':
3931        FlagVal = 0x4;
3932        break;
3933      case 'f':
3934        FlagVal = 0x8;
3935        break;
3936      default:
3937        FlagVal = 0;
3938    }
3939
3940    // This avoids allowing strings where the same flag bit appears twice.
3941    if (!FlagVal || (Mask & FlagVal))
3942      return -1;
3943    Mask |= FlagVal;
3944  }
3945
3946  // If the register is spsr then we need to set the R bit.
3947  if (Reg == "spsr")
3948    Mask |= 0x10;
3949
3950  return Mask;
3951}
3952
3953// Lower the read_register intrinsic to ARM specific DAG nodes
3954// using the supplied metadata string to select the instruction node to use
3955// and the registers/masks to construct as operands for the node.
3956bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3957  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3958  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3959  bool IsThumb2 = Subtarget->isThumb2();
3960  SDLoc DL(N);
3961
3962  std::vector<SDValue> Ops;
3963  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3964
3965  if (!Ops.empty()) {
3966    // If the special register string was constructed of fields (as defined
3967    // in the ACLE) then need to lower to MRC node (32 bit) or
3968    // MRRC node(64 bit), we can make the distinction based on the number of
3969    // operands we have.
3970    unsigned Opcode;
3971    SmallVector<EVT, 3> ResTypes;
3972    if (Ops.size() == 5){
3973      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3974      ResTypes.append({ MVT::i32, MVT::Other });
3975    } else {
3976      assert(Ops.size() == 3 &&
3977              "Invalid number of fields in special register string.");
3978      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3979      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3980    }
3981
3982    Ops.push_back(getAL(CurDAG, DL));
3983    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3984    Ops.push_back(N->getOperand(0));
3985    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3986    return true;
3987  }
3988
3989  std::string SpecialReg = RegString->getString().lower();
3990
3991  int BankedReg = getBankedRegisterMask(SpecialReg);
3992  if (BankedReg != -1) {
3993    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3994            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3995            N->getOperand(0) };
3996    ReplaceNode(
3997        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3998                                  DL, MVT::i32, MVT::Other, Ops));
3999    return true;
4000  }
4001
4002  // The VFP registers are read by creating SelectionDAG nodes with opcodes
4003  // corresponding to the register that is being read from. So we switch on the
4004  // string to find which opcode we need to use.
4005  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4006                    .Case("fpscr", ARM::VMRS)
4007                    .Case("fpexc", ARM::VMRS_FPEXC)
4008                    .Case("fpsid", ARM::VMRS_FPSID)
4009                    .Case("mvfr0", ARM::VMRS_MVFR0)
4010                    .Case("mvfr1", ARM::VMRS_MVFR1)
4011                    .Case("mvfr2", ARM::VMRS_MVFR2)
4012                    .Case("fpinst", ARM::VMRS_FPINST)
4013                    .Case("fpinst2", ARM::VMRS_FPINST2)
4014                    .Default(0);
4015
4016  // If an opcode was found then we can lower the read to a VFP instruction.
4017  if (Opcode) {
4018    if (!Subtarget->hasVFP2())
4019      return false;
4020    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4021      return false;
4022
4023    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4024            N->getOperand(0) };
4025    ReplaceNode(N,
4026                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4027    return true;
4028  }
4029
4030  // If the target is M Class then need to validate that the register string
4031  // is an acceptable value, so check that a mask can be constructed from the
4032  // string.
4033  if (Subtarget->isMClass()) {
4034    StringRef Flags = "", Reg = SpecialReg;
4035    if (Reg.endswith("_ns")) {
4036      Flags = "ns";
4037      Reg = Reg.drop_back(3);
4038    }
4039
4040    int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4041    if (SYSmValue == -1)
4042      return false;
4043
4044    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4045                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4046                      N->getOperand(0) };
4047    ReplaceNode(
4048        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4049    return true;
4050  }
4051
4052  // Here we know the target is not M Class so we need to check if it is one
4053  // of the remaining possible values which are apsr, cpsr or spsr.
4054  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4055    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4056            N->getOperand(0) };
4057    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4058                                          DL, MVT::i32, MVT::Other, Ops));
4059    return true;
4060  }
4061
4062  if (SpecialReg == "spsr") {
4063    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4064            N->getOperand(0) };
4065    ReplaceNode(
4066        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4067                                  MVT::i32, MVT::Other, Ops));
4068    return true;
4069  }
4070
4071  return false;
4072}
4073
4074// Lower the write_register intrinsic to ARM specific DAG nodes
4075// using the supplied metadata string to select the instruction node to use
4076// and the registers/masks to use in the nodes
4077bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4078  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4079  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4080  bool IsThumb2 = Subtarget->isThumb2();
4081  SDLoc DL(N);
4082
4083  std::vector<SDValue> Ops;
4084  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4085
4086  if (!Ops.empty()) {
4087    // If the special register string was constructed of fields (as defined
4088    // in the ACLE) then need to lower to MCR node (32 bit) or
4089    // MCRR node(64 bit), we can make the distinction based on the number of
4090    // operands we have.
4091    unsigned Opcode;
4092    if (Ops.size() == 5) {
4093      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4094      Ops.insert(Ops.begin()+2, N->getOperand(2));
4095    } else {
4096      assert(Ops.size() == 3 &&
4097              "Invalid number of fields in special register string.");
4098      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4099      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4100      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4101    }
4102
4103    Ops.push_back(getAL(CurDAG, DL));
4104    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4105    Ops.push_back(N->getOperand(0));
4106
4107    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4108    return true;
4109  }
4110
4111  std::string SpecialReg = RegString->getString().lower();
4112  int BankedReg = getBankedRegisterMask(SpecialReg);
4113  if (BankedReg != -1) {
4114    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4115            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4116            N->getOperand(0) };
4117    ReplaceNode(
4118        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4119                                  DL, MVT::Other, Ops));
4120    return true;
4121  }
4122
4123  // The VFP registers are written to by creating SelectionDAG nodes with
4124  // opcodes corresponding to the register that is being written. So we switch
4125  // on the string to find which opcode we need to use.
4126  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4127                    .Case("fpscr", ARM::VMSR)
4128                    .Case("fpexc", ARM::VMSR_FPEXC)
4129                    .Case("fpsid", ARM::VMSR_FPSID)
4130                    .Case("fpinst", ARM::VMSR_FPINST)
4131                    .Case("fpinst2", ARM::VMSR_FPINST2)
4132                    .Default(0);
4133
4134  if (Opcode) {
4135    if (!Subtarget->hasVFP2())
4136      return false;
4137    Ops = { N->getOperand(2), getAL(CurDAG, DL),
4138            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4139    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4140    return true;
4141  }
4142
4143  std::pair<StringRef, StringRef> Fields;
4144  Fields = StringRef(SpecialReg).rsplit('_');
4145  std::string Reg = Fields.first.str();
4146  StringRef Flags = Fields.second;
4147
4148  // If the target was M Class then need to validate the special register value
4149  // and retrieve the mask for use in the instruction node.
4150  if (Subtarget->isMClass()) {
4151    // basepri_max gets split so need to correct Reg and Flags.
4152    if (SpecialReg == "basepri_max") {
4153      Reg = SpecialReg;
4154      Flags = "";
4155    }
4156    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4157    if (SYSmValue == -1)
4158      return false;
4159
4160    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4161                      N->getOperand(2), getAL(CurDAG, DL),
4162                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4163    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4164    return true;
4165  }
4166
4167  // We then check to see if a valid mask can be constructed for one of the
4168  // register string values permitted for the A and R class cores. These values
4169  // are apsr, spsr and cpsr; these are also valid on older cores.
4170  int Mask = getARClassRegisterMask(Reg, Flags);
4171  if (Mask != -1) {
4172    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4173            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4174            N->getOperand(0) };
4175    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4176                                          DL, MVT::Other, Ops));
4177    return true;
4178  }
4179
4180  return false;
4181}
4182
// Rewrites the operand list of an inline asm node so that each register
// operand made of two registers, which either has the GPR register class
// constraint or is tied to a def that was already rewritten, is replaced by a
// single GPRPair virtual register. Returns true if N was replaced by a new
// INLINEASM node and false if no operand needed rewriting.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // When N has a glued node the glue is its last operand; it is kept aside
  // here and re-appended after the operand list has been rebuilt.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // One entry per operand group that has registers, recording whether that
  // group was replaced by a GPRPair. It is indexed by DefIdx for tied uses.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // The operands before Op_FirstOperand are copied through unchanged.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only constant operands are treated as flag words; anything else is
    // copied through unchanged.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Only register uses, defs and early-clobber defs are candidates.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Skip the operand unless it consists of exactly two registers and is
    // either constrained to the GPR class or tied to a rewritten def.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    // The two operands following the flag word are the registers to pair.
    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      // Its last operand is replaced by the glue result of the final copy.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      // The rebuilt node takes its input chain and glue from the new copy.
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Build a flag word of the same kind describing a single register.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Build the replacement INLINEASM node from the rewritten operand list.
  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4335
4336
4337bool ARMDAGToDAGISel::
4338SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4339                             std::vector<SDValue> &OutOps) {
4340  switch(ConstraintID) {
4341  default:
4342    llvm_unreachable("Unexpected asm memory constraint");
4343  case InlineAsm::Constraint_i:
4344    // FIXME: It seems strange that 'i' is needed here since it's supposed to
4345    //        be an immediate and not a memory constraint.
4346    // Fallthrough.
4347  case InlineAsm::Constraint_m:
4348  case InlineAsm::Constraint_o:
4349  case InlineAsm::Constraint_Q:
4350  case InlineAsm::Constraint_Um:
4351  case InlineAsm::Constraint_Un:
4352  case InlineAsm::Constraint_Uq:
4353  case InlineAsm::Constraint_Us:
4354  case InlineAsm::Constraint_Ut:
4355  case InlineAsm::Constraint_Uv:
4356  case InlineAsm::Constraint_Uy:
4357    // Require the address to be in a register.  That is safe for all ARM
4358    // variants and it is hard to do anything much smarter without knowing
4359    // how the operand is used.
4360    OutOps.push_back(Op);
4361    return false;
4362  }
4363  return true;
4364}
4365
4366/// createARMISelDag - This pass converts a legalized DAG into a
4367/// ARM-specific DAG, ready for instruction scheduling.
4368///
4369FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4370                                     CodeGenOpt::Level OptLevel) {
4371  return new ARMDAGToDAGISel(TM, OptLevel);
4372}
4373