1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the ARM target.
11//
12//===----------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "arm-isel"
15#include "ARM.h"
16#include "ARMBaseInstrInfo.h"
17#include "ARMTargetMachine.h"
18#include "MCTargetDesc/ARMAddressingModes.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/CodeGen/MachineFunction.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGISel.h"
25#include "llvm/IR/CallingConv.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Compiler.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/raw_ostream.h"
36#include "llvm/Target/TargetLowering.h"
37#include "llvm/Target/TargetOptions.h"
38
39using namespace llvm;
40
// Debugging/tuning aid: when set, none of the Select*ShifterOperand hooks
// below will match, so shifter operands are never folded at isel time.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));

// On by default: hasNoVMLxHazardUse consults this before deciding whether
// forming a fused VMLA/VMLS is safe on hazard-prone cores.
static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  cl::desc("Check fp vmla / vmls hazard at isel time"),
  cl::init(true));
50
51//===--------------------------------------------------------------------===//
52/// ARMDAGToDAGISel - ARM specific code to select ARM machine
53/// instructions for SelectionDAG operations.
54///
55namespace {
56
// Result kind returned by SelectAddrMode2Worker: distinguishes the simple
// base+imm12 flavor of ARM addressing mode 2 from the shifter-operand flavor.
enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};
61
class ARMDAGToDAGISel : public SelectionDAGISel {
  // The owning target machine; used to reach instruction info and subtarget.
  ARMBaseTargetMachine &TM;

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
                           CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm),
      Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
  }

  virtual const char *getPassName() const {
    return "ARM Instruction Selection";
  }

  // Pre-selection DAG rewrite: reshapes (add X, (and (srl Y, c1), c2)) so
  // the shift can fold into the add; see the implementation for details.
  virtual void PreprocessISelDAG();

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);
  }

  // Main entry point: select a machine instruction for node N.
  SDNode *Select(SDNode *N);


  // True when folding into a fp VMLA/VMLS is not expected to hit the
  // VMLx RAW hazard on the current subtarget.
  bool hasNoVMLxHazardUse(SDNode *N) const;
  // True when using Shift as a folded shifter operand (opcode ShOpcVal,
  // amount ShAmt) is profitable on the current subtarget.
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  // Shifter-operand matchers (so_reg / so_imm style operands).
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  // ARM Addressing Modes:
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  // Shared worker for the AM2 matchers below; the wrappers filter on which
  // AM2 flavor (base+imm12 vs. shifter-op) was matched.
  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
                                      SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
  }

  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
                           SDValue &Opc) {
    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
  }

  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
                       SDValue &Opc) {
    SelectAddrMode2Worker(N, Base, Offset, Opc);
//    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    // This always matches one way or another.
    return true;
  }

  // Post-indexed / pre-indexed AM2 offset matchers.
  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode5(SDValue N, SDValue &Base,
                       SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
                             unsigned Scale);
  bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2ShifterOperandReg(SDValue N,
                                 SDValue &BaseReg, SDValue &Opc);
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Immediate predicates referenced from the .td instruction patterns.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
  /// ARM.
  SDNode *SelectARMIndexedLoad(SDNode *N);
  SDNode *SelectT2IndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                    const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
                          bool isUpdating, unsigned NumVecs,
                          const uint16_t *DOpcodes, const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.  (Q registers are not supported.)
  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                       const uint16_t *Opcodes);

  /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
  /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
  /// generated to force the table registers to be consecutive.
  SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);

  /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
  SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  /// SelectCMOVOp - Select CMOV instructions for ARM.
  SDNode *SelectCMOVOp(SDNode *N);
  SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                              ARMCC::CondCodes CCVal, SDValue CCR,
                              SDValue InFlag);
  SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                               ARMCC::CondCodes CCVal, SDValue CCR,
                               SDValue InFlag);
  SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                              ARMCC::CondCodes CCVal, SDValue CCR,
                              SDValue InFlag);
  SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                               ARMCC::CondCodes CCVal, SDValue CCR,
                               SDValue InFlag);

  // Select special operations if node forms integer ABS pattern
  SDNode *SelectABSOp(SDNode *N);

  SDNode *SelectInlineAsm(SDNode *N);

  SDNode *SelectConcatVector(SDNode *N);

  SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                            char ConstraintCode,
                                            std::vector<SDValue> &OutOps);

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
};
286}
287
288/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
289/// operand. If so Imm will receive the 32-bit value.
290static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
291  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
292    Imm = cast<ConstantSDNode>(N)->getZExtValue();
293    return true;
294  }
295  return false;
296}
297
298// isInt32Immediate - This method tests to see if a constant operand.
299// If so Imm will receive the 32 bit value.
300static bool isInt32Immediate(SDValue N, unsigned &Imm) {
301  return isInt32Immediate(N.getNode(), Imm);
302}
303
304// isOpcWithIntImmediate - This method tests to see if the node is a specific
305// opcode and that it has a immediate integer right operand.
306// If so Imm will receive the 32 bit value.
307static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
308  return N->getOpcode() == Opc &&
309         isInt32Immediate(N->getOperand(1).getNode(), Imm);
310}
311
312/// \brief Check whether a particular node is a constant value representable as
313/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
314///
315/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
316static bool isScaledConstantInRange(SDValue Node, int Scale,
317                                    int RangeMin, int RangeMax,
318                                    int &ScaledConstant) {
319  assert(Scale > 0 && "Invalid scale!");
320
321  // Check that this is a constant.
322  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
323  if (!C)
324    return false;
325
326  ScaledConstant = (int) C->getZExtValue();
327  if ((ScaledConstant % Scale) != 0)
328    return false;
329
330  ScaledConstant /= Scale;
331  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
332}
333
// PreprocessISelDAG - Reshape (add X, (and (srl Y, c1), c2)) so the shift
// folds into the add and the and/srl become a UBFX; see the comment inside.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The transformation below targets UBFX, which requires v6T2.
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): this queries isThumb(), not isThumb2(); given the
  // hasV6T2Ops() guard above a Thumb target here should be Thumb2 — confirm.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    // Canonicalize so that the (and ... imm) operand, if any, is N1.
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    // And_imm == 0 means neither operand matched (an AND with mask 0 would be
    // folded away anyway).
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    // After dropping the trailing zeros the mask must be contiguous ones,
    // i.e. of the form 2^k - 1.
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    // Fold the trailing-zero shift of the mask into the srl amount ...
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
    // ... mask with the narrowed (contiguous) constant ...
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl, CurDAG->getConstant(And_imm, MVT::i32));
    // ... and reintroduce the trailing zeros as an explicit left shift that
    // can fold as the add's shifter operand.
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
414
415/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
416/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
417/// least on current ARM implementations) which should be avoidded.
418bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
419  if (OptLevel == CodeGenOpt::None)
420    return true;
421
422  if (!CheckVMLxHazard)
423    return true;
424
425  if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9() &&
426      !Subtarget->isSwift())
427    return true;
428
429  if (!N->hasOneUse())
430    return false;
431
432  SDNode *Use = *N->use_begin();
433  if (Use->getOpcode() == ISD::CopyToReg)
434    return true;
435  if (Use->isMachineOpcode()) {
436    const ARMBaseInstrInfo *TII =
437      static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
438
439    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
440    if (MCID.mayStore())
441      return true;
442    unsigned Opcode = MCID.getOpcode();
443    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
444      return true;
445    // vmlx feeding into another vmlx. We actually want to unfold
446    // the use later in the MLxExpansion pass. e.g.
447    // vmla
448    // vmla (stall 8 cycles)
449    //
450    // vmul (5 cycles)
451    // vadd (5 cycles)
452    // vmla
453    // This adds up to about 18 - 19 cycles.
454    //
455    // vmla
456    // vmul (stall 4 cycles)
457    // vadd adds up to about 14 cycles.
458    return TII->isFpMLxInstruction(Opcode);
459  }
460
461  return false;
462}
463
464bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
465                                            ARM_AM::ShiftOpc ShOpcVal,
466                                            unsigned ShAmt) {
467  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
468    return true;
469  if (Shift.hasOneUse())
470    return true;
471  // R << 2 is free.
472  return ShOpcVal == ARM_AM::lsl &&
473         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
474}
475
476bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
477                                              SDValue &BaseReg,
478                                              SDValue &Opc,
479                                              bool CheckProfitability) {
480  if (DisableShifterOp)
481    return false;
482
483  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
484
485  // Don't match base register only case. That is matched to a separate
486  // lower complexity pattern with explicit register operand.
487  if (ShOpcVal == ARM_AM::no_shift) return false;
488
489  BaseReg = N.getOperand(0);
490  unsigned ShImmVal = 0;
491  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
492  if (!RHS) return false;
493  ShImmVal = RHS->getZExtValue() & 31;
494  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
495                                  MVT::i32);
496  return true;
497}
498
499bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
500                                              SDValue &BaseReg,
501                                              SDValue &ShReg,
502                                              SDValue &Opc,
503                                              bool CheckProfitability) {
504  if (DisableShifterOp)
505    return false;
506
507  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
508
509  // Don't match base register only case. That is matched to a separate
510  // lower complexity pattern with explicit register operand.
511  if (ShOpcVal == ARM_AM::no_shift) return false;
512
513  BaseReg = N.getOperand(0);
514  unsigned ShImmVal = 0;
515  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
516  if (RHS) return false;
517
518  ShReg = N.getOperand(1);
519  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
520    return false;
521  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
522                                  MVT::i32);
523  return true;
524}
525
526
// SelectAddrModeImm12 - Match N as a base register plus a 12-bit unsigned
// (ADD) or negated (SUB) immediate offset, for LDRi12/STRi12-style operands.
// Always succeeds: anything unmatched falls back to base-only with offset 0.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI,
                                         getTargetLowering()->getPointerTy());
      OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
      return true;
    }

    // Look through ARMISD::Wrapper unless it wraps a global that MOVT will
    // materialize; in that case keep the wrapper as the base.
    if (N.getOpcode() == ARMISD::Wrapper &&
        !(Subtarget->useMovt() &&
                     N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    // Canonicalize SUB to a negated ADD offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI,
                                           getTargetLowering()->getPointerTy());
      }
      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
576
577
578
// SelectLdStSOReg - Match N as a register +/- (possibly shifted) register
// operand for AM2 loads/stores, emitting Base, Offset and the encoded AM2
// opcode word. Returns false for shapes better served by the imm12 forms.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // On A9-like cores and Swift a multi-use shifter operand is not free, so
  // the MUL trick below is only applied when the MUL has a single use there.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Base and Offset are the same register: X + (X lsl ShAmt).
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Unprofitable: keep the whole shift as the (unshifted) offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // Only when the RHS didn't supply a shift; swaps Base/Offset on success.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  MVT::i32);
  return true;
}
672
673
674//-----
675
// SelectAddrMode2Worker - Match N as an ARM addressing-mode-2 operand.
// Always matches, returning AM2_BASE for register +/- imm12 forms and
// AM2_SHOP for register +/- (shifted) register forms; Base/Offset/Opc are
// filled in accordingly. Shared by the SelectAddrMode2* wrappers.
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  // On A9-like cores and Swift a multi-use shifter operand is not free, so
  // the MUL trick below is only applied when the MUL has a single use there.
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Base and Offset are the same register: X + (X lsl ShAmt).
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  // Not an add/sub shape: treat as base-only with a zero offset.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI,
                                         getTargetLowering()->getPointerTy());
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               !(Subtarget->useMovt() &&
                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      // Look through the wrapper unless MOVT will materialize the global.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI,
                                           getTargetLowering()->getPointerTy());
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      // Negative offsets are encoded as a subtract of the absolute value.
      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      MVT::i32);
      return AM2_BASE;
    }
  }

  // On picky cores, a multi-use address computation is cheaper kept as a
  // single value than folded into each user; match it base-only.
  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Unprofitable: keep the whole shift as the (unshifted) offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // Only when the RHS didn't supply a shift; swaps Base/Offset on success.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  MVT::i32);
  return AM2_SHOP;
}
813
/// SelectAddrMode2OffsetReg - Select a register (possibly shifted) offset for
/// a pre/post-indexed AM2 load/store. Immediate offsets in the 12-bit range
/// are deliberately rejected here so the *_IMM instruction forms match them
/// instead.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // Op is the indexed load/store; its addressing mode tells us whether the
  // offset is added to or subtracted from the base.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // A constant offset that fits in 12 bits belongs to the immediate forms;
  // bail out so those patterns get a chance to match.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  MVT::i32);
  return true;
}
849
850bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
851                                            SDValue &Offset, SDValue &Opc) {
852  unsigned Opcode = Op->getOpcode();
853  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
854    ? cast<LoadSDNode>(Op)->getAddressingMode()
855    : cast<StoreSDNode>(Op)->getAddressingMode();
856  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
857    ? ARM_AM::add : ARM_AM::sub;
858  int Val;
859  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
860    if (AddSub == ARM_AM::sub) Val *= -1;
861    Offset = CurDAG->getRegister(0, MVT::i32);
862    Opc = CurDAG->getTargetConstant(Val, MVT::i32);
863    return true;
864  }
865
866  return false;
867}
868
869
870bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
871                                            SDValue &Offset, SDValue &Opc) {
872  unsigned Opcode = Op->getOpcode();
873  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
874    ? cast<LoadSDNode>(Op)->getAddressingMode()
875    : cast<StoreSDNode>(Op)->getAddressingMode();
876  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
877    ? ARM_AM::add : ARM_AM::sub;
878  int Val;
879  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
880    Offset = CurDAG->getRegister(0, MVT::i32);
881    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
882                                                      ARM_AM::no_shift),
883                                    MVT::i32);
884    return true;
885  }
886
887  return false;
888}
889
/// SelectAddrOffsetNone - Match any address as a bare base register with no
/// offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
894
/// SelectAddrMode3 - Select base register, offset register and AM3 opcode
/// immediate for an addressing-mode-3 memory operand. Always succeeds; the
/// cases that don't fold an immediate fall through to a reg+reg form.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No foldable constant offset: the whole address is the base.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI,
                                         getTargetLowering()->getPointerTy());
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI,
                                         getTargetLowering()->getPointerTy());
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // AM3 encodes the sign separately from the offset magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
    return true;
  }

  // Fallback: reg + reg with no shift.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
  return true;
}
944
/// SelectAddrMode3Offset - Select the offset operands for a pre/post-indexed
/// AM3 load/store: either a small immediate (folded into the AM3 opcode) or
/// a plain register. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
    return true;
  }

  // Non-constant (or out-of-range) offset: use a register offset.
  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
  return true;
}
964
/// SelectAddrMode5 - Select base register and AM5 opcode immediate for a
/// VFP load/store (base + imm8, scaled by 4). Always succeeds; addresses
/// without a foldable offset use a zero offset.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI,
                                         getTargetLowering()->getPointerTy());
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               !(Subtarget->useMovt() &&
                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      // Peel the wrapper off unless a movt will materialize the address.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8 (scaled by 4), fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
                              -256 + 1, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI,
                                         getTargetLowering()->getPointerTy());
    }

    // AM5 encodes the sign separately from the offset magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                       MVT::i32);
    return true;
  }

  // Offset didn't fit: use the whole address as the base with zero offset.
  Base = N;
  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                     MVT::i32);
  return true;
}
1009
/// SelectAddrMode6 - Select the address and alignment operands for a NEON
/// vector load/store (addrmode6). Always succeeds; the alignment operand is
/// derived from the parent memory node.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;
  if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned LSNAlign = LSN->getAlignment();
    unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
    if (LSNAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
  return true;
}
1032
1033bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1034                                            SDValue &Offset) {
1035  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1036  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1037  if (AM != ISD::POST_INC)
1038    return false;
1039  Offset = N;
1040  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1041    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1042      Offset = CurDAG->getRegister(0, MVT::i32);
1043  }
1044  return true;
1045}
1046
1047bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1048                                       SDValue &Offset, SDValue &Label) {
1049  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1050    Offset = N.getOperand(0);
1051    SDValue N1 = N.getOperand(1);
1052    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1053                                      MVT::i32);
1054    return true;
1055  }
1056
1057  return false;
1058}
1059
1060
1061//===----------------------------------------------------------------------===//
1062//                         Thumb Addressing Modes
1063//===----------------------------------------------------------------------===//
1064
1065bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1066                                            SDValue &Base, SDValue &Offset){
1067  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1068    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1069    if (!NC || !NC->isNullValue())
1070      return false;
1071
1072    Base = Offset = N;
1073    return true;
1074  }
1075
1076  Base = N.getOperand(0);
1077  Offset = N.getOperand(1);
1078  return true;
1079}
1080
/// SelectThumbAddrModeRI - Select a Thumb reg+reg address with an implied
/// scale, deferring to the SP-relative, constant-pool and immediate forms
/// where those are preferable.
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
                                       SDValue &Offset, unsigned Scale) {
  if (Scale == 4) {
    SDValue TmpBase, TmpOffImm;
    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
      return false;  // We want to select tLDRspi / tSTRspi instead.

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
      return false;  // We want to select tLDRpci instead.
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Thumb does not have [sp, r] address mode.
  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
  if ((LHSR && LHSR->getReg() == ARM::SP) ||
      (RHSR && RHSR->getReg() == ARM::SP))
    return false;

  // FIXME: Why do we explicitly check for a match here and then return false?
  // Presumably to allow something else to match, but shouldn't this be
  // documented?
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
    return false;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}
1115
/// SelectThumbAddrModeRI5S1 - Reg+reg addressing for byte-sized accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
                                          SDValue &Base,
                                          SDValue &Offset) {
  return SelectThumbAddrModeRI(N, Base, Offset, 1);
}
1122
/// SelectThumbAddrModeRI5S2 - Reg+reg addressing for halfword accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
                                          SDValue &Base,
                                          SDValue &Offset) {
  return SelectThumbAddrModeRI(N, Base, Offset, 2);
}
1129
/// SelectThumbAddrModeRI5S4 - Reg+reg addressing for word accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
                                          SDValue &Base,
                                          SDValue &Offset) {
  return SelectThumbAddrModeRI(N, Base, Offset, 4);
}
1136
/// SelectThumbAddrModeImm5S - Select a Thumb base + scaled-imm5 address.
/// Always succeeds unless an SP-relative or constant-pool form should match
/// instead; offsets that don't fold become a zero immediate.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (Scale == 4) {
    SDValue TmpBase, TmpOffImm;
    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
      return false;  // We want to select tLDRspi / tSTRspi instead.

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
      return false;  // We want to select tLDRpci instead.
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // Peel the wrapper off unless a movt will materialize the address.
    if (N.getOpcode() == ARMISD::Wrapper &&
        !(Subtarget->useMovt() &&
          N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
  if ((LHSR && LHSR->getReg() == ARM::SP) ||
      (RHSR && RHSR->getReg() == ARM::SP)) {
    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
    unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
    unsigned RHSC = RHS ? RHS->getZExtValue() : 0;

    // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
    if (LHSC != 0 || RHSC != 0) return false;

    Base = N;
    OffImm = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
    return true;
  }

  // Offset didn't fold: keep the base and use a zero immediate.
  Base = N.getOperand(0);
  OffImm = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
1192
/// SelectThumbAddrModeImm5S4 - Base + imm5 addressing for word accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1198
/// SelectThumbAddrModeImm5S2 - Base + imm5 addressing for halfword accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1204
/// SelectThumbAddrModeImm5S1 - Base + imm5 addressing for byte accesses.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1210
/// SelectThumbAddrModeSP - Select an SP-relative (or frame-index-relative)
/// Thumb address: base + imm8 scaled by 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    // A bare frame index is SP-relative with a zero offset.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI,
                                       getTargetLowering()->getPointerTy());
    OffImm = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Only match when the base is SP or a frame index.
  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
      (LHSR && LHSR->getReg() == ARM::SP)) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI,
                                           getTargetLowering()->getPointerTy());
      }
      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
      return true;
    }
  }

  return false;
}
1243
1244
1245//===----------------------------------------------------------------------===//
1246//                        Thumb 2 Addressing Modes
1247//===----------------------------------------------------------------------===//
1248
1249
1250bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
1251                                                SDValue &Opc) {
1252  if (DisableShifterOp)
1253    return false;
1254
1255  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
1256
1257  // Don't match base register only case. That is matched to a separate
1258  // lower complexity pattern with explicit register operand.
1259  if (ShOpcVal == ARM_AM::no_shift) return false;
1260
1261  BaseReg = N.getOperand(0);
1262  unsigned ShImmVal = 0;
1263  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1264    ShImmVal = RHS->getZExtValue() & 31;
1265    Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
1266    return true;
1267  }
1268
1269  return false;
1270}
1271
/// SelectT2AddrModeImm12 - Select a Thumb2 base + unsigned imm12 address.
/// (R - imm8) is left for the imm8 form, and constant pools for t2LDRpci.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(FI,
                                         getTargetLowering()->getPointerTy());
      OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
               !(Subtarget->useMovt() &&
                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI,
                                           getTargetLowering()->getPointerTy());
      }
      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
1326
/// SelectT2AddrModeImm8 - Select a Thumb2 base + negative imm8 address.
/// Only offsets in [-255, 0) match; non-negative offsets are left for the
/// imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(FI,
                                           getTargetLowering()->getPointerTy());
      }
      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
      return true;
    }
  }

  return false;
}
1353
1354bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1355                                                 SDValue &OffImm){
1356  unsigned Opcode = Op->getOpcode();
1357  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1358    ? cast<LoadSDNode>(Op)->getAddressingMode()
1359    : cast<StoreSDNode>(Op)->getAddressingMode();
1360  int RHSC;
1361  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1362    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1363      ? CurDAG->getTargetConstant(RHSC, MVT::i32)
1364      : CurDAG->getTargetConstant(-RHSC, MVT::i32);
1365    return true;
1366  }
1367
1368  return false;
1369}
1370
/// SelectT2AddrModeSoReg - Select a Thumb2 reg + (reg << imm2) address.
/// Constant offsets are deliberately left for the imm12/imm8 forms.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 0-3 are encodable; larger ones fall back to no shift.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);

  return true;
}
1420
/// SelectT2AddrModeExclusive - Select base + imm8 (scaled by 4) for the
/// exclusive load/store instructions. Unfoldable offsets simply leave the
/// whole address as the base with a zero offset.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // The encoded offset is a word-aligned byte offset up to 1020.
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy());
  }

  // The instruction encodes the offset in words.
  OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32);
  return true;
}
1448
1449//===--------------------------------------------------------------------===//
1450
1451/// getAL - Returns a ARMCC::AL immediate node.
1452static inline SDValue getAL(SelectionDAG *CurDAG) {
1453  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
1454}
1455
/// SelectARMIndexedLoad - Select an ARM-mode pre/post-indexed load, choosing
/// the instruction by loaded type, extension kind, and which addressing-mode
/// selector matches the offset. Returns null if nothing matches.
SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return NULL;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // The order matters: immediate forms are preferred over register forms.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addressing mode 3.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Sign-extending byte loads use AM3; zero/any-extending ones use AM2.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms take no separate offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                    MVT::i32, MVT::Other, Ops);
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                    MVT::i32, MVT::Other, Ops);
    }
  }

  return NULL;
}
1528
/// SelectT2IndexedLoad - Select a Thumb2 pre/post-indexed load. All forms
/// use the same imm8 offset; the instruction is chosen by loaded type and
/// extension kind. Returns null if nothing matches.
SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return NULL;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return NULL;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                  MVT::Other, Ops);
  }

  return NULL;
}
1576
1577/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1578SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1579  SDLoc dl(V0.getNode());
1580  SDValue RegClass =
1581    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
1582  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
1583  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
1584  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1585  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1586}
1587
1588/// \brief Form a D register from a pair of S registers.
1589SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1590  SDLoc dl(V0.getNode());
1591  SDValue RegClass =
1592    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
1593  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
1594  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
1595  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1596  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1597}
1598
1599/// \brief Form a quad register from a pair of D registers.
1600SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1601  SDLoc dl(V0.getNode());
1602  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
1603  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
1604  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
1605  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1606  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1607}
1608
1609/// \brief Form 4 consecutive D registers from a pair of Q registers.
1610SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1611  SDLoc dl(V0.getNode());
1612  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
1613  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
1614  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
1615  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1616  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1617}
1618
1619/// \brief Form 4 consecutive S registers.
1620SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1621                                   SDValue V2, SDValue V3) {
1622  SDLoc dl(V0.getNode());
1623  SDValue RegClass =
1624    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
1625  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
1626  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
1627  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
1628  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
1629  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1630                                    V2, SubReg2, V3, SubReg3 };
1631  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1632}
1633
1634/// \brief Form 4 consecutive D registers.
1635SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1636                                   SDValue V2, SDValue V3) {
1637  SDLoc dl(V0.getNode());
1638  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
1639  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
1640  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
1641  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
1642  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
1643  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1644                                    V2, SubReg2, V3, SubReg3 };
1645  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1646}
1647
1648/// \brief Form 4 consecutive Q registers.
1649SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1650                                   SDValue V2, SDValue V3) {
1651  SDLoc dl(V0.getNode());
1652  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
1653  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
1654  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
1655  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
1656  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
1657  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1658                                    V2, SubReg2, V3, SubReg3 };
1659  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1660}
1661
1662/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1663/// of a NEON VLD or VST instruction.  The supported values depend on the
1664/// number of registers being loaded.
1665SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
1666                                       bool is64BitVector) {
1667  unsigned NumRegs = NumVecs;
1668  if (!is64BitVector && NumVecs < 3)
1669    NumRegs *= 2;
1670
1671  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1672  if (Alignment >= 32 && NumRegs == 4)
1673    Alignment = 32;
1674  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1675    Alignment = 16;
1676  else if (Alignment >= 8)
1677    Alignment = 8;
1678  else
1679    Alignment = 0;
1680
1681  return CurDAG->getTargetConstant(Alignment, MVT::i32);
1682}
1683
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The "_register" forms take an explicit increment-register operand in place
// of the implicit fixed stride; opcodes with no register-update counterpart
// are returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  switch (Opc) {
  default: break;
  // VLD1 writeback forms.
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;

  // VST1 writeback forms (including multi-register pseudos).
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 writeback forms.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 writeback forms.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP writeback forms.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1729
/// Select a NEON vector-load intrinsic (VLD1-VLD4), optionally with address
/// writeback.  The opcode tables are indexed by element size (8/16/32/64):
/// \p DOpcodes holds the double-register forms, \p QOpcodes0 the
/// quad-register forms (and the even-registers half of a quad VLD3/VLD4),
/// and \p QOpcodes1 the odd-registers half of a quad VLD3/VLD4.  Returns
/// the new node, or NULL when the original node's results were replaced
/// in place.
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // The address operand's position depends on whether this is the updating
  // (post-increment) form of the node.
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return NULL;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);

  // Map the vector type (element size) to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
    break;
  }

  // Multi-vector loads produce a single super-register result; VLD3 is
  // modeled with a 4-register class (the last register is unused).
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1)); // address produced by the first load
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0)); // partially-filled super-register
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // Extract out the subregisters and rewrite the original node's result
  // values to use them.
  SDValue SuperReg = SDValue(VLd, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  return NULL;
}
1861
/// Select a NEON vector-store intrinsic (VST1-VST4), optionally with address
/// writeback.  The opcode tables are indexed by element size (8/16/32/64):
/// \p DOpcodes holds the double-register forms, \p QOpcodes0 the
/// quad-register forms (and the even-registers half of a quad VST3/VST4),
/// and \p QOpcodes1 the odd-registers half of a quad VST3/VST4.
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Operand layout differs between the updating and plain forms.
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return NULL;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);

  // Map the vector type (element size) to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
    break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

    return VSt;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0)); // address produced by the first store
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
  return VStB;
}
2006
/// Select a NEON load-lane or store-lane intrinsic (VLD2LN-VLD4LN /
/// VST2LN-VST4LN).  \p IsLoad distinguishes loads from stores; \p DOpcodes
/// and \p QOpcodes are opcode tables indexed by element size for the
/// double- and quad-register forms.  Returns the new node, or NULL when
/// the original load node's results were replaced in place.
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
                                         bool isUpdating, unsigned NumVecs,
                                         const uint16_t *DOpcodes,
                                         const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Operand layout differs between the updating and plain forms.
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return NULL;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number operand follows the vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment to the total access size (NumVecs elements), then
  // reduce it to a supported power of two; below that, use 0 (unspecified).
  // NumVecs == 3 has no alignment operand encoding.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);

  // Map the vector type (element size) to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    // Loads produce a super-register result; VLD3 is modeled with a
    // 4-register class (the last register is unused).
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated base address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
  }

  // Bundle the input vectors into a single super-register operand.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  if (!IsLoad)
    return VLdLn;

  // Extract the subregisters and rewrite the original node's result values.
  SuperReg = SDValue(VLdLn, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  return NULL;
}
2125
/// Select a NEON load-and-duplicate intrinsic (VLD2DUP-VLD4DUP): load one
/// element per vector and replicate it across all lanes.  \p Opcodes is an
/// opcode table indexed by element size.  Returns NULL after replacing the
/// original node's results in place.
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *Opcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
    return NULL;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Clamp the alignment to the total access size (NumVecs elements), then
  // reduce it to a supported power of two; below that, use 0 (unspecified).
  // NumVecs == 3 has no alignment operand encoding.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);

  // Map the vector type (element size) to an index into the opcode table.
  // Only double-register types are handled here.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  }

  SDValue Pred = getAL(CurDAG);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDValue SuperReg;
  unsigned Opc = Opcodes[OpcodeIndex];
  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // fixed-stride update instructions don't have an explicit writeback
    // operand. It's implicit in the opcode itself.
    SDValue Inc = N->getOperand(2);
    if (!isa<ConstantSDNode>(Inc.getNode()))
      Ops.push_back(Inc);
    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
    else if (NumVecs > 2)
      Ops.push_back(Reg0);
  }
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  // The result is a super-register; VLD3DUP is modeled with a 4-register
  // class (the last register is unused).
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  std::vector<EVT> ResTys;
  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
  if (isUpdating)
    ResTys.push_back(MVT::i32); // updated base address
  ResTys.push_back(MVT::Other); // chain
  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
  SuperReg = SDValue(VLdDup, 0);

  // Extract the subregisters.
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  unsigned SubIdx = ARM::dsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  return NULL;
}
2208
/// Select a NEON table-lookup intrinsic (VTBL when \p IsExt is false,
/// VTBX when true).  \p NumVecs is the number of table registers and
/// \p Opc the machine opcode to emit.  The table registers are bundled
/// into a single super-register operand via a REG_SEQUENCE.
SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
                                    unsigned Opc) {
  assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  // VTBX has an extra leading operand (the fallback vector), shifting the
  // position of the first table register.
  unsigned FirstTblReg = IsExt ? 2 : 1;

  // Form a REG_SEQUENCE to force register allocation.
  SDValue RegSeq;
  SDValue V0 = N->getOperand(FirstTblReg + 0);
  SDValue V1 = N->getOperand(FirstTblReg + 1);
  if (NumVecs == 2)
    RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
  else {
    SDValue V2 = N->getOperand(FirstTblReg + 2);
    // If it's a vtbl3, form a quad D-register and leave the last part as
    // an undef.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(FirstTblReg + 3);
    RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
  }

  SmallVector<SDValue, 6> Ops;
  if (IsExt)
    Ops.push_back(N->getOperand(1)); // fallback vector for VTBX
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); // index vector
  Ops.push_back(getAL(CurDAG)); // predicate
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
2241
/// Try to select an AND-of-SRL or SRL-of-SHL pattern as a V6T2 bitfield
/// extract (SBFX/UBFX), or as a plain shift when the field reaches the top
/// bit.  \p isSigned selects the signed (SBFX) vs. unsigned (UBFX) opcode.
/// Returns NULL if the pattern doesn't match or the subtarget lacks V6T2.
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
                                                     bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return NULL;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return NULL;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Note: The width operand is encoded as width-1.
        unsigned Width = CountTrailingOnes_32(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        // If the field extends to the MSB, a plain shift suffices.
        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, MVT::i32),
                              getAL(CurDAG), Reg0, Reg0 };
            return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG), Reg0, Reg0 };
          return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
        }

        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, MVT::i32),
                          CurDAG->getTargetConstant(Width, MVT::i32),
          getAL(CurDAG), Reg0 };
        return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
      }
    }
    return NULL;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB means the field straddles the container; give up.
      if (LSB < 0)
        return NULL;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, MVT::i32),
                        CurDAG->getTargetConstant(Width, MVT::i32),
                        getAL(CurDAG), Reg0 };
      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
    }
  }
  return NULL;
}
2323
2324SDNode *ARMDAGToDAGISel::
2325SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2326                    ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2327  SDValue CPTmp0;
2328  SDValue CPTmp1;
2329  if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
2330    unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
2331    unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
2332    unsigned Opc = 0;
2333    switch (SOShOp) {
2334    case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
2335    case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
2336    case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
2337    case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
2338    default:
2339      llvm_unreachable("Unknown so_reg opcode!");
2340    }
2341    SDValue SOShImm =
2342      CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
2343    SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2344    SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
2345    return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
2346  }
2347  return 0;
2348}
2349
2350SDNode *ARMDAGToDAGISel::
2351SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2352                     ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2353  SDValue CPTmp0;
2354  SDValue CPTmp1;
2355  SDValue CPTmp2;
2356  if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
2357    SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2358    SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
2359    return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
2360  }
2361
2362  if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
2363    SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2364    SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
2365    return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
2366  }
2367  return 0;
2368}
2369
2370SDNode *ARMDAGToDAGISel::
2371SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2372                  ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2373  ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
2374  if (!T)
2375    return 0;
2376
2377  unsigned Opc = 0;
2378  unsigned TrueImm = T->getZExtValue();
2379  if (is_t2_so_imm(TrueImm)) {
2380    Opc = ARM::t2MOVCCi;
2381  } else if (TrueImm <= 0xffff) {
2382    Opc = ARM::t2MOVCCi16;
2383  } else if (is_t2_so_imm_not(TrueImm)) {
2384    TrueImm = ~TrueImm;
2385    Opc = ARM::t2MVNCCi;
2386  } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
2387    // Large immediate.
2388    Opc = ARM::t2MOVCCi32imm;
2389  }
2390
2391  if (Opc) {
2392    SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
2393    SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2394    SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
2395    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2396  }
2397
2398  return 0;
2399}
2400
2401SDNode *ARMDAGToDAGISel::
2402SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2403                   ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2404  ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
2405  if (!T)
2406    return 0;
2407
2408  unsigned Opc = 0;
2409  unsigned TrueImm = T->getZExtValue();
2410  bool isSoImm = is_so_imm(TrueImm);
2411  if (isSoImm) {
2412    Opc = ARM::MOVCCi;
2413  } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
2414    Opc = ARM::MOVCCi16;
2415  } else if (is_so_imm_not(TrueImm)) {
2416    TrueImm = ~TrueImm;
2417    Opc = ARM::MVNCCi;
2418  } else if (TrueVal.getNode()->hasOneUse() &&
2419             (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
2420    // Large immediate.
2421    Opc = ARM::MOVCCi32imm;
2422  }
2423
2424  if (Opc) {
2425    SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
2426    SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2427    SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
2428    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2429  }
2430
2431  return 0;
2432}
2433
/// SelectCMOVOp - Select an ARMISD::CMOV node. Tries progressively cheaper
/// encodings: a shifter-operand form, then an immediate form (each also
/// retried with the operands swapped and the condition inverted), and
/// finally falls back to a plain predicated register move.
SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue FalseVal = N->getOperand(0);
  SDValue TrueVal  = N->getOperand(1);
  SDValue CC = N->getOperand(2);
  SDValue CCR = N->getOperand(3);
  SDValue InFlag = N->getOperand(4);
  assert(CC.getOpcode() == ISD::Constant);
  assert(CCR.getOpcode() == ISD::Register);
  ARMCC::CondCodes CCVal =
    (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();

  // The shifter-op and immediate forms only exist for i32 and not in Thumb1.
  if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
    // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
    // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
    // Pattern complexity = 18  cost = 1  size = 0
    if (Subtarget->isThumb()) {
      SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
                                        CCVal, CCR, InFlag);
      // Retry with operands swapped under the opposite condition.
      if (!Res)
        Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
                               ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
      if (Res)
        return Res;
    } else {
      SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
                                         CCVal, CCR, InFlag);
      // Retry with operands swapped under the opposite condition.
      if (!Res)
        Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
                               ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
      if (Res)
        return Res;
    }

    // Pattern: (ARMcmov:i32 GPR:i32:$false,
    //             (imm:i32)<<P:Pred_so_imm>>:$true,
    //             (imm:i32):$cc)
    // Emits: (MOVCCi:i32 GPR:i32:$false,
    //           (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
    // Pattern complexity = 10  cost = 1  size = 0
    if (Subtarget->isThumb()) {
      SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
                                        CCVal, CCR, InFlag);
      // Retry with operands swapped under the opposite condition.
      if (!Res)
        Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
                               ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
      if (Res)
        return Res;
    } else {
      SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
                                         CCVal, CCR, InFlag);
      // Retry with operands swapped under the opposite condition.
      if (!Res)
        Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
                               ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
      if (Res)
        return Res;
    }
  }

  // Fallback: plain predicated register move.
  //
  // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
  // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
  // Pattern complexity = 6  cost = 1  size = 0
  //
  // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
  // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
  // Pattern complexity = 6  cost = 11  size = 0
  //
  // Also VMOVScc and VMOVDcc.
  SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
  SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Illegal conditional move type!");
  case MVT::i32:
    Opc = Subtarget->isThumb()
      ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
      : ARM::MOVCCr;
    break;
  case MVT::f32:
    Opc = ARM::VMOVScc;
    break;
  case MVT::f64:
    Opc = ARM::VMOVDcc;
    break;
  }
  return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
2521
2522/// Target-specific DAG combining for ISD::XOR.
2523/// Target-independent combining lowers SELECT_CC nodes of the form
2524/// select_cc setg[ge] X,  0,  X, -X
2525/// select_cc setgt    X, -1,  X, -X
2526/// select_cc setl[te] X,  0, -X,  X
2527/// select_cc setlt    X,  1, -X,  X
2528/// which represent Integer ABS into:
2529/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2530/// ARM instruction selection detects the latter and matches it to
2531/// ARM::ABS or ARM::t2ABS machine node.
2532SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2533  SDValue XORSrc0 = N->getOperand(0);
2534  SDValue XORSrc1 = N->getOperand(1);
2535  EVT VT = N->getValueType(0);
2536
2537  if (Subtarget->isThumb1Only())
2538    return NULL;
2539
2540  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2541    return NULL;
2542
2543  SDValue ADDSrc0 = XORSrc0.getOperand(0);
2544  SDValue ADDSrc1 = XORSrc0.getOperand(1);
2545  SDValue SRASrc0 = XORSrc1.getOperand(0);
2546  SDValue SRASrc1 = XORSrc1.getOperand(1);
2547  ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2548  EVT XType = SRASrc0.getValueType();
2549  unsigned Size = XType.getSizeInBits() - 1;
2550
2551  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2552      XType.isInteger() && SRAConstant != NULL &&
2553      Size == SRAConstant->getZExtValue()) {
2554    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2555    return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2556  }
2557
2558  return NULL;
2559}
2560
2561SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2562  // The only time a CONCAT_VECTORS operation can have legal types is when
2563  // two 64-bit vectors are concatenated to a 128-bit vector.
2564  EVT VT = N->getValueType(0);
2565  if (!VT.is128BitVector() || N->getNumOperands() != 2)
2566    llvm_unreachable("unexpected CONCAT_VECTORS");
2567  return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2568}
2569
2570SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
2571  SmallVector<SDValue, 6> Ops;
2572  Ops.push_back(Node->getOperand(1)); // Ptr
2573  Ops.push_back(Node->getOperand(2)); // Low part of Val1
2574  Ops.push_back(Node->getOperand(3)); // High part of Val1
2575  if (Opc == ARM::ATOMCMPXCHG6432) {
2576    Ops.push_back(Node->getOperand(4)); // Low part of Val2
2577    Ops.push_back(Node->getOperand(5)); // High part of Val2
2578  }
2579  Ops.push_back(Node->getOperand(0)); // Chain
2580  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2581  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
2582  SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
2583                                           MVT::i32, MVT::i32, MVT::Other,
2584                                           Ops);
2585  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
2586  return ResNode;
2587}
2588
2589SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2590  SDLoc dl(N);
2591
2592  if (N->isMachineOpcode())
2593    return NULL;   // Already selected.
2594
2595  switch (N->getOpcode()) {
2596  default: break;
2597  case ISD::INLINEASM: {
2598    SDNode *ResNode = SelectInlineAsm(N);
2599    if (ResNode)
2600      return ResNode;
2601    break;
2602  }
2603  case ISD::XOR: {
2604    // Select special operations if XOR node forms integer ABS pattern
2605    SDNode *ResNode = SelectABSOp(N);
2606    if (ResNode)
2607      return ResNode;
2608    // Other cases are autogenerated.
2609    break;
2610  }
2611  case ISD::Constant: {
2612    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2613    bool UseCP = true;
2614    if (Subtarget->hasThumb2())
2615      // Thumb2-aware targets have the MOVT instruction, so all immediates can
2616      // be done with MOV + MOVT, at worst.
2617      UseCP = 0;
2618    else {
2619      if (Subtarget->isThumb()) {
2620        UseCP = (Val > 255 &&                          // MOV
2621                 ~Val > 255 &&                         // MOV + MVN
2622                 !ARM_AM::isThumbImmShiftedVal(Val));  // MOV + LSL
2623      } else
2624        UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&     // MOV
2625                 ARM_AM::getSOImmVal(~Val) == -1 &&    // MVN
2626                 !ARM_AM::isSOImmTwoPartVal(Val));     // two instrs.
2627    }
2628
2629    if (UseCP) {
2630      SDValue CPIdx =
2631        CurDAG->getTargetConstantPool(ConstantInt::get(
2632                                  Type::getInt32Ty(*CurDAG->getContext()), Val),
2633                                      getTargetLowering()->getPointerTy());
2634
2635      SDNode *ResNode;
2636      if (Subtarget->isThumb1Only()) {
2637        SDValue Pred = getAL(CurDAG);
2638        SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2639        SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2640        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2641                                         Ops);
2642      } else {
2643        SDValue Ops[] = {
2644          CPIdx,
2645          CurDAG->getTargetConstant(0, MVT::i32),
2646          getAL(CurDAG),
2647          CurDAG->getRegister(0, MVT::i32),
2648          CurDAG->getEntryNode()
2649        };
2650        ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2651                                       Ops);
2652      }
2653      ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2654      return NULL;
2655    }
2656
2657    // Other cases are autogenerated.
2658    break;
2659  }
2660  case ISD::FrameIndex: {
2661    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2662    int FI = cast<FrameIndexSDNode>(N)->getIndex();
2663    SDValue TFI = CurDAG->getTargetFrameIndex(FI,
2664                                           getTargetLowering()->getPointerTy());
2665    if (Subtarget->isThumb1Only()) {
2666      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2667                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2668      return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
2669    } else {
2670      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2671                      ARM::t2ADDri : ARM::ADDri);
2672      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2673                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2674                        CurDAG->getRegister(0, MVT::i32) };
2675      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2676    }
2677  }
2678  case ISD::SRL:
2679    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2680      return I;
2681    break;
2682  case ISD::SRA:
2683    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2684      return I;
2685    break;
2686  case ISD::MUL:
2687    if (Subtarget->isThumb1Only())
2688      break;
2689    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2690      unsigned RHSV = C->getZExtValue();
2691      if (!RHSV) break;
2692      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2693        unsigned ShImm = Log2_32(RHSV-1);
2694        if (ShImm >= 32)
2695          break;
2696        SDValue V = N->getOperand(0);
2697        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2698        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
2699        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2700        if (Subtarget->isThumb()) {
2701          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2702          return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
2703        } else {
2704          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2705          return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
2706        }
2707      }
2708      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2709        unsigned ShImm = Log2_32(RHSV+1);
2710        if (ShImm >= 32)
2711          break;
2712        SDValue V = N->getOperand(0);
2713        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2714        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
2715        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2716        if (Subtarget->isThumb()) {
2717          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2718          return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
2719        } else {
2720          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2721          return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
2722        }
2723      }
2724    }
2725    break;
2726  case ISD::AND: {
2727    // Check for unsigned bitfield extract
2728    if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2729      return I;
2730
2731    // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2732    // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2733    // are entirely contributed by c2 and lower 16-bits are entirely contributed
2734    // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2735    // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2736    EVT VT = N->getValueType(0);
2737    if (VT != MVT::i32)
2738      break;
2739    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2740      ? ARM::t2MOVTi16
2741      : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2742    if (!Opc)
2743      break;
2744    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2745    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2746    if (!N1C)
2747      break;
2748    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2749      SDValue N2 = N0.getOperand(1);
2750      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2751      if (!N2C)
2752        break;
2753      unsigned N1CVal = N1C->getZExtValue();
2754      unsigned N2CVal = N2C->getZExtValue();
2755      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2756          (N1CVal & 0xffffU) == 0xffffU &&
2757          (N2CVal & 0xffffU) == 0x0U) {
2758        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2759                                                  MVT::i32);
2760        SDValue Ops[] = { N0.getOperand(0), Imm16,
2761                          getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2762        return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2763      }
2764    }
2765    break;
2766  }
2767  case ARMISD::VMOVRRD:
2768    return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2769                                  N->getOperand(0), getAL(CurDAG),
2770                                  CurDAG->getRegister(0, MVT::i32));
2771  case ISD::UMUL_LOHI: {
2772    if (Subtarget->isThumb1Only())
2773      break;
2774    if (Subtarget->isThumb()) {
2775      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2776                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2777      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2778    } else {
2779      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2780                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2781                        CurDAG->getRegister(0, MVT::i32) };
2782      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2783                                    ARM::UMULL : ARM::UMULLv5,
2784                                    dl, MVT::i32, MVT::i32, Ops);
2785    }
2786  }
2787  case ISD::SMUL_LOHI: {
2788    if (Subtarget->isThumb1Only())
2789      break;
2790    if (Subtarget->isThumb()) {
2791      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2792                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2793      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2794    } else {
2795      SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2796                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2797                        CurDAG->getRegister(0, MVT::i32) };
2798      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2799                                    ARM::SMULL : ARM::SMULLv5,
2800                                    dl, MVT::i32, MVT::i32, Ops);
2801    }
2802  }
2803  case ARMISD::UMLAL:{
2804    if (Subtarget->isThumb()) {
2805      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2806                        N->getOperand(3), getAL(CurDAG),
2807                        CurDAG->getRegister(0, MVT::i32)};
2808      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2809    }else{
2810      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2811                        N->getOperand(3), getAL(CurDAG),
2812                        CurDAG->getRegister(0, MVT::i32),
2813                        CurDAG->getRegister(0, MVT::i32) };
2814      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2815                                      ARM::UMLAL : ARM::UMLALv5,
2816                                      dl, MVT::i32, MVT::i32, Ops);
2817    }
2818  }
2819  case ARMISD::SMLAL:{
2820    if (Subtarget->isThumb()) {
2821      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2822                        N->getOperand(3), getAL(CurDAG),
2823                        CurDAG->getRegister(0, MVT::i32)};
2824      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2825    }else{
2826      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2827                        N->getOperand(3), getAL(CurDAG),
2828                        CurDAG->getRegister(0, MVT::i32),
2829                        CurDAG->getRegister(0, MVT::i32) };
2830      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2831                                      ARM::SMLAL : ARM::SMLALv5,
2832                                      dl, MVT::i32, MVT::i32, Ops);
2833    }
2834  }
2835  case ISD::LOAD: {
2836    SDNode *ResNode = 0;
2837    if (Subtarget->isThumb() && Subtarget->hasThumb2())
2838      ResNode = SelectT2IndexedLoad(N);
2839    else
2840      ResNode = SelectARMIndexedLoad(N);
2841    if (ResNode)
2842      return ResNode;
2843    // Other cases are autogenerated.
2844    break;
2845  }
2846  case ARMISD::BRCOND: {
2847    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2848    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2849    // Pattern complexity = 6  cost = 1  size = 0
2850
2851    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2852    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2853    // Pattern complexity = 6  cost = 1  size = 0
2854
2855    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2856    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2857    // Pattern complexity = 6  cost = 1  size = 0
2858
2859    unsigned Opc = Subtarget->isThumb() ?
2860      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2861    SDValue Chain = N->getOperand(0);
2862    SDValue N1 = N->getOperand(1);
2863    SDValue N2 = N->getOperand(2);
2864    SDValue N3 = N->getOperand(3);
2865    SDValue InFlag = N->getOperand(4);
2866    assert(N1.getOpcode() == ISD::BasicBlock);
2867    assert(N2.getOpcode() == ISD::Constant);
2868    assert(N3.getOpcode() == ISD::Register);
2869
2870    SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2871                               cast<ConstantSDNode>(N2)->getZExtValue()),
2872                               MVT::i32);
2873    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2874    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2875                                             MVT::Glue, Ops);
2876    Chain = SDValue(ResNode, 0);
2877    if (N->getNumValues() == 2) {
2878      InFlag = SDValue(ResNode, 1);
2879      ReplaceUses(SDValue(N, 1), InFlag);
2880    }
2881    ReplaceUses(SDValue(N, 0),
2882                SDValue(Chain.getNode(), Chain.getResNo()));
2883    return NULL;
2884  }
2885  case ARMISD::CMOV:
2886    return SelectCMOVOp(N);
2887  case ARMISD::VZIP: {
2888    unsigned Opc = 0;
2889    EVT VT = N->getValueType(0);
2890    switch (VT.getSimpleVT().SimpleTy) {
2891    default: return NULL;
2892    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2893    case MVT::v4i16: Opc = ARM::VZIPd16; break;
2894    case MVT::v2f32:
2895    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2896    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2897    case MVT::v16i8: Opc = ARM::VZIPq8; break;
2898    case MVT::v8i16: Opc = ARM::VZIPq16; break;
2899    case MVT::v4f32:
2900    case MVT::v4i32: Opc = ARM::VZIPq32; break;
2901    }
2902    SDValue Pred = getAL(CurDAG);
2903    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2904    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2905    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2906  }
2907  case ARMISD::VUZP: {
2908    unsigned Opc = 0;
2909    EVT VT = N->getValueType(0);
2910    switch (VT.getSimpleVT().SimpleTy) {
2911    default: return NULL;
2912    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2913    case MVT::v4i16: Opc = ARM::VUZPd16; break;
2914    case MVT::v2f32:
2915    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2916    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2917    case MVT::v16i8: Opc = ARM::VUZPq8; break;
2918    case MVT::v8i16: Opc = ARM::VUZPq16; break;
2919    case MVT::v4f32:
2920    case MVT::v4i32: Opc = ARM::VUZPq32; break;
2921    }
2922    SDValue Pred = getAL(CurDAG);
2923    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2924    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2925    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2926  }
2927  case ARMISD::VTRN: {
2928    unsigned Opc = 0;
2929    EVT VT = N->getValueType(0);
2930    switch (VT.getSimpleVT().SimpleTy) {
2931    default: return NULL;
2932    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2933    case MVT::v4i16: Opc = ARM::VTRNd16; break;
2934    case MVT::v2f32:
2935    case MVT::v2i32: Opc = ARM::VTRNd32; break;
2936    case MVT::v16i8: Opc = ARM::VTRNq8; break;
2937    case MVT::v8i16: Opc = ARM::VTRNq16; break;
2938    case MVT::v4f32:
2939    case MVT::v4i32: Opc = ARM::VTRNq32; break;
2940    }
2941    SDValue Pred = getAL(CurDAG);
2942    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2943    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2944    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2945  }
2946  case ARMISD::BUILD_VECTOR: {
2947    EVT VecVT = N->getValueType(0);
2948    EVT EltVT = VecVT.getVectorElementType();
2949    unsigned NumElts = VecVT.getVectorNumElements();
2950    if (EltVT == MVT::f64) {
2951      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2952      return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2953    }
2954    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2955    if (NumElts == 2)
2956      return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2957    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2958    return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2959                     N->getOperand(2), N->getOperand(3));
2960  }
2961
2962  case ARMISD::VLD2DUP: {
2963    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2964                                        ARM::VLD2DUPd32 };
2965    return SelectVLDDup(N, false, 2, Opcodes);
2966  }
2967
2968  case ARMISD::VLD3DUP: {
2969    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2970                                        ARM::VLD3DUPd16Pseudo,
2971                                        ARM::VLD3DUPd32Pseudo };
2972    return SelectVLDDup(N, false, 3, Opcodes);
2973  }
2974
2975  case ARMISD::VLD4DUP: {
2976    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2977                                        ARM::VLD4DUPd16Pseudo,
2978                                        ARM::VLD4DUPd32Pseudo };
2979    return SelectVLDDup(N, false, 4, Opcodes);
2980  }
2981
2982  case ARMISD::VLD2DUP_UPD: {
2983    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2984                                        ARM::VLD2DUPd16wb_fixed,
2985                                        ARM::VLD2DUPd32wb_fixed };
2986    return SelectVLDDup(N, true, 2, Opcodes);
2987  }
2988
2989  case ARMISD::VLD3DUP_UPD: {
2990    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2991                                        ARM::VLD3DUPd16Pseudo_UPD,
2992                                        ARM::VLD3DUPd32Pseudo_UPD };
2993    return SelectVLDDup(N, true, 3, Opcodes);
2994  }
2995
2996  case ARMISD::VLD4DUP_UPD: {
2997    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2998                                        ARM::VLD4DUPd16Pseudo_UPD,
2999                                        ARM::VLD4DUPd32Pseudo_UPD };
3000    return SelectVLDDup(N, true, 4, Opcodes);
3001  }
3002
3003  case ARMISD::VLD1_UPD: {
3004    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3005                                         ARM::VLD1d16wb_fixed,
3006                                         ARM::VLD1d32wb_fixed,
3007                                         ARM::VLD1d64wb_fixed };
3008    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3009                                         ARM::VLD1q16wb_fixed,
3010                                         ARM::VLD1q32wb_fixed,
3011                                         ARM::VLD1q64wb_fixed };
3012    return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
3013  }
3014
3015  case ARMISD::VLD2_UPD: {
3016    static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3017                                         ARM::VLD2d16wb_fixed,
3018                                         ARM::VLD2d32wb_fixed,
3019                                         ARM::VLD1q64wb_fixed};
3020    static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3021                                         ARM::VLD2q16PseudoWB_fixed,
3022                                         ARM::VLD2q32PseudoWB_fixed };
3023    return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
3024  }
3025
3026  case ARMISD::VLD3_UPD: {
3027    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3028                                         ARM::VLD3d16Pseudo_UPD,
3029                                         ARM::VLD3d32Pseudo_UPD,
3030                                         ARM::VLD1q64wb_fixed};
3031    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3032                                          ARM::VLD3q16Pseudo_UPD,
3033                                          ARM::VLD3q32Pseudo_UPD };
3034    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3035                                          ARM::VLD3q16oddPseudo_UPD,
3036                                          ARM::VLD3q32oddPseudo_UPD };
3037    return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3038  }
3039
3040  case ARMISD::VLD4_UPD: {
3041    static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3042                                         ARM::VLD4d16Pseudo_UPD,
3043                                         ARM::VLD4d32Pseudo_UPD,
3044                                         ARM::VLD1q64wb_fixed};
3045    static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3046                                          ARM::VLD4q16Pseudo_UPD,
3047                                          ARM::VLD4q32Pseudo_UPD };
3048    static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3049                                          ARM::VLD4q16oddPseudo_UPD,
3050                                          ARM::VLD4q32oddPseudo_UPD };
3051    return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3052  }
3053
3054  case ARMISD::VLD2LN_UPD: {
3055    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3056                                         ARM::VLD2LNd16Pseudo_UPD,
3057                                         ARM::VLD2LNd32Pseudo_UPD };
3058    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3059                                         ARM::VLD2LNq32Pseudo_UPD };
3060    return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3061  }
3062
3063  case ARMISD::VLD3LN_UPD: {
3064    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3065                                         ARM::VLD3LNd16Pseudo_UPD,
3066                                         ARM::VLD3LNd32Pseudo_UPD };
3067    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3068                                         ARM::VLD3LNq32Pseudo_UPD };
3069    return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3070  }
3071
3072  case ARMISD::VLD4LN_UPD: {
3073    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3074                                         ARM::VLD4LNd16Pseudo_UPD,
3075                                         ARM::VLD4LNd32Pseudo_UPD };
3076    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3077                                         ARM::VLD4LNq32Pseudo_UPD };
3078    return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3079  }
3080
3081  case ARMISD::VST1_UPD: {
3082    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3083                                         ARM::VST1d16wb_fixed,
3084                                         ARM::VST1d32wb_fixed,
3085                                         ARM::VST1d64wb_fixed };
3086    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3087                                         ARM::VST1q16wb_fixed,
3088                                         ARM::VST1q32wb_fixed,
3089                                         ARM::VST1q64wb_fixed };
3090    return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
3091  }
3092
3093  case ARMISD::VST2_UPD: {
3094    static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3095                                         ARM::VST2d16wb_fixed,
3096                                         ARM::VST2d32wb_fixed,
3097                                         ARM::VST1q64wb_fixed};
3098    static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3099                                         ARM::VST2q16PseudoWB_fixed,
3100                                         ARM::VST2q32PseudoWB_fixed };
3101    return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
3102  }
3103
3104  case ARMISD::VST3_UPD: {
3105    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3106                                         ARM::VST3d16Pseudo_UPD,
3107                                         ARM::VST3d32Pseudo_UPD,
3108                                         ARM::VST1d64TPseudoWB_fixed};
3109    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3110                                          ARM::VST3q16Pseudo_UPD,
3111                                          ARM::VST3q32Pseudo_UPD };
3112    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3113                                          ARM::VST3q16oddPseudo_UPD,
3114                                          ARM::VST3q32oddPseudo_UPD };
3115    return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3116  }
3117
3118  case ARMISD::VST4_UPD: {
3119    static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3120                                         ARM::VST4d16Pseudo_UPD,
3121                                         ARM::VST4d32Pseudo_UPD,
3122                                         ARM::VST1d64QPseudoWB_fixed};
3123    static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3124                                          ARM::VST4q16Pseudo_UPD,
3125                                          ARM::VST4q32Pseudo_UPD };
3126    static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3127                                          ARM::VST4q16oddPseudo_UPD,
3128                                          ARM::VST4q32oddPseudo_UPD };
3129    return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3130  }
3131
3132  case ARMISD::VST2LN_UPD: {
3133    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3134                                         ARM::VST2LNd16Pseudo_UPD,
3135                                         ARM::VST2LNd32Pseudo_UPD };
3136    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3137                                         ARM::VST2LNq32Pseudo_UPD };
3138    return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3139  }
3140
3141  case ARMISD::VST3LN_UPD: {
3142    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3143                                         ARM::VST3LNd16Pseudo_UPD,
3144                                         ARM::VST3LNd32Pseudo_UPD };
3145    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3146                                         ARM::VST3LNq32Pseudo_UPD };
3147    return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3148  }
3149
3150  case ARMISD::VST4LN_UPD: {
3151    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3152                                         ARM::VST4LNd16Pseudo_UPD,
3153                                         ARM::VST4LNd32Pseudo_UPD };
3154    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3155                                         ARM::VST4LNq32Pseudo_UPD };
3156    return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3157  }
3158
3159  case ISD::INTRINSIC_VOID:
3160  case ISD::INTRINSIC_W_CHAIN: {
3161    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3162    switch (IntNo) {
3163    default:
3164      break;
3165
3166    case Intrinsic::arm_ldrexd: {
3167      SDValue MemAddr = N->getOperand(2);
3168      SDLoc dl(N);
3169      SDValue Chain = N->getOperand(0);
3170
3171      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3172      unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD;
3173
3174      // arm_ldrexd returns a i64 value in {i32, i32}
3175      std::vector<EVT> ResTys;
3176      if (isThumb) {
3177        ResTys.push_back(MVT::i32);
3178        ResTys.push_back(MVT::i32);
3179      } else
3180        ResTys.push_back(MVT::Untyped);
3181      ResTys.push_back(MVT::Other);
3182
3183      // Place arguments in the right order.
3184      SmallVector<SDValue, 7> Ops;
3185      Ops.push_back(MemAddr);
3186      Ops.push_back(getAL(CurDAG));
3187      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3188      Ops.push_back(Chain);
3189      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3190      // Transfer memoperands.
3191      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3192      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3193      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3194
3195      // Remap uses.
3196      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3197      if (!SDValue(N, 0).use_empty()) {
3198        SDValue Result;
3199        if (isThumb)
3200          Result = SDValue(Ld, 0);
3201        else {
3202          SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
3203          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3204              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3205          Result = SDValue(ResNode,0);
3206        }
3207        ReplaceUses(SDValue(N, 0), Result);
3208      }
3209      if (!SDValue(N, 1).use_empty()) {
3210        SDValue Result;
3211        if (isThumb)
3212          Result = SDValue(Ld, 1);
3213        else {
3214          SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
3215          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3216              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3217          Result = SDValue(ResNode,0);
3218        }
3219        ReplaceUses(SDValue(N, 1), Result);
3220      }
3221      ReplaceUses(SDValue(N, 2), OutChain);
3222      return NULL;
3223    }
3224
3225    case Intrinsic::arm_strexd: {
3226      SDLoc dl(N);
3227      SDValue Chain = N->getOperand(0);
3228      SDValue Val0 = N->getOperand(2);
3229      SDValue Val1 = N->getOperand(3);
3230      SDValue MemAddr = N->getOperand(4);
3231
3232      // Store exclusive double return a i32 value which is the return status
3233      // of the issued store.
3234      EVT ResTys[] = { MVT::i32, MVT::Other };
3235
3236      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3237      // Place arguments in the right order.
3238      SmallVector<SDValue, 7> Ops;
3239      if (isThumb) {
3240        Ops.push_back(Val0);
3241        Ops.push_back(Val1);
3242      } else
3243        // arm_strexd uses GPRPair.
3244        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3245      Ops.push_back(MemAddr);
3246      Ops.push_back(getAL(CurDAG));
3247      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3248      Ops.push_back(Chain);
3249
3250      unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
3251
3252      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3253      // Transfer memoperands.
3254      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3255      MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3256      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3257
3258      return St;
3259    }
3260
3261    case Intrinsic::arm_neon_vld1: {
3262      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3263                                           ARM::VLD1d32, ARM::VLD1d64 };
3264      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3265                                           ARM::VLD1q32, ARM::VLD1q64};
3266      return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
3267    }
3268
3269    case Intrinsic::arm_neon_vld2: {
3270      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3271                                           ARM::VLD2d32, ARM::VLD1q64 };
3272      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3273                                           ARM::VLD2q32Pseudo };
3274      return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
3275    }
3276
3277    case Intrinsic::arm_neon_vld3: {
3278      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3279                                           ARM::VLD3d16Pseudo,
3280                                           ARM::VLD3d32Pseudo,
3281                                           ARM::VLD1d64TPseudo };
3282      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3283                                            ARM::VLD3q16Pseudo_UPD,
3284                                            ARM::VLD3q32Pseudo_UPD };
3285      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3286                                            ARM::VLD3q16oddPseudo,
3287                                            ARM::VLD3q32oddPseudo };
3288      return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3289    }
3290
3291    case Intrinsic::arm_neon_vld4: {
3292      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3293                                           ARM::VLD4d16Pseudo,
3294                                           ARM::VLD4d32Pseudo,
3295                                           ARM::VLD1d64QPseudo };
3296      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3297                                            ARM::VLD4q16Pseudo_UPD,
3298                                            ARM::VLD4q32Pseudo_UPD };
3299      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3300                                            ARM::VLD4q16oddPseudo,
3301                                            ARM::VLD4q32oddPseudo };
3302      return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3303    }
3304
3305    case Intrinsic::arm_neon_vld2lane: {
3306      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3307                                           ARM::VLD2LNd16Pseudo,
3308                                           ARM::VLD2LNd32Pseudo };
3309      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3310                                           ARM::VLD2LNq32Pseudo };
3311      return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3312    }
3313
3314    case Intrinsic::arm_neon_vld3lane: {
3315      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3316                                           ARM::VLD3LNd16Pseudo,
3317                                           ARM::VLD3LNd32Pseudo };
3318      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3319                                           ARM::VLD3LNq32Pseudo };
3320      return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3321    }
3322
3323    case Intrinsic::arm_neon_vld4lane: {
3324      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3325                                           ARM::VLD4LNd16Pseudo,
3326                                           ARM::VLD4LNd32Pseudo };
3327      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3328                                           ARM::VLD4LNq32Pseudo };
3329      return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3330    }
3331
3332    case Intrinsic::arm_neon_vst1: {
3333      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3334                                           ARM::VST1d32, ARM::VST1d64 };
3335      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3336                                           ARM::VST1q32, ARM::VST1q64 };
3337      return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
3338    }
3339
3340    case Intrinsic::arm_neon_vst2: {
3341      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3342                                           ARM::VST2d32, ARM::VST1q64 };
3343      static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3344                                     ARM::VST2q32Pseudo };
3345      return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
3346    }
3347
3348    case Intrinsic::arm_neon_vst3: {
3349      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3350                                           ARM::VST3d16Pseudo,
3351                                           ARM::VST3d32Pseudo,
3352                                           ARM::VST1d64TPseudo };
3353      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3354                                            ARM::VST3q16Pseudo_UPD,
3355                                            ARM::VST3q32Pseudo_UPD };
3356      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3357                                            ARM::VST3q16oddPseudo,
3358                                            ARM::VST3q32oddPseudo };
3359      return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3360    }
3361
3362    case Intrinsic::arm_neon_vst4: {
3363      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3364                                           ARM::VST4d16Pseudo,
3365                                           ARM::VST4d32Pseudo,
3366                                           ARM::VST1d64QPseudo };
3367      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3368                                            ARM::VST4q16Pseudo_UPD,
3369                                            ARM::VST4q32Pseudo_UPD };
3370      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3371                                            ARM::VST4q16oddPseudo,
3372                                            ARM::VST4q32oddPseudo };
3373      return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3374    }
3375
3376    case Intrinsic::arm_neon_vst2lane: {
3377      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3378                                           ARM::VST2LNd16Pseudo,
3379                                           ARM::VST2LNd32Pseudo };
3380      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3381                                           ARM::VST2LNq32Pseudo };
3382      return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3383    }
3384
3385    case Intrinsic::arm_neon_vst3lane: {
3386      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3387                                           ARM::VST3LNd16Pseudo,
3388                                           ARM::VST3LNd32Pseudo };
3389      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3390                                           ARM::VST3LNq32Pseudo };
3391      return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3392    }
3393
3394    case Intrinsic::arm_neon_vst4lane: {
3395      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3396                                           ARM::VST4LNd16Pseudo,
3397                                           ARM::VST4LNd32Pseudo };
3398      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3399                                           ARM::VST4LNq32Pseudo };
3400      return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3401    }
3402    }
3403    break;
3404  }
3405
3406  case ISD::INTRINSIC_WO_CHAIN: {
3407    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3408    switch (IntNo) {
3409    default:
3410      break;
3411
3412    case Intrinsic::arm_neon_vtbl2:
3413      return SelectVTBL(N, false, 2, ARM::VTBL2);
3414    case Intrinsic::arm_neon_vtbl3:
3415      return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3416    case Intrinsic::arm_neon_vtbl4:
3417      return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3418
3419    case Intrinsic::arm_neon_vtbx2:
3420      return SelectVTBL(N, true, 2, ARM::VTBX2);
3421    case Intrinsic::arm_neon_vtbx3:
3422      return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3423    case Intrinsic::arm_neon_vtbx4:
3424      return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3425    }
3426    break;
3427  }
3428
3429  case ARMISD::VTBL1: {
3430    SDLoc dl(N);
3431    EVT VT = N->getValueType(0);
3432    SmallVector<SDValue, 6> Ops;
3433
3434    Ops.push_back(N->getOperand(0));
3435    Ops.push_back(N->getOperand(1));
3436    Ops.push_back(getAL(CurDAG));                    // Predicate
3437    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3438    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3439  }
3440  case ARMISD::VTBL2: {
3441    SDLoc dl(N);
3442    EVT VT = N->getValueType(0);
3443
3444    // Form a REG_SEQUENCE to force register allocation.
3445    SDValue V0 = N->getOperand(0);
3446    SDValue V1 = N->getOperand(1);
3447    SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3448
3449    SmallVector<SDValue, 6> Ops;
3450    Ops.push_back(RegSeq);
3451    Ops.push_back(N->getOperand(2));
3452    Ops.push_back(getAL(CurDAG));                    // Predicate
3453    Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3454    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3455  }
3456
3457  case ISD::CONCAT_VECTORS:
3458    return SelectConcatVector(N);
3459
3460  case ARMISD::ATOMOR64_DAG:
3461    return SelectAtomic64(N, ARM::ATOMOR6432);
3462  case ARMISD::ATOMXOR64_DAG:
3463    return SelectAtomic64(N, ARM::ATOMXOR6432);
3464  case ARMISD::ATOMADD64_DAG:
3465    return SelectAtomic64(N, ARM::ATOMADD6432);
3466  case ARMISD::ATOMSUB64_DAG:
3467    return SelectAtomic64(N, ARM::ATOMSUB6432);
3468  case ARMISD::ATOMNAND64_DAG:
3469    return SelectAtomic64(N, ARM::ATOMNAND6432);
3470  case ARMISD::ATOMAND64_DAG:
3471    return SelectAtomic64(N, ARM::ATOMAND6432);
3472  case ARMISD::ATOMSWAP64_DAG:
3473    return SelectAtomic64(N, ARM::ATOMSWAP6432);
3474  case ARMISD::ATOMCMPXCHG64_DAG:
3475    return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
3476
3477  case ARMISD::ATOMMIN64_DAG:
3478    return SelectAtomic64(N, ARM::ATOMMIN6432);
3479  case ARMISD::ATOMUMIN64_DAG:
3480    return SelectAtomic64(N, ARM::ATOMUMIN6432);
3481  case ARMISD::ATOMMAX64_DAG:
3482    return SelectAtomic64(N, ARM::ATOMMAX6432);
3483  case ARMISD::ATOMUMAX64_DAG:
3484    return SelectAtomic64(N, ARM::ATOMUMAX6432);
3485  }
3486
3487  return SelectCode(N);
3488}
3489
/// SelectInlineAsm - Rewrite an INLINEASM node so that any "r"-constrained
/// register operand group consisting of exactly two GPRs (the lowering of a
/// 64-bit value) is collapsed into a single GPRPair virtual register.
/// Returns the rebuilt INLINEASM node, or NULL if nothing had to change.
SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands; // Operands for the rebuilt INLINEASM.
  unsigned Flag, Kind;                  // Current operand group's flag word / kind.
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // The trailing glue operand (if any) is held aside and re-appended at the end.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0);

  // OpChanged[k] records whether the k-th register operand group was rewritten
  // to a GPRPair; tied uses later consult the entry of the def they tie to.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the fixed leading operands (asm string, flags, input chain, ...).
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // An operand group starts with a constant flag word; anything else is
    // passed through untouched.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Only rewrite two-register GPR-class groups (or uses tied to a def that
    // was already rewritten, which carry no class constraint of their own).
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      // Read the whole pair back out of the asm's output via the glue chain.
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user: splice the new copies' glue in place
      // of the user's old final (glue) operand.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
      GU = T1.getNode();
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      // Mark this operand group as rewritten and emit an updated flag word:
      // one GPRPair register instead of two GPRs.
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return NULL;

  // Build the rewritten INLINEASM node; setting its node id to -1 keeps the
  // selector from visiting it again.
  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
                        AsmNodeOperands.size());
  New->setNodeId(-1);
  return New.getNode();
}
3639
3640
3641bool ARMDAGToDAGISel::
3642SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
3643                             std::vector<SDValue> &OutOps) {
3644  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
3645  // Require the address to be in a register.  That is safe for all ARM
3646  // variants and it is hard to do anything much smarter without knowing
3647  // how the operand is used.
3648  OutOps.push_back(Op);
3649  return false;
3650}
3651
3652/// createARMISelDag - This pass converts a legalized DAG into a
3653/// ARM-specific DAG, ready for instruction scheduling.
3654///
3655FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3656                                     CodeGenOpt::Level OptLevel) {
3657  return new ARMDAGToDAGISel(TM, OptLevel);
3658}
3659