//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define DEBUG_TYPE "arm-instrinfo"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
           cl::desc("Widen ARM vmovs to vmovd when possible"));

static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
     cl::Hidden, cl::init(12),
     cl::desc("Clearance before partial register updates"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;     // MLA / MLS opcode
  uint16_t MulOpc;     // Expanded multiplication opcode
  uint16_t AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the acc is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};
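// For example, the VNMLSS entry above records that a VNMLSS may be expanded
// into a VMULS followed by a VSUBS, with the accumulator negated before the
// subtract (NegAcc == true) and no extra lane operand (HasLane == false).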

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      assert(false && "Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
                             const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II = TM->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return nullptr;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return nullptr;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return nullptr;

  MachineInstr *UpdateMI = nullptr;
  MachineInstr *MemMI = nullptr;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
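
  // Operand layout assumed above: for loads the write-back register is
  // operand 1, for stores operand 0; the base is operand 2, and the last
  // three operands are the offset register, offset immediate, and predicate.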
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return nullptr;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3 : {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // The immediate is 8 bits; it's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

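  // At this point UpdateMI holds the base-register update; next we rebuild
  // the memory access itself as an un-indexed load/store and order the two
  // instructions according to pre- vs. post-indexed addressing.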
  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  TBB = nullptr;
  FBB = nullptr;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) {

    // Flag to be raised on unanalyzable instructions. This is useful in cases
    // where we want to clean up at the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values and predicated nonterminators.
    while (I->isDebugValue() || !I->isTerminator()) {
      if (I == MBB.begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
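      // Cond now holds the branch's condition-code immediate and its CPSR
      // source register, which is what InsertBranch expects to read back.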
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = !isPredicated(I);
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    //                returns.
    if (!isPredicated(I) &&
          (isUncondBranchOpcode(I->getOpcode()) ||
           isIndirectBranchOpcode(I->getOpcode()) ||
           isJumpTableBranchOpcode(I->getOpcode()) ||
           I->isReturn())) {
      // Forget any previous conditional branch information; it no longer
      // applies.
      Cond.clear();
      FBB = nullptr;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = std::next(I);
        while (DI != MBB.end()) {
          MachineInstr *InstToDelete = DI;
          ++DI;
          InstToDelete->eraseFromParent();
        }
      }
    }

    if (CantAnalyze)
      return true;

    if (I == MBB.begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (!FBB) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
  if (MI->isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
      .addImm(Pred[0].getImm())
      .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

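  // CC1 subsumes CC2 when CC2 implies CC1, e.g. unsigned-higher (HI) implies
  // unsigned-higher-or-same (HS), and AL is implied by everything.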
  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  if (!MI->isPredicable())
    return false;

  ARMFunctionInfo *AFI =
    MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();

  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(MI);
  } else { // non-Thumb
    if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
      return false;
  }

  return true;
}

namespace llvm {
template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      return false;
  }
  // all definitions of CPSR are dead
  return true;
}
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    // pseudo-instruction sizes are zero.
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is the branch instruction plus the table: 4 bytes
    // per entry in general; TBB entries are one byte, TBH entries two bytes.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != nullptr);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
    // aligned. The assembler / linker may add 2 byte padding just before
    // the JT entries.  The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) * 2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
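    // E.g. a t2TBB_JT with 5 entries is padded to 6 one-byte entries, so the
    // total reported here is 6 * 1 + 4 == 10 bytes.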
    return NumEntries * EntrySize + InstSize;
  }
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += GetInstSizeInBytes(&*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                    .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with
  // SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
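
  // E.g. copying Q0_Q1 from Q1_Q2 proceeds forward (Q0 = Q1, then Q1 = Q2),
  // but copying Q1_Q2 from Q0_Q1 must go backward (Q2 = Q1, then Q1 = Q0) so
  // that Q1 is read before it is clobbered.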
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = AddDefaultCC(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
    case 4:
      if (ARM::GPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 8:
      if (ARM::DPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
        if (Subtarget.hasV5TEOps()) {
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

          AddDefaultPred(MIB);
        } else {
          // Fall back to the STM instruction, which has existed since the
          // dawn of time.
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
                             .addFrameIndex(FI).addMemOperand(MMO));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 16:
      if (ARM::DPairRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 24:
      if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 32:
      if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
          ARM::DQuadRegClass.hasSubClassEq(RC)) {
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          // FIXME: It's possible to only store part of the QQ register if the
          // spilled def has a sub-register index.
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
                AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 64:
      if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                         .addFrameIndex(FI))
                         .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
              AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    default:
      llvm_unreachable("Unknown reg class!");
  }
}

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fall back to the LDM instruction, which has existed since the dawn
        // of time.
        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
                                 .addFrameIndex(FI).addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                         .addFrameIndex(FI)
                         .addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                     .addFrameIndex(FI))
                     .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                             int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
  // widened to VMOVD.  We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
    return false;

  // Look for a copy between even S-registers.  That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  unsigned DstRegS = MI->getOperand(0).getReg();
  unsigned SrcRegS = MI->getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI->getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  DEBUG(dbgs() << "widening:    " << *MI);
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);

  // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI->RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI->setDesc(get(ARM::VMOVD));
  MI->getOperand(0).setReg(DstRegD);
  MI->getOperand(1).setReg(SrcRegD);
  AddDefaultPred(MIB);

  // We are now reading SrcRegD instead of SrcRegS.  This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI->getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
  if (MI->getOperand(1).isKill()) {
    MI->getOperand(1).setIsKill(false);
    MI->addRegisterKilled(SrcRegS, TRI, true);
  }

  DEBUG(dbgs() << "replaced by: " << *MI);
  return true;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = nullptr;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
             ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1,
                                        const MachineRegisterInfo *MRI) const {
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::LDRLIT_ga_pcrel ||
      Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
      Opcode == ARM::tLDRLIT_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::LDRLIT_ga_pcrel ||
        Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
        Opcode == ARM::tLDRLIT_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded values, e.g. a constantpool entry or a global
      // address, are the same.
      if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
1422/// reference.
1423///
1424/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1425/// is permanently disabled.
1426bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1427                                               int64_t &Offset1,
1428                                               int64_t &Offset2) const {
1429  // Don't worry about Thumb: just ARM and Thumb2.
1430  if (Subtarget.isThumb1Only()) return false;
1431
1432  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1433    return false;
1434
1435  switch (Load1->getMachineOpcode()) {
1436  default:
1437    return false;
1438  case ARM::LDRi12:
1439  case ARM::LDRBi12:
1440  case ARM::LDRD:
1441  case ARM::LDRH:
1442  case ARM::LDRSB:
1443  case ARM::LDRSH:
1444  case ARM::VLDRD:
1445  case ARM::VLDRS:
1446  case ARM::t2LDRi8:
1447  case ARM::t2LDRBi8:
1448  case ARM::t2LDRDi8:
1449  case ARM::t2LDRSHi8:
1450  case ARM::t2LDRi12:
1451  case ARM::t2LDRBi12:
1452  case ARM::t2LDRSHi12:
1453    break;
1454  }
1455
1456  switch (Load2->getMachineOpcode()) {
1457  default:
1458    return false;
1459  case ARM::LDRi12:
1460  case ARM::LDRBi12:
1461  case ARM::LDRD:
1462  case ARM::LDRH:
1463  case ARM::LDRSB:
1464  case ARM::LDRSH:
1465  case ARM::VLDRD:
1466  case ARM::VLDRS:
1467  case ARM::t2LDRi8:
1468  case ARM::t2LDRBi8:
1469  case ARM::t2LDRSHi8:
1470  case ARM::t2LDRi12:
1471  case ARM::t2LDRBi12:
1472  case ARM::t2LDRSHi12:
1473    break;
1474  }
1475
1476  // Check if base addresses and chain operands match.
1477  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1478      Load1->getOperand(4) != Load2->getOperand(4))
1479    return false;
1480
1481  // Index should be Reg0.
1482  if (Load1->getOperand(3) != Load2->getOperand(3))
1483    return false;
1484
1485  // Determine the offsets.
1486  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1487      isa<ConstantSDNode>(Load2->getOperand(1))) {
1488    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1489    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1490    return true;
1491  }
1492
1493  return false;
1494}
1495
1496/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1497/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1498/// be scheduled together. On some targets, if two loads are loading from
1499/// addresses in the same cache line, it's better if they are scheduled
1500/// together. This function takes two integers that represent the load offsets
1501/// from the common base address. It returns true if it decides it's desirable
1502/// to schedule the two loads together. "NumLoads" is the number of loads that
1503/// have already been scheduled after Load1.
1504///
1505/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1506/// is permanently disabled.
1507bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1508                                               int64_t Offset1, int64_t Offset2,
1509                                               unsigned NumLoads) const {
1510  // Don't worry about Thumb: just ARM and Thumb2.
1511  if (Subtarget.isThumb1Only()) return false;
1512
1513  assert(Offset2 > Offset1);
1514
1515  if ((Offset2 - Offset1) / 8 > 64)
1516    return false;
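  // (Illustrative reading: with the integer division above, only loads whose
  // offsets are within roughly 512 bytes of each other remain candidates for
  // pairing.)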
1517
1518  // Check whether the machine opcodes differ. If they do, we consider the
1519  // loads to have different base addresses, EXCEPT in the case of Thumb2
1520  // byte loads where one is t2LDRBi8 and the other t2LDRBi12. In that case
1521  // they are considered the same, because they are merely different
1522  // encoding forms of the same basic instruction.
1523  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1524      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1525         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1526        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1527         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1528    return false;  // FIXME: overly conservative?
1529
1530  // Four loads in a row should be sufficient.
1531  if (NumLoads >= 3)
1532    return false;
1533
1534  return true;
1535}
1536
1537bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
1538                                            const MachineBasicBlock *MBB,
1539                                            const MachineFunction &MF) const {
1540  // Debug info is never a scheduling boundary. It's necessary to be explicit
1541  // due to the special treatment of IT instructions below, otherwise a
1542  // dbg_value followed by an IT will result in the IT instruction being
1543  // considered a scheduling hazard, which is wrong. It should be the actual
1544  // instruction preceding the dbg_value instruction(s), just like it is
1545  // when debug info is not present.
1546  if (MI->isDebugValue())
1547    return false;
1548
1549  // Terminators and labels can't be scheduled around.
1550  if (MI->isTerminator() || MI->isPosition())
1551    return true;
1552
1553  // Treat the start of the IT block as a scheduling boundary, but schedule
1554  // t2IT along with all instructions following it.
1555  // FIXME: This is a big hammer. But the alternative is to add all potential
1556  // true and anti dependencies to IT block instructions as implicit operands
1557  // to the t2IT instruction. The added compile time and complexity does not
1558  // seem worth it.
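  // E.g. (illustrative Thumb2 sketch):
  //   t2IT eq            ; opens the IT block
  //   addeq r0, r0, #1   ; predicated by the IT block
  // Nothing should be scheduled between the t2IT and the instructions it
  // predicates, so the lookahead below keeps them together.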
1559  MachineBasicBlock::const_iterator I = MI;
1560  // Make sure to skip any dbg_value instructions
1561  while (++I != MBB->end() && I->isDebugValue())
1562    ;
1563  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1564    return true;
1565
1566  // Don't attempt to schedule around any instruction that defines
1567  // a stack-oriented pointer, as it's unlikely to be profitable. This
1568  // saves compile time, because it doesn't require every single
1569  // stack slot reference to depend on the instruction that does the
1570  // modification.
1571  // Calls don't actually change the stack pointer, even if they have imp-defs.
1572  // No ARM calling conventions change the stack pointer. (X86 calling
1573  // conventions sometimes do).
1574  if (!MI->isCall() && MI->definesRegister(ARM::SP))
1575    return true;
1576
1577  return false;
1578}
1579
1580bool ARMBaseInstrInfo::
1581isProfitableToIfCvt(MachineBasicBlock &MBB,
1582                    unsigned NumCycles, unsigned ExtraPredCycles,
1583                    const BranchProbability &Probability) const {
1584  if (!NumCycles)
1585    return false;
1586
1587  // Attempt to estimate the relative costs of predication versus branching.
1588  unsigned UnpredCost = Probability.getNumerator() * NumCycles;
1589  UnpredCost /= Probability.getDenominator();
1590  UnpredCost += 1; // The branch itself
1591  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
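  // Worked example (illustrative numbers): with probability 1/2, NumCycles = 4
  // and a misprediction penalty of 20, UnpredCost = (1 * 4) / 2 + 1 + 20 / 10
  // = 5, so predication wins whenever NumCycles + ExtraPredCycles <= 5.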
1592
1593  return (NumCycles + ExtraPredCycles) <= UnpredCost;
1594}
1595
1596bool ARMBaseInstrInfo::
1597isProfitableToIfCvt(MachineBasicBlock &TMBB,
1598                    unsigned TCycles, unsigned TExtra,
1599                    MachineBasicBlock &FMBB,
1600                    unsigned FCycles, unsigned FExtra,
1601                    const BranchProbability &Probability) const {
1602  if (!TCycles || !FCycles)
1603    return false;
1604
1605  // Attempt to estimate the relative costs of predication versus branching.
1606  unsigned TUnpredCost = Probability.getNumerator() * TCycles;
1607  TUnpredCost /= Probability.getDenominator();
1608
1609  uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
1610  unsigned FUnpredCost = Comp * FCycles;
1611  FUnpredCost /= Probability.getDenominator();
1612
1613  unsigned UnpredCost = TUnpredCost + FUnpredCost;
1614  UnpredCost += 1; // The branch itself
1615  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
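  // E.g. (illustrative): with probability 3/4 for the true side, TCycles = 4,
  // FCycles = 2 and a penalty of 10, TUnpredCost = 3 and FUnpredCost = 0
  // (the integer division truncates (1 * 2) / 4), giving UnpredCost = 5.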
1616
1617  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
1618}
1619
1620bool
1621ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1622                                            MachineBasicBlock &FMBB) const {
1623  // Reduce false anti-dependencies to let Swift's out-of-order execution
1624  // engine do its thing.
1625  return Subtarget.isSwift();
1626}
1627
1628/// getInstrPredicate - If instruction is predicated, returns its predicate
1629/// condition, otherwise returns AL. It also returns the condition code
1630/// register by reference.
1631ARMCC::CondCodes
1632llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
1633  int PIdx = MI->findFirstPredOperandIdx();
1634  if (PIdx == -1) {
1635    PredReg = 0;
1636    return ARMCC::AL;
1637  }
1638
1639  PredReg = MI->getOperand(PIdx+1).getReg();
1640  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
1641}
1642
1643
1644int llvm::getMatchingCondBranchOpcode(int Opc) {
1645  if (Opc == ARM::B)
1646    return ARM::Bcc;
1647  if (Opc == ARM::tB)
1648    return ARM::tBcc;
1649  if (Opc == ARM::t2B)
1650    return ARM::t2Bcc;
1651
1652  llvm_unreachable("Unknown unconditional branch opcode!");
1653}
1654
1655/// commuteInstruction - Handle commutable instructions.
1656MachineInstr *
1657ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
1658  switch (MI->getOpcode()) {
1659  case ARM::MOVCCr:
1660  case ARM::t2MOVCCr: {
1661    // MOVCC can be commuted by inverting the condition.
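    // E.g. (illustrative, using the operand order from analyzeSelect below):
    //   %dst = MOVCCr %a, %b, pred:GE, pred:%CPSR
    // commutes to
    //   %dst = MOVCCr %b, %a, pred:LT, pred:%CPSR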
1662    unsigned PredReg = 0;
1663    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
1664    // MOVCC AL can't be inverted. Shouldn't happen.
1665    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
1666      return nullptr;
1667    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
1668    if (!MI)
1669      return nullptr;
1670    // After swapping the MOVCC operands, also invert the condition.
1671    MI->getOperand(MI->findFirstPredOperandIdx())
1672      .setImm(ARMCC::getOppositeCondition(CC));
1673    return MI;
1674  }
1675  }
1676  return TargetInstrInfo::commuteInstruction(MI, NewMI);
1677}
1678
1679/// Identify instructions that can be folded into a MOVCC instruction, and
1680/// return the defining instruction.
1681static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
1682                                      const MachineRegisterInfo &MRI,
1683                                      const TargetInstrInfo *TII) {
1684  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1685    return nullptr;
1686  if (!MRI.hasOneNonDBGUse(Reg))
1687    return nullptr;
1688  MachineInstr *MI = MRI.getVRegDef(Reg);
1689  if (!MI)
1690    return nullptr;
1691  // MI is folded into the MOVCC by predicating it.
1692  if (!MI->isPredicable())
1693    return nullptr;
1694  // Check if MI has any non-dead defs or physreg uses. This also detects
1695  // predicated instructions which will be reading CPSR.
1696  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
1697    const MachineOperand &MO = MI->getOperand(i);
1698    // Reject frame index operands, PEI can't handle the predicated pseudos.
1699    if (MO.isFI() || MO.isCPI() || MO.isJTI())
1700      return nullptr;
1701    if (!MO.isReg())
1702      continue;
1703    // MI can't have any tied operands, that would conflict with predication.
1704    if (MO.isTied())
1705      return nullptr;
1706    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1707      return nullptr;
1708    if (MO.isDef() && !MO.isDead())
1709      return nullptr;
1710  }
1711  bool DontMoveAcrossStores = true;
1712  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
1713                        DontMoveAcrossStores))
1714    return nullptr;
1715  return MI;
1716}
1717
1718bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
1719                                     SmallVectorImpl<MachineOperand> &Cond,
1720                                     unsigned &TrueOp, unsigned &FalseOp,
1721                                     bool &Optimizable) const {
1722  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
1723         "Unknown select instruction");
1724  // MOVCC operands:
1725  // 0: Def.
1726  // 1: True use.
1727  // 2: False use.
1728  // 3: Condition code.
1729  // 4: CPSR use.
1730  TrueOp = 1;
1731  FalseOp = 2;
1732  Cond.push_back(MI->getOperand(3));
1733  Cond.push_back(MI->getOperand(4));
1734  // We can always fold a def.
1735  Optimizable = true;
1736  return false;
1737}
1738
1739MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
1740                                               bool PreferFalse) const {
1741  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
1742         "Unknown select instruction");
1743  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1744  MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
1745  bool Invert = !DefMI;
1746  if (!DefMI)
1747    DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
1748  if (!DefMI)
1749    return nullptr;
1750
1751  // Find new register class to use.
1752  MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
1753  unsigned       DestReg  = MI->getOperand(0).getReg();
1754  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
1755  if (!MRI.constrainRegClass(DestReg, PreviousClass))
1756    return nullptr;
1757
1758  // Create a new predicated version of DefMI.
1759  // Rfalse is the first use.
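  // Sketch of the rewrite (illustrative pseudo-MI):
  //   %x = ADDrr %a, %b
  //   %dst = MOVCCr ..., %x, pred:CC, pred:%CPSR
  // becomes
  //   %dst = ADDrr %a, %b, pred:CC', pred:%CPSR, %rfalse<imp-use,tied>
  // where CC' is CC itself, or its opposite when Invert is set.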
1760  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1761                                      DefMI->getDesc(), DestReg);
1762
1763  // Copy all the DefMI operands, excluding its (null) predicate.
1764  const MCInstrDesc &DefDesc = DefMI->getDesc();
1765  for (unsigned i = 1, e = DefDesc.getNumOperands();
1766       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
1767    NewMI.addOperand(DefMI->getOperand(i));
1768
1769  unsigned CondCode = MI->getOperand(3).getImm();
1770  if (Invert)
1771    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
1772  else
1773    NewMI.addImm(CondCode);
1774  NewMI.addOperand(MI->getOperand(4));
1775
1776  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
1777  if (NewMI->hasOptionalDef())
1778    AddDefaultCC(NewMI);
1779
1780  // The output register value when the predicate is false is an implicit
1781  // register operand tied to the first def.
1782  // The tie makes the register allocator ensure the FalseReg is allocated the
1783  // same register as operand 0.
1784  FalseReg.setImplicit();
1785  NewMI.addOperand(FalseReg);
1786  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
1787
1788  // The caller will erase MI, but not DefMI.
1789  DefMI->eraseFromParent();
1790  return NewMI;
1791}
1792
1793/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
1794/// instruction is encoded with an 'S' bit is determined by the optional CPSR
1795/// def operand.
1796///
1797/// This will go away once we can teach tblgen how to set the optional CPSR def
1798/// operand itself.
1799struct AddSubFlagsOpcodePair {
1800  uint16_t PseudoOpc;
1801  uint16_t MachineOpc;
1802};
1803
1804static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
1805  {ARM::ADDSri, ARM::ADDri},
1806  {ARM::ADDSrr, ARM::ADDrr},
1807  {ARM::ADDSrsi, ARM::ADDrsi},
1808  {ARM::ADDSrsr, ARM::ADDrsr},
1809
1810  {ARM::SUBSri, ARM::SUBri},
1811  {ARM::SUBSrr, ARM::SUBrr},
1812  {ARM::SUBSrsi, ARM::SUBrsi},
1813  {ARM::SUBSrsr, ARM::SUBrsr},
1814
1815  {ARM::RSBSri, ARM::RSBri},
1816  {ARM::RSBSrsi, ARM::RSBrsi},
1817  {ARM::RSBSrsr, ARM::RSBrsr},
1818
1819  {ARM::t2ADDSri, ARM::t2ADDri},
1820  {ARM::t2ADDSrr, ARM::t2ADDrr},
1821  {ARM::t2ADDSrs, ARM::t2ADDrs},
1822
1823  {ARM::t2SUBSri, ARM::t2SUBri},
1824  {ARM::t2SUBSrr, ARM::t2SUBrr},
1825  {ARM::t2SUBSrs, ARM::t2SUBrs},
1826
1827  {ARM::t2RSBSri, ARM::t2RSBri},
1828  {ARM::t2RSBSrs, ARM::t2RSBrs},
1829};
1830
1831unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
1832  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
1833    if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
1834      return AddSubFlagsOpcodeMap[i].MachineOpc;
1835  return 0;
1836}
1837
1838void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
1839                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
1840                               unsigned DestReg, unsigned BaseReg, int NumBytes,
1841                               ARMCC::CondCodes Pred, unsigned PredReg,
1842                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
1843  if (NumBytes == 0 && DestReg != BaseReg) {
1844    BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
1845      .addReg(BaseReg, RegState::Kill)
1846      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
1847      .setMIFlags(MIFlags);
1848    return;
1849  }
1850
1851  bool isSub = NumBytes < 0;
1852  if (isSub) NumBytes = -NumBytes;
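  // E.g. (illustrative): NumBytes = 0x10004 is not a single valid ARM
  // modified immediate, so the loop below splits it into the encodable
  // chunks #0x4 and #0x10000, emitting:
  //   add rD, rN, #0x4
  //   add rD, rD, #0x10000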
1853
1854  while (NumBytes) {
1855    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
1856    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
1857    assert(ThisVal && "Didn't extract field correctly");
1858
1859    // We will handle these bits from the offset; clear them.
1860    NumBytes &= ~ThisVal;
1861
1862    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
1863
1864    // Build the new ADD / SUB.
1865    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
1866    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
1867      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
1868      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
1869      .setMIFlags(MIFlags);
1870    BaseReg = DestReg;
1871  }
1872}
1873
1874static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
1875                      MachineInstr *MI) {
1876  for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
1877       Subreg.isValid(); ++Subreg)
1878    if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
1879        MachineBasicBlock::LQR_Dead)
1880      return true;
1881  return false;
1882}
1883bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
1884                                      MachineFunction &MF, MachineInstr *MI,
1885                                      unsigned NumBytes) {
1886  // This optimisation potentially adds lots of load and store
1887  // micro-operations, so it is really only a benefit for code size.
1888  if (!Subtarget.isMinSize())
1889    return false;
1890
1891  // If only one register is pushed/popped, LLVM can use an LDR/STR
1892  // instead. We can't modify those so make sure we're dealing with an
1893  // instruction we understand.
1894  bool IsPop = isPopOpcode(MI->getOpcode());
1895  bool IsPush = isPushOpcode(MI->getOpcode());
1896  if (!IsPush && !IsPop)
1897    return false;
1898
1899  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
1900                      MI->getOpcode() == ARM::VLDMDIA_UPD;
1901  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
1902                     MI->getOpcode() == ARM::tPOP ||
1903                     MI->getOpcode() == ARM::tPOP_RET;
1904
1905  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
1906                          MI->getOperand(1).getReg() == ARM::SP)) &&
1907         "trying to fold sp update into non-sp-updating push/pop");
1908
1909  // The VFP push & pop act on D-registers, so we can only correctly fold an
1910  // adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes
1911  // each. Don't try if this is violated.
1912  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
1913    return false;
1914
1915  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
1916  // pred) so the list starts at 4. Thumb1 starts after the predicate.
1917  int RegListIdx = IsT1PushPop ? 2 : 4;
1918
1919  // Calculate the space we'll need in terms of registers.
1920  unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
1921  unsigned RD0Reg, RegsNeeded;
1922  if (IsVFPPushPop) {
1923    RD0Reg = ARM::D0;
1924    RegsNeeded = NumBytes / 8;
1925  } else {
1926    RD0Reg = ARM::R0;
1927    RegsNeeded = NumBytes / 4;
1928  }
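  // E.g. (illustrative): folding an 8-byte SP decrement into "push {r4, lr}"
  // would yield "push {r2, r3, r4, lr}", with r2 and r3 pushed as undef
  // scratch values; a pop instead marks the extra registers <def,dead>, and
  // only if they are neither live nor callee-saved.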
1929
1930  // We're going to have to strip all list operands off before
1931  // re-adding them since the order matters, so save the existing ones
1932  // for later.
1933  SmallVector<MachineOperand, 4> RegList;
1934  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
1935    RegList.push_back(MI->getOperand(i));
1936
1937  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
1938  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
1939
1940  // Now try to find enough space in the reglist to allocate NumBytes.
1941  for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
1942       --CurReg) {
1943    if (!IsPop) {
1944      // Pushing any register is completely harmless, mark the
1945      // register involved as undef since we don't care about it in
1946      // the slightest.
1947      RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
1948                                                  false, false, true));
1949      --RegsNeeded;
1950      continue;
1951    }
1952
1953    // However, we can only pop an extra register if it's not live. Popping
1954    // a register that is live within the function might clobber a return
1955    // value; the other way a register can be live here is if it's
1956    // callee-saved.
1957    // TODO: Currently, computeRegisterLiveness() does not report "live" if a
1958    // sub reg is live. When computeRegisterLiveness() works for sub reg, it
1959    // can replace isAnySubRegLive().
1960    if (isCalleeSavedRegister(CurReg, CSRegs) ||
1961        isAnySubRegLive(CurReg, TRI, MI)) {
1962      // VFP pops don't allow holes in the register list, so any skip is fatal
1963      // for our transformation. GPR pops do, so we should just keep looking.
1964      if (IsVFPPushPop)
1965        return false;
1966      else
1967        continue;
1968    }
1969
1970    // Mark the unimportant registers as <def,dead> in the POP.
1971    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
1972                                                true));
1973    --RegsNeeded;
1974  }
1975
1976  if (RegsNeeded > 0)
1977    return false;
1978
1979  // Finally we know we can profitably perform the optimisation so go
1980  // ahead: strip all existing registers off and add them back again
1981  // in the right order.
1982  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
1983    MI->RemoveOperand(i);
1984
1985  // Add the complete list back in.
1986  MachineInstrBuilder MIB(MF, &*MI);
1987  for (int i = RegList.size() - 1; i >= 0; --i)
1988    MIB.addOperand(RegList[i]);
1989
1990  return true;
1991}
1992
1993bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
1994                                unsigned FrameReg, int &Offset,
1995                                const ARMBaseInstrInfo &TII) {
1996  unsigned Opcode = MI.getOpcode();
1997  const MCInstrDesc &Desc = MI.getDesc();
1998  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
1999  bool isSub = false;
2000
2001  // Memory operands in inline assembly always use AddrMode2.
2002  if (Opcode == ARM::INLINEASM)
2003    AddrMode = ARMII::AddrMode2;
2004
2005  if (Opcode == ARM::ADDri) {
2006    Offset += MI.getOperand(FrameRegIdx+1).getImm();
2007    if (Offset == 0) {
2008      // Turn it into a move.
2009      MI.setDesc(TII.get(ARM::MOVr));
2010      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2011      MI.RemoveOperand(FrameRegIdx+1);
2012      Offset = 0;
2013      return true;
2014    } else if (Offset < 0) {
2015      Offset = -Offset;
2016      isSub = true;
2017      MI.setDesc(TII.get(ARM::SUBri));
2018    }
2019
2020    // Common case: small offset, fits into instruction.
2021    if (ARM_AM::getSOImmVal(Offset) != -1) {
2022      // Replace the FrameIndex with sp / fp
2023      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2024      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2025      Offset = 0;
2026      return true;
2027    }
2028
2029    // Otherwise, pull as much of the immediate into this ADDri/SUBri
2030    // as possible.
2031    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2032    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2033
2034    // We will handle these bits from offset, clear them.
2035    Offset &= ~ThisImmVal;
2036
2037    // Get the properly encoded SOImmVal field.
2038    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2039           "Bit extraction didn't work?");
2040    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2041  } else {
2042    unsigned ImmIdx = 0;
2043    int InstrOffs = 0;
2044    unsigned NumBits = 0;
2045    unsigned Scale = 1;
2046    switch (AddrMode) {
2047    case ARMII::AddrMode_i12: {
2048      ImmIdx = FrameRegIdx + 1;
2049      InstrOffs = MI.getOperand(ImmIdx).getImm();
2050      NumBits = 12;
2051      break;
2052    }
2053    case ARMII::AddrMode2: {
2054      ImmIdx = FrameRegIdx+2;
2055      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2056      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2057        InstrOffs *= -1;
2058      NumBits = 12;
2059      break;
2060    }
2061    case ARMII::AddrMode3: {
2062      ImmIdx = FrameRegIdx+2;
2063      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2064      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2065        InstrOffs *= -1;
2066      NumBits = 8;
2067      break;
2068    }
2069    case ARMII::AddrMode4:
2070    case ARMII::AddrMode6:
2071      // Can't fold any offset even if it's zero.
2072      return false;
2073    case ARMII::AddrMode5: {
2074      ImmIdx = FrameRegIdx+1;
2075      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2076      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2077        InstrOffs *= -1;
2078      NumBits = 8;
2079      Scale = 4;
2080      break;
2081    }
2082    default:
2083      llvm_unreachable("Unsupported addressing mode!");
2084    }
2085
2086    Offset += InstrOffs * Scale;
2087    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2088    if (Offset < 0) {
2089      Offset = -Offset;
2090      isSub = true;
2091    }
2092
2093    // Attempt to fold the address computation if the opcode has offset bits.
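      // E.g. (illustrative): in AddrMode3 (NumBits == 8) an offset of -12 is
      // encoded as (1 << 8) | 12, i.e. the sub bit sits just above the 8-bit
      // immediate field.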
2094    if (NumBits > 0) {
2095      // Common case: small offset, fits into instruction.
2096      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2097      int ImmedOffset = Offset / Scale;
2098      unsigned Mask = (1 << NumBits) - 1;
2099      if ((unsigned)Offset <= Mask * Scale) {
2100        // Replace the FrameIndex with sp
2101        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2102        // FIXME: When addrmode2 goes away, this will simplify (like the
2103        // T2 version), as the LDR.i12 versions don't need the encoding
2104        // tricks for the offset value.
2105        if (isSub) {
2106          if (AddrMode == ARMII::AddrMode_i12)
2107            ImmedOffset = -ImmedOffset;
2108          else
2109            ImmedOffset |= 1 << NumBits;
2110        }
2111        ImmOp.ChangeToImmediate(ImmedOffset);
2112        Offset = 0;
2113        return true;
2114      }
2115
2116      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2117      ImmedOffset = ImmedOffset & Mask;
2118      if (isSub) {
2119        if (AddrMode == ARMII::AddrMode_i12)
2120          ImmedOffset = -ImmedOffset;
2121        else
2122          ImmedOffset |= 1 << NumBits;
2123      }
2124      ImmOp.ChangeToImmediate(ImmedOffset);
2125      Offset &= ~(Mask*Scale);
2126    }
2127  }
2128
2129  Offset = (isSub) ? -Offset : Offset;
2130  return Offset == 0;
2131}
2132
2133/// analyzeCompare - For a comparison instruction, return the source registers
2134/// in SrcReg and SrcReg2 (if it has two register operands), and the value it
2135/// compares against in CmpValue. Return true if the comparison instruction
2136/// can be analyzed.
2137bool ARMBaseInstrInfo::
2138analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
2139               int &CmpMask, int &CmpValue) const {
2140  switch (MI->getOpcode()) {
2141  default: break;
2142  case ARM::CMPri:
2143  case ARM::t2CMPri:
2144    SrcReg = MI->getOperand(0).getReg();
2145    SrcReg2 = 0;
2146    CmpMask = ~0;
2147    CmpValue = MI->getOperand(1).getImm();
2148    return true;
2149  case ARM::CMPrr:
2150  case ARM::t2CMPrr:
2151    SrcReg = MI->getOperand(0).getReg();
2152    SrcReg2 = MI->getOperand(1).getReg();
2153    CmpMask = ~0;
2154    CmpValue = 0;
2155    return true;
2156  case ARM::TSTri:
2157  case ARM::t2TSTri:
2158    SrcReg = MI->getOperand(0).getReg();
2159    SrcReg2 = 0;
2160    CmpMask = MI->getOperand(1).getImm();
2161    CmpValue = 0;
2162    return true;
2163  }
2164
2165  return false;
2166}
2167
2168/// isSuitableForMask - Identify a suitable 'and' instruction that
2169/// operates on the given source register and applies the same mask
2170/// as a 'tst' instruction. Provide a limited look-through for copies.
2171/// When successful, MI will hold the found instruction.
2172static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2173                              int CmpMask, bool CommonUse) {
2174  switch (MI->getOpcode()) {
2175    case ARM::ANDri:
2176    case ARM::t2ANDri:
2177      if (CmpMask != MI->getOperand(2).getImm())
2178        return false;
2179      if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2180        return true;
2181      break;
2182    case ARM::COPY: {
2183      // Walk down one instruction which is potentially an 'and'.
2184      const MachineInstr &Copy = *MI;
2185      MachineBasicBlock::iterator AND(
2186        std::next(MachineBasicBlock::iterator(MI)));
2187      if (AND == MI->getParent()->end()) return false;
2188      MI = AND;
2189      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
2190                               CmpMask, true);
2191    }
2192  }
2193
2194  return false;
2195}
2196
2197/// getSwappedCondition - Assuming the flags are set by MI(a,b), return
2198/// the condition code that gives an equivalent result if we modify the
2199/// instructions so that the flags are instead set by MI(b,a).
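/// For example (illustrative): a user of GT on flags set by CMP(a, b) must
/// use LT once the flags are instead set by CMP(b, a).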
2200inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2201  switch (CC) {
2202  default: return ARMCC::AL;
2203  case ARMCC::EQ: return ARMCC::EQ;
2204  case ARMCC::NE: return ARMCC::NE;
2205  case ARMCC::HS: return ARMCC::LS;
2206  case ARMCC::LO: return ARMCC::HI;
2207  case ARMCC::HI: return ARMCC::LO;
2208  case ARMCC::LS: return ARMCC::HS;
2209  case ARMCC::GE: return ARMCC::LE;
2210  case ARMCC::LT: return ARMCC::GT;
2211  case ARMCC::GT: return ARMCC::LT;
2212  case ARMCC::LE: return ARMCC::GE;
2213  }
2214}
2215
2216/// isRedundantFlagInstr - check whether the first instruction, whose only
2217/// purpose is to update flags, can be made redundant.
2218/// CMPrr can be made redundant by SUBrr if the operands are the same.
2219/// CMPri can be made redundant by SUBri if the operands are the same.
2220/// This function can be extended later on.
2221inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
2222                                        unsigned SrcReg2, int ImmValue,
2223                                        MachineInstr *OI) {
2224  if ((CmpI->getOpcode() == ARM::CMPrr ||
2225       CmpI->getOpcode() == ARM::t2CMPrr) &&
2226      (OI->getOpcode() == ARM::SUBrr ||
2227       OI->getOpcode() == ARM::t2SUBrr) &&
2228      ((OI->getOperand(1).getReg() == SrcReg &&
2229        OI->getOperand(2).getReg() == SrcReg2) ||
2230       (OI->getOperand(1).getReg() == SrcReg2 &&
2231        OI->getOperand(2).getReg() == SrcReg)))
2232    return true;
2233
2234  if ((CmpI->getOpcode() == ARM::CMPri ||
2235       CmpI->getOpcode() == ARM::t2CMPri) &&
2236      (OI->getOpcode() == ARM::SUBri ||
2237       OI->getOpcode() == ARM::t2SUBri) &&
2238      OI->getOperand(1).getReg() == SrcReg &&
2239      OI->getOperand(2).getImm() == ImmValue)
2240    return true;
2241  return false;
2242}
2243
2244/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2245/// comparison into one that sets the zero bit in the flags register;
2246/// Remove a redundant Compare instruction if an earlier instruction can set the
2247/// flags in the same way as Compare.
2248/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2249/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2250/// condition code of instructions which use the flags.
2251bool ARMBaseInstrInfo::
2252optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
2253                     int CmpMask, int CmpValue,
2254                     const MachineRegisterInfo *MRI) const {
2255  // Get the unique definition of SrcReg.
2256  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2257  if (!MI) return false;
2258
2259  // Masked compares sometimes use the same register as the corresponding 'and'.
2260  if (CmpMask != ~0) {
2261    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
2262      MI = nullptr;
2263      for (MachineRegisterInfo::use_instr_iterator
2264           UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2265           UI != UE; ++UI) {
2266        if (UI->getParent() != CmpInstr->getParent()) continue;
2267        MachineInstr *PotentialAND = &*UI;
2268        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2269            isPredicated(PotentialAND))
2270          continue;
2271        MI = PotentialAND;
2272        break;
2273      }
2274      if (!MI) return false;
2275    }
2276  }
2277
2278  // Get ready to iterate backward from CmpInstr.
2279  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2280                              B = CmpInstr->getParent()->begin();
2281
2282  // Early exit if CmpInstr is at the beginning of the BB.
2283  if (I == B) return false;
2284
2285  // There are two possible candidates which can be changed to set CPSR:
2286  // One is MI, the other is a SUB instruction.
2287  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2288  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2289  MachineInstr *Sub = nullptr;
2290  if (SrcReg2 != 0)
2291    // MI is not a candidate for CMPrr.
2292    MI = nullptr;
2293  else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
2294    // Conservatively refuse to convert an instruction which isn't in the same
2295    // BB as the comparison.
2296    // For CMPri, we need to check Sub, thus we can't return here.
2297    if (CmpInstr->getOpcode() == ARM::CMPri ||
2298       CmpInstr->getOpcode() == ARM::t2CMPri)
2299      MI = nullptr;
2300    else
2301      return false;
2302  }
2303
2304  // Check that CPSR isn't set between the comparison instruction and the one we
2305  // want to change. At the same time, search for Sub.
2306  const TargetRegisterInfo *TRI = &getRegisterInfo();
2307  --I;
2308  for (; I != E; --I) {
2309    const MachineInstr &Instr = *I;
2310
2311    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2312        Instr.readsRegister(ARM::CPSR, TRI))
2313      // This instruction modifies or uses CPSR after the one we want to
2314      // change. We can't do this transformation.
2315      return false;
2316
2317    // Check whether CmpInstr can be made redundant by the current instruction.
2318    if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
2319      Sub = &*I;
2320      break;
2321    }
2322
2323    if (I == B)
2324      // The 'and' is below the comparison instruction.
2325      return false;
2326  }
2327
2328  // Return false if no candidates exist.
2329  if (!MI && !Sub)
2330    return false;
2331
2332  // The single candidate is called MI.
2333  if (!MI) MI = Sub;
2334
2335  // We can't use a predicated instruction - it doesn't always write the flags.
2336  if (isPredicated(MI))
2337    return false;
2338
2339  switch (MI->getOpcode()) {
2340  default: break;
2341  case ARM::RSBrr:
2342  case ARM::RSBri:
2343  case ARM::RSCrr:
2344  case ARM::RSCri:
2345  case ARM::ADDrr:
2346  case ARM::ADDri:
2347  case ARM::ADCrr:
2348  case ARM::ADCri:
2349  case ARM::SUBrr:
2350  case ARM::SUBri:
2351  case ARM::SBCrr:
2352  case ARM::SBCri:
2353  case ARM::t2RSBri:
2354  case ARM::t2ADDrr:
2355  case ARM::t2ADDri:
2356  case ARM::t2ADCrr:
2357  case ARM::t2ADCri:
2358  case ARM::t2SUBrr:
2359  case ARM::t2SUBri:
2360  case ARM::t2SBCrr:
2361  case ARM::t2SBCri:
2362  case ARM::ANDrr:
2363  case ARM::ANDri:
2364  case ARM::t2ANDrr:
2365  case ARM::t2ANDri:
2366  case ARM::ORRrr:
2367  case ARM::ORRri:
2368  case ARM::t2ORRrr:
2369  case ARM::t2ORRri:
2370  case ARM::EORrr:
2371  case ARM::EORri:
2372  case ARM::t2EORrr:
2373  case ARM::t2EORri: {
2374    // Scan forward for the use of CPSR
2375    // When checking against MI: if the condition code requires checking of
2376    // the V bit, then this transformation is not safe to do.
2377    // It is safe to remove CmpInstr if CPSR is redefined or killed.
2378    // If we are done with the basic block, we need to check whether CPSR is
2379    // live-out.
2380    SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2381        OperandsToUpdate;
2382    bool isSafe = false;
2383    I = CmpInstr;
2384    E = CmpInstr->getParent()->end();
2385    while (!isSafe && ++I != E) {
2386      const MachineInstr &Instr = *I;
2387      for (unsigned IO = 0, EO = Instr.getNumOperands();
2388           !isSafe && IO != EO; ++IO) {
2389        const MachineOperand &MO = Instr.getOperand(IO);
2390        if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2391          isSafe = true;
2392          break;
2393        }
2394        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2395          continue;
2396        if (MO.isDef()) {
2397          isSafe = true;
2398          break;
2399        }
2400        // The condition code operand is the one just before CPSR, except for VSELs.
2401        ARMCC::CondCodes CC;
2402        bool IsInstrVSel = true;
2403        switch (Instr.getOpcode()) {
2404        default:
2405          IsInstrVSel = false;
2406          CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2407          break;
2408        case ARM::VSELEQD:
2409        case ARM::VSELEQS:
2410          CC = ARMCC::EQ;
2411          break;
2412        case ARM::VSELGTD:
2413        case ARM::VSELGTS:
2414          CC = ARMCC::GT;
2415          break;
2416        case ARM::VSELGED:
2417        case ARM::VSELGES:
2418          CC = ARMCC::GE;
2419          break;
2420        case ARM::VSELVSS:
2421        case ARM::VSELVSD:
2422          CC = ARMCC::VS;
2423          break;
2424        }
2425
2426        if (Sub) {
2427          ARMCC::CondCodes NewCC = getSwappedCondition(CC);
2428          if (NewCC == ARMCC::AL)
2429            return false;
2430          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2431          // on CMP needs to be updated to be based on SUB.
2432          // Push the condition code operands to OperandsToUpdate.
2433          // If it is safe to remove CmpInstr, the condition code of these
2434          // operands will be modified.
2435          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2436              Sub->getOperand(2).getReg() == SrcReg) {
2437            // VSel doesn't support condition code update.
2438            if (IsInstrVSel)
2439              return false;
2440            OperandsToUpdate.push_back(
2441                std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2442          }
2443        } else
2444          switch (CC) {
2445          default:
2446            // CPSR can be used multiple times; we should continue.
2447            break;
2448          case ARMCC::VS:
2449          case ARMCC::VC:
2450          case ARMCC::GE:
2451          case ARMCC::LT:
2452          case ARMCC::GT:
2453          case ARMCC::LE:
2454            return false;
2455          }
2456      }
2457    }
2458
2459    // If CPSR is neither killed nor re-defined, we should check whether it is
2460    // live-out. If it is live-out, do not optimize.
2461    if (!isSafe) {
2462      MachineBasicBlock *MBB = CmpInstr->getParent();
2463      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2464               SE = MBB->succ_end(); SI != SE; ++SI)
2465        if ((*SI)->isLiveIn(ARM::CPSR))
2466          return false;
2467    }
2468
2469    // Toggle the optional operand to CPSR.
2470    MI->getOperand(5).setReg(ARM::CPSR);
2471    MI->getOperand(5).setIsDef(true);
2472    assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
2473    CmpInstr->eraseFromParent();
2474
2475    // Modify the condition code of operands in OperandsToUpdate.
2476    // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2477    // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2478    for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2479      OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2480    return true;
2481  }
2482  }
2483
2484  return false;
2485}
2486
2487bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
2488                                     MachineInstr *DefMI, unsigned Reg,
2489                                     MachineRegisterInfo *MRI) const {
2490  // Fold large immediates into add, sub, or, xor.
2491  unsigned DefOpc = DefMI->getOpcode();
2492  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2493    return false;
2494  if (!DefMI->getOperand(1).isImm())
2495    // Could be t2MOVi32imm <ga:xx>
2496    return false;
2497
2498  if (!MRI->hasOneNonDBGUse(Reg))
2499    return false;
2500
2501  const MCInstrDesc &DefMCID = DefMI->getDesc();
2502  if (DefMCID.hasOptionalDef()) {
2503    unsigned NumOps = DefMCID.getNumOperands();
2504    const MachineOperand &MO = DefMI->getOperand(NumOps-1);
2505    if (MO.getReg() == ARM::CPSR && !MO.isDead())
2506      // If DefMI defines CPSR and it is not dead, it's obviously not safe
2507      // to delete DefMI.
2508      return false;
2509  }
2510
2511  const MCInstrDesc &UseMCID = UseMI->getDesc();
2512  if (UseMCID.hasOptionalDef()) {
2513    unsigned NumOps = UseMCID.getNumOperands();
2514    if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
2515      // If the instruction sets the flag, do not attempt this optimization
2516      // since it may change the semantics of the code.
2517      return false;
2518  }
2519
2520  unsigned UseOpc = UseMI->getOpcode();
2521  unsigned NewUseOpc = 0;
2522  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
2523  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
2524  bool Commute = false;
2525  switch (UseOpc) {
2526  default: return false;
2527  case ARM::SUBrr:
2528  case ARM::ADDrr:
2529  case ARM::ORRrr:
2530  case ARM::EORrr:
2531  case ARM::t2SUBrr:
2532  case ARM::t2ADDrr:
2533  case ARM::t2ORRrr:
2534  case ARM::t2EORrr: {
2535    Commute = UseMI->getOperand(2).getReg() != Reg;
2536    switch (UseOpc) {
2537    default: break;
2538    case ARM::SUBrr: {
2539      if (Commute)
2540        return false;
2541      ImmVal = -ImmVal;
2542      NewUseOpc = ARM::SUBri;
2543      // Fallthrough
2544    }
2545    case ARM::ADDrr:
2546    case ARM::ORRrr:
2547    case ARM::EORrr: {
2548      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
2549        return false;
2550      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
2551      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
2552      switch (UseOpc) {
2553      default: break;
2554      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
2555      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
2556      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
2557      }
2558      break;
2559    }
2560    case ARM::t2SUBrr: {
2561      if (Commute)
2562        return false;
2563      ImmVal = -ImmVal;
2564      NewUseOpc = ARM::t2SUBri;
2565      // Fallthrough
2566    }
2567    case ARM::t2ADDrr:
2568    case ARM::t2ORRrr:
2569    case ARM::t2EORrr: {
2570      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
2571        return false;
2572      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
2573      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
2574      switch (UseOpc) {
2575      default: break;
2576      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
2577      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
2578      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
2579      }
2580      break;
2581    }
2582    }
2583  }
2584  }
2585
2586  unsigned OpIdx = Commute ? 2 : 1;
2587  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
2588  bool isKill = UseMI->getOperand(OpIdx).isKill();
2589  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
2590  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
2591                                      UseMI, UseMI->getDebugLoc(),
2592                                      get(NewUseOpc), NewReg)
2593                              .addReg(Reg1, getKillRegState(isKill))
2594                              .addImm(SOImmValV1)));
2595  UseMI->setDesc(get(NewUseOpc));
2596  UseMI->getOperand(1).setReg(NewReg);
2597  UseMI->getOperand(1).setIsKill();
2598  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
2599  DefMI->eraseFromParent();
2600  return true;
2601}
2602
2603static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
2604                                        const MachineInstr *MI) {
2605  switch (MI->getOpcode()) {
2606  default: {
2607    const MCInstrDesc &Desc = MI->getDesc();
2608    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
2609    assert(UOps >= 0 && "bad # UOps");
2610    return UOps;
2611  }
2612
2613  case ARM::LDRrs:
2614  case ARM::LDRBrs:
2615  case ARM::STRrs:
2616  case ARM::STRBrs: {
2617    unsigned ShOpVal = MI->getOperand(3).getImm();
2618    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2619    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2620    if (!isSub &&
2621        (ShImm == 0 ||
2622         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2623          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2624      return 1;
2625    return 2;
2626  }
2627
2628  case ARM::LDRH:
2629  case ARM::STRH: {
2630    if (!MI->getOperand(2).getReg())
2631      return 1;
2632
2633    unsigned ShOpVal = MI->getOperand(3).getImm();
2634    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2635    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2636    if (!isSub &&
2637        (ShImm == 0 ||
2638         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2639          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2640      return 1;
2641    return 2;
2642  }
2643
2644  case ARM::LDRSB:
2645  case ARM::LDRSH:
2646    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
2647
2648  case ARM::LDRSB_POST:
2649  case ARM::LDRSH_POST: {
2650    unsigned Rt = MI->getOperand(0).getReg();
2651    unsigned Rm = MI->getOperand(3).getReg();
2652    return (Rt == Rm) ? 4 : 3;
2653  }
2654
2655  case ARM::LDR_PRE_REG:
2656  case ARM::LDRB_PRE_REG: {
2657    unsigned Rt = MI->getOperand(0).getReg();
2658    unsigned Rm = MI->getOperand(3).getReg();
2659    if (Rt == Rm)
2660      return 3;
2661    unsigned ShOpVal = MI->getOperand(4).getImm();
2662    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2663    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2664    if (!isSub &&
2665        (ShImm == 0 ||
2666         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2667          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2668      return 2;
2669    return 3;
2670  }
2671
2672  case ARM::STR_PRE_REG:
2673  case ARM::STRB_PRE_REG: {
2674    unsigned ShOpVal = MI->getOperand(4).getImm();
2675    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2676    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2677    if (!isSub &&
2678        (ShImm == 0 ||
2679         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2680          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2681      return 2;
2682    return 3;
2683  }
2684
2685  case ARM::LDRH_PRE:
2686  case ARM::STRH_PRE: {
2687    unsigned Rt = MI->getOperand(0).getReg();
2688    unsigned Rm = MI->getOperand(3).getReg();
2689    if (!Rm)
2690      return 2;
2691    if (Rt == Rm)
2692      return 3;
2693    return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
2694      ? 3 : 2;
2695  }
2696
2697  case ARM::LDR_POST_REG:
2698  case ARM::LDRB_POST_REG:
2699  case ARM::LDRH_POST: {
2700    unsigned Rt = MI->getOperand(0).getReg();
2701    unsigned Rm = MI->getOperand(3).getReg();
2702    return (Rt == Rm) ? 3 : 2;
2703  }
2704
2705  case ARM::LDR_PRE_IMM:
2706  case ARM::LDRB_PRE_IMM:
2707  case ARM::LDR_POST_IMM:
2708  case ARM::LDRB_POST_IMM:
2709  case ARM::STRB_POST_IMM:
2710  case ARM::STRB_POST_REG:
2711  case ARM::STRB_PRE_IMM:
2712  case ARM::STRH_POST:
2713  case ARM::STR_POST_IMM:
2714  case ARM::STR_POST_REG:
2715  case ARM::STR_PRE_IMM:
2716    return 2;
2717
2718  case ARM::LDRSB_PRE:
2719  case ARM::LDRSH_PRE: {
2720    unsigned Rm = MI->getOperand(3).getReg();
2721    if (Rm == 0)
2722      return 3;
2723    unsigned Rt = MI->getOperand(0).getReg();
2724    if (Rt == Rm)
2725      return 4;
2726    unsigned ShOpVal = MI->getOperand(4).getImm();
2727    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2728    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2729    if (!isSub &&
2730        (ShImm == 0 ||
2731         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2732          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2733      return 3;
2734    return 4;
2735  }
2736
2737  case ARM::LDRD: {
2738    unsigned Rt = MI->getOperand(0).getReg();
2739    unsigned Rn = MI->getOperand(2).getReg();
2740    unsigned Rm = MI->getOperand(3).getReg();
2741    if (Rm)
2742      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
2743    return (Rt == Rn) ? 3 : 2;
2744  }
2745
2746  case ARM::STRD: {
2747    unsigned Rm = MI->getOperand(3).getReg();
2748    if (Rm)
2749      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
2750    return 2;
2751  }
2752
2753  case ARM::LDRD_POST:
2754  case ARM::t2LDRD_POST:
2755    return 3;
2756
2757  case ARM::STRD_POST:
2758  case ARM::t2STRD_POST:
2759    return 4;
2760
2761  case ARM::LDRD_PRE: {
2762    unsigned Rt = MI->getOperand(0).getReg();
2763    unsigned Rn = MI->getOperand(3).getReg();
2764    unsigned Rm = MI->getOperand(4).getReg();
2765    if (Rm)
2766      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
2767    return (Rt == Rn) ? 4 : 3;
2768  }
2769
2770  case ARM::t2LDRD_PRE: {
2771    unsigned Rt = MI->getOperand(0).getReg();
2772    unsigned Rn = MI->getOperand(3).getReg();
2773    return (Rt == Rn) ? 4 : 3;
2774  }
2775
2776  case ARM::STRD_PRE: {
2777    unsigned Rm = MI->getOperand(4).getReg();
2778    if (Rm)
2779      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
2780    return 3;
2781  }
2782
2783  case ARM::t2STRD_PRE:
2784    return 3;
2785
2786  case ARM::t2LDR_POST:
2787  case ARM::t2LDRB_POST:
2788  case ARM::t2LDRB_PRE:
2789  case ARM::t2LDRSBi12:
2790  case ARM::t2LDRSBi8:
2791  case ARM::t2LDRSBpci:
2792  case ARM::t2LDRSBs:
2793  case ARM::t2LDRH_POST:
2794  case ARM::t2LDRH_PRE:
2795  case ARM::t2LDRSBT:
2796  case ARM::t2LDRSB_POST:
2797  case ARM::t2LDRSB_PRE:
2798  case ARM::t2LDRSH_POST:
2799  case ARM::t2LDRSH_PRE:
2800  case ARM::t2LDRSHi12:
2801  case ARM::t2LDRSHi8:
2802  case ARM::t2LDRSHpci:
2803  case ARM::t2LDRSHs:
2804    return 2;
2805
2806  case ARM::t2LDRDi8: {
2807    unsigned Rt = MI->getOperand(0).getReg();
2808    unsigned Rn = MI->getOperand(2).getReg();
2809    return (Rt == Rn) ? 3 : 2;
2810  }
2811
2812  case ARM::t2STRB_POST:
2813  case ARM::t2STRB_PRE:
2814  case ARM::t2STRBs:
2815  case ARM::t2STRDi8:
2816  case ARM::t2STRH_POST:
2817  case ARM::t2STRH_PRE:
2818  case ARM::t2STRHs:
2819  case ARM::t2STR_POST:
2820  case ARM::t2STR_PRE:
2821  case ARM::t2STRs:
2822    return 2;
2823  }
2824}
2825
2826// Return the number of 32-bit words loaded by LDM or stored by STM. If this
2827// can't be easily determined, return 0 (missing MachineMemOperand).
2828//
2829// FIXME: The current MachineInstr design does not support relying on machine
2830// mem operands to determine the width of a memory access. Instead, we expect
2831// the target to provide this information based on the instruction opcode and
2832// operands. However, using MachineMemOperand is the best solution now for
2833// two reasons:
2834//
2835// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
2836// operands. This is much more dangerous than using the MachineMemOperand
2837// sizes because CodeGen passes can insert/remove optional machine operands. In
2838// fact, it's totally incorrect for preRA passes and appears to be wrong for
2839// postRA passes as well.
2840//
2841// 2) getNumLDMAddresses is only used by the scheduling machine model and any
2842// machine model that calls this should handle the unknown (zero size) case.
2843//
2844// Long term, we should require a target hook that verifies MachineMemOperand
2845// sizes during MC lowering. That target hook should be local to MC lowering
2846// because we can't ensure that it is aware of other MI forms. Doing this will
2847// ensure that MachineMemOperands are correctly propagated through all passes.
2848unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
2849  unsigned Size = 0;
2850  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
2851         E = MI->memoperands_end(); I != E; ++I) {
2852    Size += (*I)->getSize();
2853  }
2854  return Size / 4;
2855}
2856
2857unsigned
2858ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
2859                                 const MachineInstr *MI) const {
2860  if (!ItinData || ItinData->isEmpty())
2861    return 1;
2862
2863  const MCInstrDesc &Desc = MI->getDesc();
2864  unsigned Class = Desc.getSchedClass();
2865  int ItinUOps = ItinData->getNumMicroOps(Class);
2866  if (ItinUOps >= 0) {
2867    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
2868      return getNumMicroOpsSwiftLdSt(ItinData, MI);
2869
2870    return ItinUOps;
2871  }
2872
2873  unsigned Opc = MI->getOpcode();
2874  switch (Opc) {
2875  default:
2876    llvm_unreachable("Unexpected multi-uops instruction!");
2877  case ARM::VLDMQIA:
2878  case ARM::VSTMQIA:
2879    return 2;
2880
2881  // The number of uOps for load / store multiple is determined by the number
2882  // of registers.
2883  //
2884  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
2885  // same cycle. The scheduling for the first load / store must be done
2886  // separately by assuming the address is not 64-bit aligned.
2887  //
2888  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
2889  // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
2890  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
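  // E.g. (illustrative): a VLDM loading 5 D-registers costs
  // 5/2 + 5%2 + 1 = 4 uops by this formula.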
2891  case ARM::VLDMDIA:
2892  case ARM::VLDMDIA_UPD:
2893  case ARM::VLDMDDB_UPD:
2894  case ARM::VLDMSIA:
2895  case ARM::VLDMSIA_UPD:
2896  case ARM::VLDMSDB_UPD:
2897  case ARM::VSTMDIA:
2898  case ARM::VSTMDIA_UPD:
2899  case ARM::VSTMDDB_UPD:
2900  case ARM::VSTMSIA:
2901  case ARM::VSTMSIA_UPD:
2902  case ARM::VSTMSDB_UPD: {
2903    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
2904    return (NumRegs / 2) + (NumRegs % 2) + 1;
2905  }
2906
2907  case ARM::LDMIA_RET:
2908  case ARM::LDMIA:
2909  case ARM::LDMDA:
2910  case ARM::LDMDB:
2911  case ARM::LDMIB:
2912  case ARM::LDMIA_UPD:
2913  case ARM::LDMDA_UPD:
2914  case ARM::LDMDB_UPD:
2915  case ARM::LDMIB_UPD:
2916  case ARM::STMIA:
2917  case ARM::STMDA:
2918  case ARM::STMDB:
2919  case ARM::STMIB:
2920  case ARM::STMIA_UPD:
2921  case ARM::STMDA_UPD:
2922  case ARM::STMDB_UPD:
2923  case ARM::STMIB_UPD:
2924  case ARM::tLDMIA:
2925  case ARM::tLDMIA_UPD:
2926  case ARM::tSTMIA_UPD:
2927  case ARM::tPOP_RET:
2928  case ARM::tPOP:
2929  case ARM::tPUSH:
2930  case ARM::t2LDMIA_RET:
2931  case ARM::t2LDMIA:
2932  case ARM::t2LDMDB:
2933  case ARM::t2LDMIA_UPD:
2934  case ARM::t2LDMDB_UPD:
2935  case ARM::t2STMIA:
2936  case ARM::t2STMDB:
2937  case ARM::t2STMIA_UPD:
2938  case ARM::t2STMDB_UPD: {
2939    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
2940    if (Subtarget.isSwift()) {
2941      int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
2942      switch (Opc) {
2943      default: break;
2944      case ARM::VLDMDIA_UPD:
2945      case ARM::VLDMDDB_UPD:
2946      case ARM::VLDMSIA_UPD:
2947      case ARM::VLDMSDB_UPD:
2948      case ARM::VSTMDIA_UPD:
2949      case ARM::VSTMDDB_UPD:
2950      case ARM::VSTMSIA_UPD:
2951      case ARM::VSTMSDB_UPD:
2952      case ARM::LDMIA_UPD:
2953      case ARM::LDMDA_UPD:
2954      case ARM::LDMDB_UPD:
2955      case ARM::LDMIB_UPD:
2956      case ARM::STMIA_UPD:
2957      case ARM::STMDA_UPD:
2958      case ARM::STMDB_UPD:
2959      case ARM::STMIB_UPD:
2960      case ARM::tLDMIA_UPD:
2961      case ARM::tSTMIA_UPD:
2962      case ARM::t2LDMIA_UPD:
2963      case ARM::t2LDMDB_UPD:
2964      case ARM::t2STMIA_UPD:
2965      case ARM::t2STMDB_UPD:
2966        ++UOps; // One for base register writeback.
2967        break;
2968      case ARM::LDMIA_RET:
2969      case ARM::tPOP_RET:
2970      case ARM::t2LDMIA_RET:
2971        UOps += 2; // One for base reg wb, one for write to pc.
2972        break;
2973      }
2974      return UOps;
2975    } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
2976      if (NumRegs < 4)
2977        return 2;
2978      // 4 registers would be issued: 2, 2.
2979      // 5 registers would be issued: 2, 2, 1.
2980      int A8UOps = (NumRegs / 2);
2981      if (NumRegs % 2)
2982        ++A8UOps;
2983      return A8UOps;
2984    } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
2985      int A9UOps = (NumRegs / 2);
2986      // If there is an odd number of registers or if it's not 64-bit aligned,
2987      // then it takes an extra AGU (Address Generation Unit) cycle.
2988      if ((NumRegs % 2) ||
2989          !MI->hasOneMemOperand() ||
2990          (*MI->memoperands_begin())->getAlignment() < 8)
2991        ++A9UOps;
2992      return A9UOps;
2993    } else {
2994      // Assume the worst.
2995      return NumRegs;
2996    }
2997  }
2998  }
2999}
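
// Worked example (illustrative, with assumed register counts): for an LDMIA
// of 4 registers, the code above yields 1 + 4 = 5 uops on Swift (plus one
// more for a _UPD writeback form), 2 uops on Cortex-A8/A7, and 2 uops on
// Cortex-A9-like cores (3 if the register count were odd or the address is
// not known to be 64-bit aligned).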
3000
3001int
3002ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3003                                  const MCInstrDesc &DefMCID,
3004                                  unsigned DefClass,
3005                                  unsigned DefIdx, unsigned DefAlign) const {
3006  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3007  if (RegNo <= 0)
3008    // Def is the address writeback.
3009    return ItinData->getOperandCycle(DefClass, DefIdx);
3010
3011  int DefCycle;
3012  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3013    // (regno / 2) + (regno % 2) + 1
3014    DefCycle = RegNo / 2 + 1;
3015    if (RegNo % 2)
3016      ++DefCycle;
3017  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3018    DefCycle = RegNo;
3019    bool isSLoad = false;
3020
3021    switch (DefMCID.getOpcode()) {
3022    default: break;
3023    case ARM::VLDMSIA:
3024    case ARM::VLDMSIA_UPD:
3025    case ARM::VLDMSDB_UPD:
3026      isSLoad = true;
3027      break;
3028    }
3029
3030    // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3031    // then it takes an extra cycle.
3032    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3033      ++DefCycle;
3034  } else {
3035    // Assume the worst.
3036    DefCycle = RegNo + 2;
3037  }
3038
3039  return DefCycle;
3040}
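
// Worked example (illustrative): for the 3rd register of a VLDM (RegNo == 3),
// Cortex-A8 gives DefCycle = 3 / 2 + 1 = 2, plus 1 for the odd register
// count, i.e. 3; an A9-like core with an odd 'S'-register count or
// sub-8-byte alignment gives DefCycle = 3 + 1 = 4.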
3041
3042int
3043ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3044                                 const MCInstrDesc &DefMCID,
3045                                 unsigned DefClass,
3046                                 unsigned DefIdx, unsigned DefAlign) const {
3047  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3048  if (RegNo <= 0)
3049    // Def is the address writeback.
3050    return ItinData->getOperandCycle(DefClass, DefIdx);
3051
3052  int DefCycle;
3053  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3054    // 4 registers would be issued: 1, 2, 1.
3055    // 5 registers would be issued: 1, 2, 2.
3056    DefCycle = RegNo / 2;
3057    if (DefCycle < 1)
3058      DefCycle = 1;
3059    // Result latency is issue cycle + 2: E2.
3060    DefCycle += 2;
3061  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3062    DefCycle = (RegNo / 2);
3063    // If there is an odd number of registers or if it's not 64-bit aligned,
3064    // then it takes an extra AGU (Address Generation Unit) cycle.
3065    if ((RegNo % 2) || DefAlign < 8)
3066      ++DefCycle;
3067    // Result latency is AGU cycles + 2.
3068    DefCycle += 2;
3069  } else {
3070    // Assume the worst.
3071    DefCycle = RegNo + 2;
3072  }
3073
3074  return DefCycle;
3075}
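
// Worked example (illustrative): for RegNo == 5, Cortex-A8 gives DefCycle =
// max(5 / 2, 1) + 2 = 4, and an A9-like core gives (5 / 2) + 1 (odd register
// count) + 2 = 5 when the address is 64-bit aligned.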
3076
3077int
3078ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3079                                  const MCInstrDesc &UseMCID,
3080                                  unsigned UseClass,
3081                                  unsigned UseIdx, unsigned UseAlign) const {
3082  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3083  if (RegNo <= 0)
3084    return ItinData->getOperandCycle(UseClass, UseIdx);
3085
3086  int UseCycle;
3087  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3088    // (regno / 2) + (regno % 2) + 1
3089    UseCycle = RegNo / 2 + 1;
3090    if (RegNo % 2)
3091      ++UseCycle;
3092  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3093    UseCycle = RegNo;
3094    bool isSStore = false;
3095
3096    switch (UseMCID.getOpcode()) {
3097    default: break;
3098    case ARM::VSTMSIA:
3099    case ARM::VSTMSIA_UPD:
3100    case ARM::VSTMSDB_UPD:
3101      isSStore = true;
3102      break;
3103    }
3104
3105    // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3106    // then it takes an extra cycle.
3107    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3108      ++UseCycle;
3109  } else {
3110    // Assume the worst.
3111    UseCycle = RegNo + 2;
3112  }
3113
3114  return UseCycle;
3115}
3116
3117int
3118ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3119                                 const MCInstrDesc &UseMCID,
3120                                 unsigned UseClass,
3121                                 unsigned UseIdx, unsigned UseAlign) const {
3122  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3123  if (RegNo <= 0)
3124    return ItinData->getOperandCycle(UseClass, UseIdx);
3125
3126  int UseCycle;
3127  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3128    UseCycle = RegNo / 2;
3129    if (UseCycle < 2)
3130      UseCycle = 2;
3131    // Read in E3.
3132    UseCycle += 2;
3133  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3134    UseCycle = (RegNo / 2);
3135    // If there is an odd number of registers or if it's not 64-bit aligned,
3136    // then it takes an extra AGU (Address Generation Unit) cycle.
3137    if ((RegNo % 2) || UseAlign < 8)
3138      ++UseCycle;
3139  } else {
3140    // Assume the worst.
3141    UseCycle = 1;
3142  }
3143  return UseCycle;
3144}
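
// Worked example (illustrative): for RegNo == 2, Cortex-A8 clamps 2 / 2 = 1
// up to 2 and adds 2 (read in E3), giving UseCycle = 4; an A9-like core
// gives 2 / 2 = 1 when the address is 64-bit aligned.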
3145
3146int
3147ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3148                                    const MCInstrDesc &DefMCID,
3149                                    unsigned DefIdx, unsigned DefAlign,
3150                                    const MCInstrDesc &UseMCID,
3151                                    unsigned UseIdx, unsigned UseAlign) const {
3152  unsigned DefClass = DefMCID.getSchedClass();
3153  unsigned UseClass = UseMCID.getSchedClass();
3154
3155  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3156    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3157
3158  // This may be a def / use of a variable_ops instruction, in which case the
3159  // operand latency might be determinable dynamically. Let the target try to
3160  // figure it out.
3161  int DefCycle = -1;
3162  bool LdmBypass = false;
3163  switch (DefMCID.getOpcode()) {
3164  default:
3165    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3166    break;
3167
3168  case ARM::VLDMDIA:
3169  case ARM::VLDMDIA_UPD:
3170  case ARM::VLDMDDB_UPD:
3171  case ARM::VLDMSIA:
3172  case ARM::VLDMSIA_UPD:
3173  case ARM::VLDMSDB_UPD:
3174    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3175    break;
3176
3177  case ARM::LDMIA_RET:
3178  case ARM::LDMIA:
3179  case ARM::LDMDA:
3180  case ARM::LDMDB:
3181  case ARM::LDMIB:
3182  case ARM::LDMIA_UPD:
3183  case ARM::LDMDA_UPD:
3184  case ARM::LDMDB_UPD:
3185  case ARM::LDMIB_UPD:
3186  case ARM::tLDMIA:
3187  case ARM::tLDMIA_UPD:
3188  case ARM::tPUSH:
3189  case ARM::t2LDMIA_RET:
3190  case ARM::t2LDMIA:
3191  case ARM::t2LDMDB:
3192  case ARM::t2LDMIA_UPD:
3193  case ARM::t2LDMDB_UPD:
3194    LdmBypass = true;
3195    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3196    break;
3197  }
3198
3199  if (DefCycle == -1)
3200    // We can't seem to determine the result latency of the def, so assume it's 2.
3201    DefCycle = 2;
3202
3203  int UseCycle = -1;
3204  switch (UseMCID.getOpcode()) {
3205  default:
3206    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3207    break;
3208
3209  case ARM::VSTMDIA:
3210  case ARM::VSTMDIA_UPD:
3211  case ARM::VSTMDDB_UPD:
3212  case ARM::VSTMSIA:
3213  case ARM::VSTMSIA_UPD:
3214  case ARM::VSTMSDB_UPD:
3215    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3216    break;
3217
3218  case ARM::STMIA:
3219  case ARM::STMDA:
3220  case ARM::STMDB:
3221  case ARM::STMIB:
3222  case ARM::STMIA_UPD:
3223  case ARM::STMDA_UPD:
3224  case ARM::STMDB_UPD:
3225  case ARM::STMIB_UPD:
3226  case ARM::tSTMIA_UPD:
3227  case ARM::tPOP_RET:
3228  case ARM::tPOP:
3229  case ARM::t2STMIA:
3230  case ARM::t2STMDB:
3231  case ARM::t2STMIA_UPD:
3232  case ARM::t2STMDB_UPD:
3233    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3234    break;
3235  }
3236
3237  if (UseCycle == -1)
3238    // Assume it's read in the first stage.
3239    UseCycle = 1;
3240
3241  UseCycle = DefCycle - UseCycle + 1;
3242  if (UseCycle > 0) {
3243    if (LdmBypass) {
3244      // It's a variable_ops instruction so we can't use DefIdx here. Just use
3245      // first def operand.
3246      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3247                                          UseClass, UseIdx))
3248        --UseCycle;
3249    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3250                                               UseClass, UseIdx)) {
3251      --UseCycle;
3252    }
3253  }
3254
3255  return UseCycle;
3256}
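
// Putting the pieces together (illustrative numbers): with DefCycle == 4 from
// getLDMDefCycle and UseCycle == 2 from getSTMUseCycle, the operand latency
// is 4 - 2 + 1 = 3 cycles, reduced to 2 if the itinerary models a pipeline
// forward between the two scheduling classes.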
3257
3258static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3259                                           const MachineInstr *MI, unsigned Reg,
3260                                           unsigned &DefIdx, unsigned &Dist) {
3261  Dist = 0;
3262
3263  MachineBasicBlock::const_iterator I = MI; ++I;
3264  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3265  assert(II->isInsideBundle() && "Empty bundle?");
3266
3267  int Idx = -1;
3268  while (II->isInsideBundle()) {
3269    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3270    if (Idx != -1)
3271      break;
3272    --II;
3273    ++Dist;
3274  }
3275
3276  assert(Idx != -1 && "Cannot find bundled definition!");
3277  DefIdx = Idx;
3278  return II;
3279}
3280
3281static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3282                                           const MachineInstr *MI, unsigned Reg,
3283                                           unsigned &UseIdx, unsigned &Dist) {
3284  Dist = 0;
3285
3286  MachineBasicBlock::const_instr_iterator II = MI; ++II;
3287  assert(II->isInsideBundle() && "Empty bundle?");
3288  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
3289
3290  // FIXME: This doesn't properly handle multiple uses.
3291  int Idx = -1;
3292  while (II != E && II->isInsideBundle()) {
3293    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3294    if (Idx != -1)
3295      break;
3296    if (II->getOpcode() != ARM::t2IT)
3297      ++Dist;
3298    ++II;
3299  }
3300
3301  if (Idx == -1) {
3302    Dist = 0;
3303    return nullptr;
3304  }
3305
3306  UseIdx = Idx;
3307  return II;
3308}
3309
3310/// Return the number of cycles to add to (or subtract from) the static
3311/// itinerary based on the def opcode and alignment. The caller will ensure that
3312/// adjusted latency is at least one cycle.
3313static int adjustDefLatency(const ARMSubtarget &Subtarget,
3314                            const MachineInstr *DefMI,
3315                            const MCInstrDesc *DefMCID, unsigned DefAlign) {
3316  int Adjust = 0;
3317  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
3318    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3319    // variants are one cycle cheaper.
3320    switch (DefMCID->getOpcode()) {
3321    default: break;
3322    case ARM::LDRrs:
3323    case ARM::LDRBrs: {
3324      unsigned ShOpVal = DefMI->getOperand(3).getImm();
3325      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3326      if (ShImm == 0 ||
3327          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3328        --Adjust;
3329      break;
3330    }
3331    case ARM::t2LDRs:
3332    case ARM::t2LDRBs:
3333    case ARM::t2LDRHs:
3334    case ARM::t2LDRSHs: {
3335      // Thumb2 mode: lsl only.
3336      unsigned ShAmt = DefMI->getOperand(3).getImm();
3337      if (ShAmt == 0 || ShAmt == 2)
3338        --Adjust;
3339      break;
3340    }
3341    }
3342  } else if (Subtarget.isSwift()) {
3343    // FIXME: Properly handle all of the latency adjustments for address
3344    // writeback.
3345    switch (DefMCID->getOpcode()) {
3346    default: break;
3347    case ARM::LDRrs:
3348    case ARM::LDRBrs: {
3349      unsigned ShOpVal = DefMI->getOperand(3).getImm();
3350      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3351      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3352      if (!isSub &&
3353          (ShImm == 0 ||
3354           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3355            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3356        Adjust -= 2;
3357      else if (!isSub &&
3358               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3359        --Adjust;
3360      break;
3361    }
3362    case ARM::t2LDRs:
3363    case ARM::t2LDRBs:
3364    case ARM::t2LDRHs:
3365    case ARM::t2LDRSHs: {
3366      // Thumb2 mode: lsl only.
3367      unsigned ShAmt = DefMI->getOperand(3).getImm();
3368      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3369        Adjust -= 2;
3370      break;
3371    }
3372    }
3373  }
3374
3375  if (DefAlign < 8 && Subtarget.isLikeA9()) {
3376    switch (DefMCID->getOpcode()) {
3377    default: break;
3378    case ARM::VLD1q8:
3379    case ARM::VLD1q16:
3380    case ARM::VLD1q32:
3381    case ARM::VLD1q64:
3382    case ARM::VLD1q8wb_fixed:
3383    case ARM::VLD1q16wb_fixed:
3384    case ARM::VLD1q32wb_fixed:
3385    case ARM::VLD1q64wb_fixed:
3386    case ARM::VLD1q8wb_register:
3387    case ARM::VLD1q16wb_register:
3388    case ARM::VLD1q32wb_register:
3389    case ARM::VLD1q64wb_register:
3390    case ARM::VLD2d8:
3391    case ARM::VLD2d16:
3392    case ARM::VLD2d32:
3393    case ARM::VLD2q8:
3394    case ARM::VLD2q16:
3395    case ARM::VLD2q32:
3396    case ARM::VLD2d8wb_fixed:
3397    case ARM::VLD2d16wb_fixed:
3398    case ARM::VLD2d32wb_fixed:
3399    case ARM::VLD2q8wb_fixed:
3400    case ARM::VLD2q16wb_fixed:
3401    case ARM::VLD2q32wb_fixed:
3402    case ARM::VLD2d8wb_register:
3403    case ARM::VLD2d16wb_register:
3404    case ARM::VLD2d32wb_register:
3405    case ARM::VLD2q8wb_register:
3406    case ARM::VLD2q16wb_register:
3407    case ARM::VLD2q32wb_register:
3408    case ARM::VLD3d8:
3409    case ARM::VLD3d16:
3410    case ARM::VLD3d32:
3411    case ARM::VLD1d64T:
3412    case ARM::VLD3d8_UPD:
3413    case ARM::VLD3d16_UPD:
3414    case ARM::VLD3d32_UPD:
3415    case ARM::VLD1d64Twb_fixed:
3416    case ARM::VLD1d64Twb_register:
3417    case ARM::VLD3q8_UPD:
3418    case ARM::VLD3q16_UPD:
3419    case ARM::VLD3q32_UPD:
3420    case ARM::VLD4d8:
3421    case ARM::VLD4d16:
3422    case ARM::VLD4d32:
3423    case ARM::VLD1d64Q:
3424    case ARM::VLD4d8_UPD:
3425    case ARM::VLD4d16_UPD:
3426    case ARM::VLD4d32_UPD:
3427    case ARM::VLD1d64Qwb_fixed:
3428    case ARM::VLD1d64Qwb_register:
3429    case ARM::VLD4q8_UPD:
3430    case ARM::VLD4q16_UPD:
3431    case ARM::VLD4q32_UPD:
3432    case ARM::VLD1DUPq8:
3433    case ARM::VLD1DUPq16:
3434    case ARM::VLD1DUPq32:
3435    case ARM::VLD1DUPq8wb_fixed:
3436    case ARM::VLD1DUPq16wb_fixed:
3437    case ARM::VLD1DUPq32wb_fixed:
3438    case ARM::VLD1DUPq8wb_register:
3439    case ARM::VLD1DUPq16wb_register:
3440    case ARM::VLD1DUPq32wb_register:
3441    case ARM::VLD2DUPd8:
3442    case ARM::VLD2DUPd16:
3443    case ARM::VLD2DUPd32:
3444    case ARM::VLD2DUPd8wb_fixed:
3445    case ARM::VLD2DUPd16wb_fixed:
3446    case ARM::VLD2DUPd32wb_fixed:
3447    case ARM::VLD2DUPd8wb_register:
3448    case ARM::VLD2DUPd16wb_register:
3449    case ARM::VLD2DUPd32wb_register:
3450    case ARM::VLD4DUPd8:
3451    case ARM::VLD4DUPd16:
3452    case ARM::VLD4DUPd32:
3453    case ARM::VLD4DUPd8_UPD:
3454    case ARM::VLD4DUPd16_UPD:
3455    case ARM::VLD4DUPd32_UPD:
3456    case ARM::VLD1LNd8:
3457    case ARM::VLD1LNd16:
3458    case ARM::VLD1LNd32:
3459    case ARM::VLD1LNd8_UPD:
3460    case ARM::VLD1LNd16_UPD:
3461    case ARM::VLD1LNd32_UPD:
3462    case ARM::VLD2LNd8:
3463    case ARM::VLD2LNd16:
3464    case ARM::VLD2LNd32:
3465    case ARM::VLD2LNq16:
3466    case ARM::VLD2LNq32:
3467    case ARM::VLD2LNd8_UPD:
3468    case ARM::VLD2LNd16_UPD:
3469    case ARM::VLD2LNd32_UPD:
3470    case ARM::VLD2LNq16_UPD:
3471    case ARM::VLD2LNq32_UPD:
3472    case ARM::VLD4LNd8:
3473    case ARM::VLD4LNd16:
3474    case ARM::VLD4LNd32:
3475    case ARM::VLD4LNq16:
3476    case ARM::VLD4LNq32:
3477    case ARM::VLD4LNd8_UPD:
3478    case ARM::VLD4LNd16_UPD:
3479    case ARM::VLD4LNd32_UPD:
3480    case ARM::VLD4LNq16_UPD:
3481    case ARM::VLD4LNq32_UPD:
3482      // If the address is not 64-bit aligned, the latencies of these
3483      // instructions increase by one.
3484      ++Adjust;
3485      break;
3486    }
3487  }
3488  return Adjust;
3489}
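
// Worked example (illustrative): for "ldr r0, [r1, r2, lsl #2]" (LDRrs with
// ShImm == 2 and an lsl shift opcode), the code above returns -1 on
// Cortex-A8/A7/A9-like cores and -2 on Swift; an A9-like core additionally
// adds +1 for the listed VLD opcodes when DefAlign < 8.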
3490
3491
3492
3493int
3494ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3495                                    const MachineInstr *DefMI, unsigned DefIdx,
3496                                    const MachineInstr *UseMI,
3497                                    unsigned UseIdx) const {
3498  // No operand latency. The caller may fall back to getInstrLatency.
3499  if (!ItinData || ItinData->isEmpty())
3500    return -1;
3501
3502  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
3503  unsigned Reg = DefMO.getReg();
3504  const MCInstrDesc *DefMCID = &DefMI->getDesc();
3505  const MCInstrDesc *UseMCID = &UseMI->getDesc();
3506
3507  unsigned DefAdj = 0;
3508  if (DefMI->isBundle()) {
3509    DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
3510    DefMCID = &DefMI->getDesc();
3511  }
3512  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
3513      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
3514    return 1;
3515  }
3516
3517  unsigned UseAdj = 0;
3518  if (UseMI->isBundle()) {
3519    unsigned NewUseIdx;
3520    const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
3521                                                   Reg, NewUseIdx, UseAdj);
3522    if (!NewUseMI)
3523      return -1;
3524
3525    UseMI = NewUseMI;
3526    UseIdx = NewUseIdx;
3527    UseMCID = &UseMI->getDesc();
3528  }
3529
3530  if (Reg == ARM::CPSR) {
3531    if (DefMI->getOpcode() == ARM::FMSTAT) {
3532      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
3533      return Subtarget.isLikeA9() ? 1 : 20;
3534    }
3535
3536    // CPSR set and branch can be paired in the same cycle.
3537    if (UseMI->isBranch())
3538      return 0;
3539
3540    // Otherwise it takes the instruction latency (generally one).
3541    unsigned Latency = getInstrLatency(ItinData, DefMI);
3542
3543    // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to
3544    // its uses. Instructions scheduled between them may otherwise incur a code
3545    // size penalty (they are not able to use the CPSR-setting 16-bit
3546    // instructions).
3547    if (Latency > 0 && Subtarget.isThumb2()) {
3548      const MachineFunction *MF = DefMI->getParent()->getParent();
3549      if (MF->getFunction()->getAttributes().
3550            hasAttribute(AttributeSet::FunctionIndex,
3551                         Attribute::OptimizeForSize))
3552        --Latency;
3553    }
3554    return Latency;
3555  }
3556
3557  if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
3558    return -1;
3559
3560  unsigned DefAlign = DefMI->hasOneMemOperand()
3561    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
3562  unsigned UseAlign = UseMI->hasOneMemOperand()
3563    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
3564
3565  // Get the itinerary's latency if possible, and handle variable_ops.
3566  int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
3567                                  *UseMCID, UseIdx, UseAlign);
3568  // Unable to find operand latency. The caller may resort to getInstrLatency.
3569  if (Latency < 0)
3570    return Latency;
3571
3572  // Adjust for IT block position.
3573  int Adj = DefAdj + UseAdj;
3574
3575  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
3576  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
3577  if (Adj >= 0 || (int)Latency > -Adj) {
3578    return Latency + Adj;
3579  }
3580  // Return the itinerary latency, which may be zero but not less than zero.
3581  return Latency;
3582}
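
// Sketch of the CPSR special cases above (illustrative): an FMSTAT def of
// CPSR read on a core that is not A9-like costs 20 cycles, a CPSR def
// consumed by a branch is free (0), and any other CPSR use falls back to the
// instruction latency, minus one for Thumb2 functions marked optsize.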
3583
3584int
3585ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3586                                    SDNode *DefNode, unsigned DefIdx,
3587                                    SDNode *UseNode, unsigned UseIdx) const {
3588  if (!DefNode->isMachineOpcode())
3589    return 1;
3590
3591  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
3592
3593  if (isZeroCost(DefMCID.Opcode))
3594    return 0;
3595
3596  if (!ItinData || ItinData->isEmpty())
3597    return DefMCID.mayLoad() ? 3 : 1;
3598
3599  if (!UseNode->isMachineOpcode()) {
3600    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
3601    if (Subtarget.isLikeA9() || Subtarget.isSwift())
3602      return Latency <= 2 ? 1 : Latency - 1;
3603    else
3604      return Latency <= 3 ? 1 : Latency - 2;
3605  }
3606
3607  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
3608  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
3609  unsigned DefAlign = !DefMN->memoperands_empty()
3610    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
3611  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
3612  unsigned UseAlign = !UseMN->memoperands_empty()
3613    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
3614  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
3615                                  UseMCID, UseIdx, UseAlign);
3616
3617  if (Latency > 1 &&
3618      (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
3619       Subtarget.isCortexA7())) {
3620    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3621    // variants are one cycle cheaper.
3622    switch (DefMCID.getOpcode()) {
3623    default: break;
3624    case ARM::LDRrs:
3625    case ARM::LDRBrs: {
3626      unsigned ShOpVal =
3627        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3628      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3629      if (ShImm == 0 ||
3630          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3631        --Latency;
3632      break;
3633    }
3634    case ARM::t2LDRs:
3635    case ARM::t2LDRBs:
3636    case ARM::t2LDRHs:
3637    case ARM::t2LDRSHs: {
3638      // Thumb2 mode: lsl only.
3639      unsigned ShAmt =
3640        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3641      if (ShAmt == 0 || ShAmt == 2)
3642        --Latency;
3643      break;
3644    }
3645    }
3646  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
3647    // FIXME: Properly handle all of the latency adjustments for address
3648    // writeback.
3649    switch (DefMCID.getOpcode()) {
3650    default: break;
3651    case ARM::LDRrs:
3652    case ARM::LDRBrs: {
3653      unsigned ShOpVal =
3654        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3655      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3656      if (ShImm == 0 ||
3657          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3658           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3659        Latency -= 2;
3660      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3661        --Latency;
3662      break;
3663    }
3664    case ARM::t2LDRs:
3665    case ARM::t2LDRBs:
3666    case ARM::t2LDRHs:
3667    case ARM::t2LDRSHs: {
3668      // Thumb2 mode: lsl 0-3 only.
3669      Latency -= 2;
3670      break;
3671    }
3672    }
3673  }
3674
3675  if (DefAlign < 8 && Subtarget.isLikeA9())
3676    switch (DefMCID.getOpcode()) {
3677    default: break;
3678    case ARM::VLD1q8:
3679    case ARM::VLD1q16:
3680    case ARM::VLD1q32:
3681    case ARM::VLD1q64:
3682    case ARM::VLD1q8wb_register:
3683    case ARM::VLD1q16wb_register:
3684    case ARM::VLD1q32wb_register:
3685    case ARM::VLD1q64wb_register:
3686    case ARM::VLD1q8wb_fixed:
3687    case ARM::VLD1q16wb_fixed:
3688    case ARM::VLD1q32wb_fixed:
3689    case ARM::VLD1q64wb_fixed:
3690    case ARM::VLD2d8:
3691    case ARM::VLD2d16:
3692    case ARM::VLD2d32:
3693    case ARM::VLD2q8Pseudo:
3694    case ARM::VLD2q16Pseudo:
3695    case ARM::VLD2q32Pseudo:
3696    case ARM::VLD2d8wb_fixed:
3697    case ARM::VLD2d16wb_fixed:
3698    case ARM::VLD2d32wb_fixed:
3699    case ARM::VLD2q8PseudoWB_fixed:
3700    case ARM::VLD2q16PseudoWB_fixed:
3701    case ARM::VLD2q32PseudoWB_fixed:
3702    case ARM::VLD2d8wb_register:
3703    case ARM::VLD2d16wb_register:
3704    case ARM::VLD2d32wb_register:
3705    case ARM::VLD2q8PseudoWB_register:
3706    case ARM::VLD2q16PseudoWB_register:
3707    case ARM::VLD2q32PseudoWB_register:
3708    case ARM::VLD3d8Pseudo:
3709    case ARM::VLD3d16Pseudo:
3710    case ARM::VLD3d32Pseudo:
3711    case ARM::VLD1d64TPseudo:
3712    case ARM::VLD1d64TPseudoWB_fixed:
3713    case ARM::VLD3d8Pseudo_UPD:
3714    case ARM::VLD3d16Pseudo_UPD:
3715    case ARM::VLD3d32Pseudo_UPD:
3716    case ARM::VLD3q8Pseudo_UPD:
3717    case ARM::VLD3q16Pseudo_UPD:
3718    case ARM::VLD3q32Pseudo_UPD:
3719    case ARM::VLD3q8oddPseudo:
3720    case ARM::VLD3q16oddPseudo:
3721    case ARM::VLD3q32oddPseudo:
3722    case ARM::VLD3q8oddPseudo_UPD:
3723    case ARM::VLD3q16oddPseudo_UPD:
3724    case ARM::VLD3q32oddPseudo_UPD:
3725    case ARM::VLD4d8Pseudo:
3726    case ARM::VLD4d16Pseudo:
3727    case ARM::VLD4d32Pseudo:
3728    case ARM::VLD1d64QPseudo:
3729    case ARM::VLD1d64QPseudoWB_fixed:
3730    case ARM::VLD4d8Pseudo_UPD:
3731    case ARM::VLD4d16Pseudo_UPD:
3732    case ARM::VLD4d32Pseudo_UPD:
3733    case ARM::VLD4q8Pseudo_UPD:
3734    case ARM::VLD4q16Pseudo_UPD:
3735    case ARM::VLD4q32Pseudo_UPD:
3736    case ARM::VLD4q8oddPseudo:
3737    case ARM::VLD4q16oddPseudo:
3738    case ARM::VLD4q32oddPseudo:
3739    case ARM::VLD4q8oddPseudo_UPD:
3740    case ARM::VLD4q16oddPseudo_UPD:
3741    case ARM::VLD4q32oddPseudo_UPD:
3742    case ARM::VLD1DUPq8:
3743    case ARM::VLD1DUPq16:
3744    case ARM::VLD1DUPq32:
3745    case ARM::VLD1DUPq8wb_fixed:
3746    case ARM::VLD1DUPq16wb_fixed:
3747    case ARM::VLD1DUPq32wb_fixed:
3748    case ARM::VLD1DUPq8wb_register:
3749    case ARM::VLD1DUPq16wb_register:
3750    case ARM::VLD1DUPq32wb_register:
3751    case ARM::VLD2DUPd8:
3752    case ARM::VLD2DUPd16:
3753    case ARM::VLD2DUPd32:
3754    case ARM::VLD2DUPd8wb_fixed:
3755    case ARM::VLD2DUPd16wb_fixed:
3756    case ARM::VLD2DUPd32wb_fixed:
3757    case ARM::VLD2DUPd8wb_register:
3758    case ARM::VLD2DUPd16wb_register:
3759    case ARM::VLD2DUPd32wb_register:
3760    case ARM::VLD4DUPd8Pseudo:
3761    case ARM::VLD4DUPd16Pseudo:
3762    case ARM::VLD4DUPd32Pseudo:
3763    case ARM::VLD4DUPd8Pseudo_UPD:
3764    case ARM::VLD4DUPd16Pseudo_UPD:
3765    case ARM::VLD4DUPd32Pseudo_UPD:
3766    case ARM::VLD1LNq8Pseudo:
3767    case ARM::VLD1LNq16Pseudo:
3768    case ARM::VLD1LNq32Pseudo:
3769    case ARM::VLD1LNq8Pseudo_UPD:
3770    case ARM::VLD1LNq16Pseudo_UPD:
3771    case ARM::VLD1LNq32Pseudo_UPD:
3772    case ARM::VLD2LNd8Pseudo:
3773    case ARM::VLD2LNd16Pseudo:
3774    case ARM::VLD2LNd32Pseudo:
3775    case ARM::VLD2LNq16Pseudo:
3776    case ARM::VLD2LNq32Pseudo:
3777    case ARM::VLD2LNd8Pseudo_UPD:
3778    case ARM::VLD2LNd16Pseudo_UPD:
3779    case ARM::VLD2LNd32Pseudo_UPD:
3780    case ARM::VLD2LNq16Pseudo_UPD:
3781    case ARM::VLD2LNq32Pseudo_UPD:
3782    case ARM::VLD4LNd8Pseudo:
3783    case ARM::VLD4LNd16Pseudo:
3784    case ARM::VLD4LNd32Pseudo:
3785    case ARM::VLD4LNq16Pseudo:
3786    case ARM::VLD4LNq32Pseudo:
3787    case ARM::VLD4LNd8Pseudo_UPD:
3788    case ARM::VLD4LNd16Pseudo_UPD:
3789    case ARM::VLD4LNd32Pseudo_UPD:
3790    case ARM::VLD4LNq16Pseudo_UPD:
3791    case ARM::VLD4LNq32Pseudo_UPD:
3792      // If the address is not 64-bit aligned, the latencies of these
3793      // instructions increase by one.
3794      ++Latency;
3795      break;
3796    }
3797
3798  return Latency;
3799}
3800
3801unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
3802  if (MI->isCopyLike() || MI->isInsertSubreg() ||
3803      MI->isRegSequence() || MI->isImplicitDef())
3804    return 0;
3805
3806  if (MI->isBundle())
3807    return 0;
3808
3809  const MCInstrDesc &MCID = MI->getDesc();
3810
3811  if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
3812    // When predicated, CPSR is an additional source operand for CPSR updating
3813    // instructions; this apparently increases their latencies.
3814    return 1;
3815  }
3816  return 0;
3817}
3818
3819unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
3820                                           const MachineInstr *MI,
3821                                           unsigned *PredCost) const {
3822  if (MI->isCopyLike() || MI->isInsertSubreg() ||
3823      MI->isRegSequence() || MI->isImplicitDef())
3824    return 1;
3825
3826  // An instruction scheduler typically runs on unbundled instructions, however
3827  // other passes may query the latency of a bundled instruction.
3828  if (MI->isBundle()) {
3829    unsigned Latency = 0;
3830    MachineBasicBlock::const_instr_iterator I = MI;
3831    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
3832    while (++I != E && I->isInsideBundle()) {
3833      if (I->getOpcode() != ARM::t2IT)
3834        Latency += getInstrLatency(ItinData, I, PredCost);
3835    }
3836    return Latency;
3837  }
3838
3839  const MCInstrDesc &MCID = MI->getDesc();
3840  if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
3841    // When predicated, CPSR is an additional source operand for CPSR updating
3842    // instructions; this apparently increases their latencies.
3843    *PredCost = 1;
3844  }
3845  // Be sure to call getStageLatency for an empty itinerary in case it has a
3846  // valid MinLatency property.
3847  if (!ItinData)
3848    return MI->mayLoad() ? 3 : 1;
3849
3850  unsigned Class = MCID.getSchedClass();
3851
3852  // For instructions with variable uops, use uops as latency.
3853  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
3854    return getNumMicroOps(ItinData, MI);
3855
3856  // For the common case, fall back on the itinerary's latency.
3857  unsigned Latency = ItinData->getStageLatency(Class);
3858
3859  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
3860  unsigned DefAlign = MI->hasOneMemOperand()
3861    ? (*MI->memoperands_begin())->getAlignment() : 0;
3862  int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
3863  if (Adj >= 0 || (int)Latency > -Adj) {
3864    return Latency + Adj;
3865  }
3866  return Latency;
3867}
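
// For example (illustrative): a bundle containing an IT instruction and two
// predicated adds reports the sum of the two add latencies (t2IT is
// skipped), and a variable-uop load / store multiple reports its uop count
// from getNumMicroOps as its latency.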
3868
3869int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
3870                                      SDNode *Node) const {
3871  if (!Node->isMachineOpcode())
3872    return 1;
3873
3874  if (!ItinData || ItinData->isEmpty())
3875    return 1;
3876
3877  unsigned Opcode = Node->getMachineOpcode();
3878  switch (Opcode) {
3879  default:
3880    return ItinData->getStageLatency(get(Opcode).getSchedClass());
3881  case ARM::VLDMQIA:
3882  case ARM::VSTMQIA:
3883    return 2;
3884  }
3885}
3886
3887bool ARMBaseInstrInfo::
3888hasHighOperandLatency(const InstrItineraryData *ItinData,
3889                      const MachineRegisterInfo *MRI,
3890                      const MachineInstr *DefMI, unsigned DefIdx,
3891                      const MachineInstr *UseMI, unsigned UseIdx) const {
3892  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
3893  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
3894  if (Subtarget.isCortexA8() &&
3895      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
3896    // CortexA8 VFP instructions are not pipelined.
3897    return true;
3898
3899  // Hoist VFP / NEON instructions with a latency of 4 cycles or higher.
3900  int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
3901  if (Latency < 0)
3902    Latency = getInstrLatency(ItinData, DefMI);
3903  if (Latency <= 3)
3904    return false;
3905  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
3906         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
3907}
3908
3909bool ARMBaseInstrInfo::
3910hasLowDefLatency(const InstrItineraryData *ItinData,
3911                 const MachineInstr *DefMI, unsigned DefIdx) const {
3912  if (!ItinData || ItinData->isEmpty())
3913    return false;
3914
3915  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
3916  if (DDomain == ARMII::DomainGeneral) {
3917    unsigned DefClass = DefMI->getDesc().getSchedClass();
3918    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3919    return (DefCycle != -1 && DefCycle <= 2);
3920  }
3921  return false;
3922}
3923
3924bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
3925                                         StringRef &ErrInfo) const {
3926  if (convertAddSubFlagsOpcode(MI->getOpcode())) {
3927    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
3928    return false;
3929  }
3930  return true;
3931}
3932
3933bool
3934ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
3935                                     unsigned &AddSubOpc,
3936                                     bool &NegAcc, bool &HasLane) const {
3937  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
3938  if (I == MLxEntryMap.end())
3939    return false;
3940
3941  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
3942  MulOpc = Entry.MulOpc;
3943  AddSubOpc = Entry.AddSubOpc;
3944  NegAcc = Entry.NegAcc;
3945  HasLane = Entry.HasLane;
3946  return true;
3947}
3948
3949//===----------------------------------------------------------------------===//
3950// Execution domains.
3951//===----------------------------------------------------------------------===//
3952//
3953// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
3954// and some can go down both.  The vmov instructions go down the VFP pipeline,
3955// but they can be changed to vorr equivalents that are executed by the NEON
3956// pipeline.
3957//
3958// We use the following execution domain numbering:
3959//
3960enum ARMExeDomain {
3961  ExeGeneric = 0,
3962  ExeVFP = 1,
3963  ExeNEON = 2
3964};
3965//
3966// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
3967//
3968std::pair<uint16_t, uint16_t>
3969ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
3970  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
3971  // if they are not predicated.
3972  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
3973    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
3974
3975  // CortexA9 is particularly picky about mixing the two and wants these
3976  // converted.
3977  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
3978      (MI->getOpcode() == ARM::VMOVRS ||
3979       MI->getOpcode() == ARM::VMOVSR ||
3980       MI->getOpcode() == ARM::VMOVS))
3981    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
3982
3983  // No other instructions can be swizzled, so just determine their domain.
3984  unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
3985
3986  if (Domain & ARMII::DomainNEON)
3987    return std::make_pair(ExeNEON, 0);
3988
3989  // Certain instructions can go either way on Cortex-A8.
3990  // Treat them as NEON instructions.
3991  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
3992    return std::make_pair(ExeNEON, 0);
3993
3994  if (Domain & ARMII::DomainVFP)
3995    return std::make_pair(ExeVFP, 0);
3996
3997  return std::make_pair(ExeGeneric, 0);
3998}
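
// Illustrative sketch (assumed caller, in the style of the generic execution
// dependency fix pass): the first element of the returned pair is the
// current domain, the second a bit mask of domains the instruction could be
// moved to. `TII` and `MI` are assumed names:
//
//   std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI);
//   if (DomP.second & (1 << ExeNEON))
//     TII->setExecutionDomain(MI, ExeNEON); // e.g. rewrite VMOVD to VORRd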
3999
4000static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4001                                            unsigned SReg, unsigned &Lane) {
4002  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4003  Lane = 0;
4004
4005  if (DReg != ARM::NoRegister)
4006    return DReg;
4007
4008  Lane = 1;
4009  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4010
4011  assert(DReg && "S-register with no D super-register?");
4012  return DReg;
4013}
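
// Worked example (illustrative): S1 has no ssub_0 super-register, so the
// second query matches D0 via ssub_1 and returns DReg = D0 with Lane = 1;
// S2 matches immediately via ssub_0, returning D1 with Lane = 0.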
4014
4015/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4016/// set ImplicitSReg to a register number that must be marked as implicit-use,
4017/// or to zero if no register needs to be marked as implicit-use.
4018///
4019/// If the function cannot determine whether an SPR should be marked as
4020/// implicit-use or not, it returns false.
4021///
4022/// This function handles cases where an instruction is being modified from
4023/// taking an SPR to taking DPR[Lane]. A use of the DPR is being added, which
4024/// may conflict with an earlier def of an SPR corresponding to DPR[Lane^1]
4025/// (i.e. the other lane of the DPR).
4026///
4027/// If the other SPR is defined, an implicit-use of it should be added.
4028/// Otherwise (including the case where the DPR itself is defined), it should
4029/// not.
4029///
4030static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4031                                       MachineInstr *MI,
4032                                       unsigned DReg, unsigned Lane,
4033                                       unsigned &ImplicitSReg) {
4034  // If the DPR is defined or used already, the other SPR lane will be chained
4035  // correctly, so there is nothing to be done.
4036  if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
4037    ImplicitSReg = 0;
4038    return true;
4039  }
4040
4041  // Otherwise we need to go searching to see if the SPR is set explicitly.
4042  ImplicitSReg = TRI->getSubReg(DReg,
4043                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4044  MachineBasicBlock::LivenessQueryResult LQR =
4045    MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4046
4047  if (LQR == MachineBasicBlock::LQR_Live)
4048    return true;
4049  else if (LQR == MachineBasicBlock::LQR_Unknown)
4050    return false;
4051
4052  // If the register is known not to be live, there is no need to add an
4053  // implicit-use.
4054  ImplicitSReg = 0;
4055  return true;
4056}
4057
4058void
4059ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
4060  unsigned DstReg, SrcReg, DReg;
4061  unsigned Lane;
4062  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
4063  const TargetRegisterInfo *TRI = &getRegisterInfo();
4064  switch (MI->getOpcode()) {
4065    default:
4066      llvm_unreachable("cannot handle opcode!");
4067      break;
4068    case ARM::VMOVD:
4069      if (Domain != ExeNEON)
4070        break;
4071
4072      // Zap the predicate operands.
4073      assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4074
4075      // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4076      DstReg = MI->getOperand(0).getReg();
4077      SrcReg = MI->getOperand(1).getReg();
4078
4079      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4080        MI->RemoveOperand(i-1);
4081
4082      // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4083      MI->setDesc(get(ARM::VORRd));
4084      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
4085                        .addReg(SrcReg)
4086                        .addReg(SrcReg));
4087      break;
4088    case ARM::VMOVRS:
4089      if (Domain != ExeNEON)
4090        break;
4091      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4092
4093      // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4094      DstReg = MI->getOperand(0).getReg();
4095      SrcReg = MI->getOperand(1).getReg();
4096
4097      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4098        MI->RemoveOperand(i-1);
4099
4100      DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4101
4102      // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4103      // Note that DSrc has been widened and the other lane may be undef, which
4104      // contaminates the entire register.
4105      MI->setDesc(get(ARM::VGETLNi32));
4106      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
4107                        .addReg(DReg, RegState::Undef)
4108                        .addImm(Lane));
4109
4110      // The old source should be an implicit use, otherwise we might think it
4111      // was dead before here.
4112      MIB.addReg(SrcReg, RegState::Implicit);
4113      break;
4114    case ARM::VMOVSR: {
4115      if (Domain != ExeNEON)
4116        break;
4117      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4118
4119      // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4120      DstReg = MI->getOperand(0).getReg();
4121      SrcReg = MI->getOperand(1).getReg();
4122
4123      DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4124
4125      unsigned ImplicitSReg;
4126      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4127        break;
4128
4129      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4130        MI->RemoveOperand(i-1);
4131
4132      // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4133      // Again DDst may be undefined at the beginning of this instruction.
4134      MI->setDesc(get(ARM::VSETLNi32));
4135      MIB.addReg(DReg, RegState::Define)
4136         .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
4137         .addReg(SrcReg)
4138         .addImm(Lane);
4139      AddDefaultPred(MIB);
4140
4141      // The narrower destination must be marked as set to keep previous chains
4142      // in place.
4143      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4144      if (ImplicitSReg != 0)
4145        MIB.addReg(ImplicitSReg, RegState::Implicit);
4146      break;
4147    }
4148    case ARM::VMOVS: {
4149      if (Domain != ExeNEON)
4150        break;
4151
4152      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4153      DstReg = MI->getOperand(0).getReg();
4154      SrcReg = MI->getOperand(1).getReg();
4155
4156      unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4157      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4158      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4159
4160      unsigned ImplicitSReg;
4161      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4162        break;
4163
4164      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4165        MI->RemoveOperand(i-1);
4166
4167      if (DSrc == DDst) {
4168        // Destination can be:
4169        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4170        MI->setDesc(get(ARM::VDUPLN32d));
4171        MIB.addReg(DDst, RegState::Define)
4172           .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
4173           .addImm(SrcLane);
4174        AddDefaultPred(MIB);
4175
4176        // Neither the source nor the destination is naturally represented any
4177        // more, so add them in manually.
4178        MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4179        MIB.addReg(SrcReg, RegState::Implicit);
4180        if (ImplicitSReg != 0)
4181          MIB.addReg(ImplicitSReg, RegState::Implicit);
4182        break;
4183      }
4184
4185      // In general there's no single instruction that can perform an S <-> S
4186      // move in NEON space, but a pair of VEXT instructions *can* do the
4187      // job. It turns out that the VEXTs needed will only use DSrc once, with
4188      // the position based purely on the combination of lane-0 and lane-1
4189      // involved. For example
4190      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
4191      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
4192      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
4193      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
4194      //
4195      // Pattern of the MachineInstrs is:
4196      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4197      MachineInstrBuilder NewMIB;
4198      NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
4199                       get(ARM::VEXTd32), DDst);
4200
4201      // On the first instruction, both DSrc and DDst may be <undef> if present.
4202      // Specifically when the original instruction didn't have them as an
4203      // <imp-use>.
4204      unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4205      bool CurUndef = !MI->readsRegister(CurReg, TRI);
4206      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4207
4208      CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4209      CurUndef = !MI->readsRegister(CurReg, TRI);
4210      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4211
4212      NewMIB.addImm(1);
4213      AddDefaultPred(NewMIB);
4214
4215      if (SrcLane == DstLane)
4216        NewMIB.addReg(SrcReg, RegState::Implicit);
4217
4218      MI->setDesc(get(ARM::VEXTd32));
4219      MIB.addReg(DDst, RegState::Define);
4220
4221      // On the second instruction, DDst has definitely been defined above, so
4222      // it is not <undef>. DSrc, if present, can be <undef> as above.
4223      CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4224      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
4225      MIB.addReg(CurReg, getUndefRegState(CurUndef));
4226
4227      CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4228      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
4229      MIB.addReg(CurReg, getUndefRegState(CurUndef));
4230
4231      MIB.addImm(1);
4232      AddDefaultPred(MIB);
4233
4234      if (SrcLane != DstLane)
4235        MIB.addReg(SrcReg, RegState::Implicit);
4236
4237      // As before, the original destination is no longer represented, add it
4238      // implicitly.
4239      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4240      if (ImplicitSReg != 0)
4241        MIB.addReg(ImplicitSReg, RegState::Implicit);
4242      break;
4243    }
4244  }
4245
4246}
4247
4248//===----------------------------------------------------------------------===//
4249// Partial register updates
4250//===----------------------------------------------------------------------===//
4251//
4252// Swift renames NEON registers with 64-bit granularity.  That means any
4253// instruction writing an S-reg implicitly reads the containing D-reg.  The
4254// problem is mostly avoided by translating f32 operations to v2f32 operations
4255// on D-registers, but f32 loads are still a problem.
4256//
4257// These instructions can load an f32 into a NEON register:
4258//
4259// VLDRS - Only writes S, partial D update.
4260// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4261// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4262//
4263// FCONSTD can be used as a dependency-breaking instruction.
4264unsigned ARMBaseInstrInfo::
4265getPartialRegUpdateClearance(const MachineInstr *MI,
4266                             unsigned OpNum,
4267                             const TargetRegisterInfo *TRI) const {
4268  if (!SwiftPartialUpdateClearance ||
4269      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
4270    return 0;
4271
4272  assert(TRI && "Need TRI instance");
4273
4274  const MachineOperand &MO = MI->getOperand(OpNum);
4275  if (MO.readsReg())
4276    return 0;
4277  unsigned Reg = MO.getReg();
4278  int UseOp = -1;
4279
4280  switch(MI->getOpcode()) {
4281    // Normal instructions writing only an S-register.
4282  case ARM::VLDRS:
4283  case ARM::FCONSTS:
4284  case ARM::VMOVSR:
4285  case ARM::VMOVv8i8:
4286  case ARM::VMOVv4i16:
4287  case ARM::VMOVv2i32:
4288  case ARM::VMOVv2f32:
4289  case ARM::VMOVv1i64:
4290    UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
4291    break;
4292
4293    // Explicitly reads the dependency.
4294  case ARM::VLD1LNd32:
4295    UseOp = 3;
4296    break;
4297  default:
4298    return 0;
4299  }
4300
4301  // If this instruction actually reads a value from Reg, there is no unwanted
4302  // dependency.
4303  if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
4304    return 0;
4305
4306  // We must be able to clobber the whole D-reg.
4307  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4308    // Virtual register must be a foo:ssub_0<def,undef> operand.
4309    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
4310      return 0;
4311  } else if (ARM::SPRRegClass.contains(Reg)) {
4312    // Physical register: MI must define the full D-reg.
4313    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4314                                             &ARM::DPRRegClass);
4315    if (!DReg || !MI->definesRegister(DReg, TRI))
4316      return 0;
4317  }
4318
4319  // MI has an unwanted D-register dependency.
4320  // Avoid defs in the previous N instructions.
4321  return SwiftPartialUpdateClearance;
4322}
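
// Illustrative sketch (assumed scenario): a VLDRS that writes S2 without
// reading it only partially defines D1, so on Swift or Cortex-A15 the
// function returns the 12-instruction default clearance. The scheduler then
// either finds that many intervening instructions or asks
// breakPartialRegDependency below to insert the FCONSTD dependency breaker.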
4323
4324// Break a partial register dependency after getPartialRegUpdateClearance
4325// returned non-zero.
4326void ARMBaseInstrInfo::
4327breakPartialRegDependency(MachineBasicBlock::iterator MI,
4328                          unsigned OpNum,
4329                          const TargetRegisterInfo *TRI) const {
4330  assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
4331  assert(TRI && "Need TRI instance");
4332
4333  const MachineOperand &MO = MI->getOperand(OpNum);
4334  unsigned Reg = MO.getReg();
4335  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4336         "Can't break virtual register dependencies.");
4337  unsigned DReg = Reg;
4338
4339  // If MI defines an S-reg, find the corresponding D super-register.
4340  if (ARM::SPRRegClass.contains(Reg)) {
4341    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4342    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4343  }
4344
4345  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4346  assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4347
4348  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4349  // the full D-register by loading the same value to both lanes.  The
4350  // instruction is micro-coded with 2 uops, so don't do this until we can
4351  // properly schedule micro-coded instructions.  The dispatcher stalls cause
4352  // regressions that are too large.
4353
4354  // Insert the dependency-breaking FCONSTD before MI.
4355  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4356  AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
4357                         get(ARM::FCONSTD), DReg).addImm(96));
4358  MI->addRegisterKilled(DReg, TRI, true);
4359}
4360
4361bool ARMBaseInstrInfo::hasNOP() const {
4362  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
4363}
4364
4365bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4366  if (MI->getNumOperands() < 4)
4367    return true;
4368  unsigned ShOpVal = MI->getOperand(3).getImm();
4369  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4370  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4371  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4372      ((ShImm == 1 || ShImm == 2) &&
4373       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4374    return true;
4375
4376  return false;
4377}
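
// For example (illustrative): "add r0, r1, r2, lsl #2" and "add r0, r1, r2,
// lsr #1" are treated as fast on Swift, while "add r0, r1, r2, lsl #3" is
// not; instructions with no shifter operand (fewer than 4 operands) are also
// treated as fast.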
4378