ARMBaseInstrInfo.cpp revision d9d6e6d59159160299a51fe5010a940db27ae89b
//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"

using namespace llvm;

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
           cl::desc("Widen ARM vmovs to vmovd when possible"));

static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
     cl::Hidden, cl::init(12),
     cl::desc("Clearance before partial register updates"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;     // MLA / MLS opcode
  uint16_t MulOpc;     // Expanded multiplication opcode
  uint16_t AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the accumulator is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};

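// MLxEntryMap (built by the constructor below) maps each MLx opcode to its
// row in this table, and MLxHazardOpcodes collects the expanded multiply and
// add / sub opcodes; the ARMHazardRecognizer uses them to decide when a fused
// multiply-accumulate should be split into its two component operations.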
static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      assert(false && "Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
                             const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II = TM->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
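  // Operand layout assumed here: for an indexed load the writeback (updated
  // base) result is operand 1 (operand 0 is the loaded value); for a store it
  // is operand 0.  The base register is operand 2, and the offset register,
  // offset immediate and predicate are the last three operands.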
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8 bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

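  // NewMIs holds the two replacement instructions in reverse program order:
  // both are inserted before MBBI below, so NewMIs[1] ends up first in the
  // block and NewMIs[0] last.  A pre-indexed access must update the base
  // before the memory access; a post-indexed access updates it afterwards.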
  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MIs in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
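// AnalyzeBranch returns false on success, with TBB/FBB/Cond describing the
// block's control flow, and true if the branch could not be analyzed.  When
// AllowModify is set, unreachable instructions following an unpredicated
// terminator may be erased as a side effect.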
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  TBB = 0;
  FBB = 0;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(I) || I->isTerminator()) {

    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up at the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values and predicated nonterminators.
    while (I->isDebugValue() || !I->isTerminator()) {
      if (I == MBB.begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = !isPredicated(I);
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    //                returns.
    if (!isPredicated(I) &&
          (isUncondBranchOpcode(I->getOpcode()) ||
           isIndirectBranchOpcode(I->getOpcode()) ||
           isJumpTableBranchOpcode(I->getOpcode()) ||
           I->isReturn())) {
      // Forget any previous conditional branch information - it no longer
      // applies.
      Cond.clear();
      FBB = 0;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = llvm::next(I);
        while (DI != MBB.end()) {
          MachineInstr *InstToDelete = DI;
          ++DI;
          InstToDelete->eraseFromParent();
        }
      }
    }

    if (CantAnalyze)
      return true;

    if (I == MBB.begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

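// InsertBranch emits an unconditional branch, a conditional branch, or a
// conditional+unconditional pair, and returns the number of instructions
// added.  Cond uses the two-operand (condition code, CPSR) form produced by
// AnalyzeBranch above.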
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (FBB == 0) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
  if (MI->isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
      .addImm(Pred[0].getImm())
      .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

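// SubsumesPredicate - Returns true if Pred1 is at least as permissive as
// Pred2, i.e. Pred1 holds whenever Pred2 holds.  For example, HS (unsigned
// >=) subsumes HI (unsigned >), and AL subsumes every condition.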
bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

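// On ARMv8, most Thumb instructions become "conditionally deprecated" inside
// an IT block; only the 16-bit opcodes whitelisted below remain fully
// sanctioned, and even some of those are excluded when they involve the PC.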
static bool isV8EligibleForIT(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return false;
  case ARM::tADC:
  case ARM::tADDi3:
  case ARM::tADDi8:
  case ARM::tADDrSPi:
  case ARM::tADDrr:
  case ARM::tAND:
  case ARM::tASRri:
  case ARM::tASRrr:
  case ARM::tBIC:
  case ARM::tCMNz:
  case ARM::tCMPi8:
  case ARM::tCMPr:
  case ARM::tEOR:
  case ARM::tLDRBi:
  case ARM::tLDRBr:
  case ARM::tLDRHi:
  case ARM::tLDRHr:
  case ARM::tLDRSB:
  case ARM::tLDRSH:
  case ARM::tLDRi:
  case ARM::tLDRr:
  case ARM::tLDRspi:
  case ARM::tLSLri:
  case ARM::tLSLrr:
  case ARM::tLSRri:
  case ARM::tLSRrr:
  case ARM::tMOVi8:
  case ARM::tMUL:
  case ARM::tMVN:
  case ARM::tORR:
  case ARM::tROR:
  case ARM::tRSB:
  case ARM::tSBC:
  case ARM::tSTRBi:
  case ARM::tSTRBr:
  case ARM::tSTRHi:
  case ARM::tSTRHr:
  case ARM::tSTRi:
  case ARM::tSTRr:
  case ARM::tSTRspi:
  case ARM::tSUBi3:
  case ARM::tSUBi8:
  case ARM::tSUBrr:
  case ARM::tTST:
    return true;
  // There are some "conditionally deprecated" opcodes.
  case ARM::tADDspr:
    return MI->getOperand(2).getReg() != ARM::PC;
  case ARM::tADDrSP:
  case ARM::tBX:
  case ARM::tBLXr:
  // ADD PC, SP and BLX PC were always unpredictable; now, on top of that,
  // they are deprecated.
    return MI->getOperand(0).getReg() != ARM::PC;
  case ARM::tADDhirr:
    return MI->getOperand(0).getReg() != ARM::PC &&
           MI->getOperand(2).getReg() != ARM::PC;
  case ARM::tCMPhir:
  case ARM::tMOVr:
    return MI->getOperand(0).getReg() != ARM::PC &&
           MI->getOperand(1).getReg() != ARM::PC;
  }
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  if (!MI->isPredicable())
    return false;

  ARMFunctionInfo *AFI =
    MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();

  if (AFI->isThumb2Function()) {
    if (getSubtarget().hasV8Ops())
      return isV8EligibleForIT(MI);
  } else { // non-Thumb
    if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
      return false;
  }

  return true;
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  if (MI->isLabel())
    return 0;
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is the branch (2 or 4 bytes) plus the table, where
    // each plain entry is 4 bytes, each TBB entry one byte, and each TBH
    // entry two bytes.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != 0);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2-byte aligned, but JT entries are 4-byte
    // aligned. The assembler / linker may add 2 bytes of padding just before
    // the JT entries.  The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) * 2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
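    // For example, a t2TBB_JT with 5 entries takes 4 bytes for the branch
    // plus 6 one-byte entries (5 rounded up for alignment) = 10 bytes.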
    return NumEntries * EntrySize + InstSize;
  }
  default:
    // Otherwise, pseudo-instruction sizes are zero.
    return 0;
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += GetInstSizeInBytes(&*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                  .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;
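  // Spacing is the stride between consecutive sub-register indices; the
  // "Spc" (spaced) D-register tuple classes use every other D register, so
  // their copies advance two sub-register indices per step.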

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
  // Fall back to VMOVD.
  else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
  else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
  else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
  else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
    Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;

  else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
  else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
  else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
    unsigned Src = TRI->getSubReg(SrcReg,  BeginIdx + i*Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
      .addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = AddDefaultCC(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
    case 4:
      if (ARM::GPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 8:
      if (ARM::DPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
        if (Subtarget.hasV5TEOps()) {
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

          AddDefaultPred(MIB);
        } else {
          // Fallback to STM instruction, which has existed since the dawn of
          // time.
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
                             .addFrameIndex(FI).addMemOperand(MMO));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 16:
      if (ARM::DPairRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 24:
      if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 32:
      if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
          ARM::DQuadRegClass.hasSubClassEq(RC)) {
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          // FIXME: It's possible to only store part of the QQ register if the
          // spilled def has a sub-register index.
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 64:
      if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                         .addFrameIndex(FI))
                         .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    default:
      llvm_unreachable("Unknown reg class!");
  }
}

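// isStoreToStackSlot returns the register stored by a direct store to a stack
// slot and sets FrameIndex accordingly, or returns 0 if this instruction is
// not such a store.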
unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
                                 .addFrameIndex(FI).addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                         .addFrameIndex(FI)
                         .addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                     .addFrameIndex(FI))
                     .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                             int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
  // widened to VMOVD.  We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
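  // For example, a "%S0 = COPY %S2" between even S-registers becomes
  // "%D0 = VMOVD %D1<undef>, pred:14, pred:%noreg, %S2<imp-use>": the copy
  // reads the whole D-register but only the value in its ssub_0 half matters.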
  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
    return false;

  // Look for a copy between even S-registers.  That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  unsigned DstRegS = MI->getOperand(0).getReg();
  unsigned SrcRegS = MI->getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI->getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  DEBUG(dbgs() << "widening:    " << *MI);
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);

  // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI->RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI->setDesc(get(ARM::VMOVD));
  MI->getOperand(0).setReg(DstRegD);
  MI->getOperand(1).setReg(SrcRegD);
  AddDefaultPred(MIB);

  // We are now reading SrcRegD instead of SrcRegS.  This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI->getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
  if (MI->getOperand(1).isKill()) {
    MI->getOperand(1).setIsKill(false);
    MI->addRegisterKilled(SrcRegS, TRI, true);
  }

  DEBUG(dbgs() << "replaced by: " << *MI);
  return true;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = 0;
  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
             ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

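// produceSameValue - Conservatively determine whether two instructions
// compute the same value.  Constant-pool loads and PIC global-address
// computations need special handling because their PC-label operands differ
// even when the underlying constant or global is identical.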
1367bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
1368                                        const MachineInstr *MI1,
1369                                        const MachineRegisterInfo *MRI) const {
1370  int Opcode = MI0->getOpcode();
1371  if (Opcode == ARM::t2LDRpci ||
1372      Opcode == ARM::t2LDRpci_pic ||
1373      Opcode == ARM::tLDRpci ||
1374      Opcode == ARM::tLDRpci_pic ||
1375      Opcode == ARM::MOV_ga_dyn ||
1376      Opcode == ARM::MOV_ga_pcrel ||
1377      Opcode == ARM::MOV_ga_pcrel_ldr ||
1378      Opcode == ARM::t2MOV_ga_dyn ||
1379      Opcode == ARM::t2MOV_ga_pcrel) {
1380    if (MI1->getOpcode() != Opcode)
1381      return false;
1382    if (MI0->getNumOperands() != MI1->getNumOperands())
1383      return false;
1384
1385    const MachineOperand &MO0 = MI0->getOperand(1);
1386    const MachineOperand &MO1 = MI1->getOperand(1);
1387    if (MO0.getOffset() != MO1.getOffset())
1388      return false;
1389
1390    if (Opcode == ARM::MOV_ga_dyn ||
1391        Opcode == ARM::MOV_ga_pcrel ||
1392        Opcode == ARM::MOV_ga_pcrel_ldr ||
1393        Opcode == ARM::t2MOV_ga_dyn ||
1394        Opcode == ARM::t2MOV_ga_pcrel)
1395      // Ignore the PC labels.
1396      return MO0.getGlobal() == MO1.getGlobal();
1397
1398    const MachineFunction *MF = MI0->getParent()->getParent();
1399    const MachineConstantPool *MCP = MF->getConstantPool();
1400    int CPI0 = MO0.getIndex();
1401    int CPI1 = MO1.getIndex();
1402    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1403    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1404    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1405    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1406    if (isARMCP0 && isARMCP1) {
1407      ARMConstantPoolValue *ACPV0 =
1408        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1409      ARMConstantPoolValue *ACPV1 =
1410        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1411      return ACPV0->hasSameValue(ACPV1);
1412    } else if (!isARMCP0 && !isARMCP1) {
1413      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1414    }
1415    return false;
1416  } else if (Opcode == ARM::PICLDR) {
1417    if (MI1->getOpcode() != Opcode)
1418      return false;
1419    if (MI0->getNumOperands() != MI1->getNumOperands())
1420      return false;
1421
1422    unsigned Addr0 = MI0->getOperand(1).getReg();
1423    unsigned Addr1 = MI1->getOperand(1).getReg();
1424    if (Addr0 != Addr1) {
1425      if (!MRI ||
1426          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1427          !TargetRegisterInfo::isVirtualRegister(Addr1))
1428        return false;
1429
1430      // This assumes SSA form.
1431      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1432      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1433      // Check if the loaded values, e.g. a constant pool entry or a global
1434      // address, are the same.
1435      if (!produceSameValue(Def0, Def1, MRI))
1436        return false;
1437    }
1438
1439    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
1440      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
1441      const MachineOperand &MO0 = MI0->getOperand(i);
1442      const MachineOperand &MO1 = MI1->getOperand(i);
1443      if (!MO0.isIdenticalTo(MO1))
1444        return false;
1445    }
1446    return true;
1447  }
1448
1449  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1450}
1451
1452/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1453/// determine if two loads are loading from the same base address. It should
1454/// only return true if the base pointers are the same and the only difference
1455/// between the two addresses is the offset. It also returns the offsets by
1456/// reference.
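///
/// For example (schematic MI form, operand order base-then-offset assumed):
///   %r0 = LDRi12 %r1, 0, pred:14, pred:%noreg
///   %r2 = LDRi12 %r1, 4, pred:14, pred:%noreg
/// share the base %r1, so this returns true with Offset1 = 0 and Offset2 = 4.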
1457///
1458/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1459/// is permanently disabled.
1460bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1461                                               int64_t &Offset1,
1462                                               int64_t &Offset2) const {
1463  // Don't worry about Thumb: just ARM and Thumb2.
1464  if (Subtarget.isThumb1Only()) return false;
1465
1466  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1467    return false;
1468
1469  switch (Load1->getMachineOpcode()) {
1470  default:
1471    return false;
1472  case ARM::LDRi12:
1473  case ARM::LDRBi12:
1474  case ARM::LDRD:
1475  case ARM::LDRH:
1476  case ARM::LDRSB:
1477  case ARM::LDRSH:
1478  case ARM::VLDRD:
1479  case ARM::VLDRS:
1480  case ARM::t2LDRi8:
1481  case ARM::t2LDRBi8:
1482  case ARM::t2LDRDi8:
1483  case ARM::t2LDRSHi8:
1484  case ARM::t2LDRi12:
1485  case ARM::t2LDRBi12:
1486  case ARM::t2LDRSHi12:
1487    break;
1488  }
1489
1490  switch (Load2->getMachineOpcode()) {
1491  default:
1492    return false;
1493  case ARM::LDRi12:
1494  case ARM::LDRBi12:
1495  case ARM::LDRD:
1496  case ARM::LDRH:
1497  case ARM::LDRSB:
1498  case ARM::LDRSH:
1499  case ARM::VLDRD:
1500  case ARM::VLDRS:
1501  case ARM::t2LDRi8:
1502  case ARM::t2LDRBi8:
1503  case ARM::t2LDRSHi8:
1504  case ARM::t2LDRi12:
1505  case ARM::t2LDRBi12:
1506  case ARM::t2LDRSHi12:
1507    break;
1508  }
1509
1510  // Check if base addresses and chain operands match.
1511  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1512      Load1->getOperand(4) != Load2->getOperand(4))
1513    return false;
1514
1515  // Index should be Reg0.
1516  if (Load1->getOperand(3) != Load2->getOperand(3))
1517    return false;
1518
1519  // Determine the offsets.
1520  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1521      isa<ConstantSDNode>(Load2->getOperand(1))) {
1522    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1523    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1524    return true;
1525  }
1526
1527  return false;
1528}
1529
1530/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1531/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1532/// be scheduled together. On some targets, if two loads are loading from
1533/// addresses in the same cache line, it's better if they are scheduled
1534/// together. This function takes two integers that represent the load offsets
1535/// from the common base address. It returns true if it decides it's desirable
1536/// to schedule the two loads together. "NumLoads" is the number of loads that
1537/// have already been scheduled after Load1.
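///
/// For example, loads at offsets 0 and 4 from a common base almost certainly
/// share a cache line, so clustering them is usually a win; offsets roughly
/// half a kilobyte apart fail the distance check below.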
1538///
1539/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1540/// is permanently disabled.
1541bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1542                                               int64_t Offset1, int64_t Offset2,
1543                                               unsigned NumLoads) const {
1544  // Don't worry about Thumb: just ARM and Thumb2.
1545  if (Subtarget.isThumb1Only()) return false;
1546
1547  assert(Offset2 > Offset1);
1548
1549  if ((Offset2 - Offset1) / 8 > 64)
1550    return false;
1551
1552  // Check if the machine opcodes are different. If they are different then
1553  // we consider them to not be of the same base address, EXCEPT in the case
1554  // of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. In that
1555  // case they are considered the same because they are merely different
1556  // encoding forms of the same basic instruction.
1557  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1558      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1559         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1560        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1561         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1562    return false;  // FIXME: overly conservative?
1563
1564  // Four loads in a row should be sufficient.
1565  if (NumLoads >= 3)
1566    return false;
1567
1568  return true;
1569}
1570
1571bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
1572                                            const MachineBasicBlock *MBB,
1573                                            const MachineFunction &MF) const {
1574  // Debug info is never a scheduling boundary. It's necessary to be explicit
1575  // due to the special treatment of IT instructions below, otherwise a
1576  // dbg_value followed by an IT will result in the IT instruction being
1577  // considered a scheduling hazard, which is wrong. It should be the actual
1578  // instruction preceding the dbg_value instruction(s), just like it is
1579  // when debug info is not present.
1580  if (MI->isDebugValue())
1581    return false;
1582
1583  // Terminators and labels can't be scheduled around.
1584  if (MI->isTerminator() || MI->isLabel())
1585    return true;
1586
1587  // Treat the start of the IT block as a scheduling boundary, but schedule
1588  // t2IT along with all instructions following it.
1589  // FIXME: This is a big hammer. But the alternative is to add all potential
1590  // true and anti dependencies to IT block instructions as implicit operands
1591  // to the t2IT instruction. The added compile time and complexity does not
1592  // seem worth it.
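  // Schematically, in Thumb2 code such as:
  //   t2IT eq, ...
  //   <instructions predicated on eq>
  // the t2IT must stay glued to its block, so the boundary is placed on the
  // instruction immediately before it.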
1593  MachineBasicBlock::const_iterator I = MI;
1594  // Make sure to skip any dbg_value instructions
1595  while (++I != MBB->end() && I->isDebugValue())
1596    ;
1597  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1598    return true;
1599
1600  // Don't attempt to schedule around any instruction that defines
1601  // a stack-oriented pointer, as it's unlikely to be profitable. This
1602  // saves compile time, because it doesn't require every single
1603  // stack slot reference to depend on the instruction that does the
1604  // modification.
1605  // Calls don't actually change the stack pointer, even if they have imp-defs.
1606  // No ARM calling conventions change the stack pointer. (X86 calling
1607  // conventions sometimes do).
1608  if (!MI->isCall() && MI->definesRegister(ARM::SP))
1609    return true;
1610
1611  return false;
1612}
1613
1614bool ARMBaseInstrInfo::
1615isProfitableToIfCvt(MachineBasicBlock &MBB,
1616                    unsigned NumCycles, unsigned ExtraPredCycles,
1617                    const BranchProbability &Probability) const {
1618  if (!NumCycles)
1619    return false;
1620
1621  // Attempt to estimate the relative costs of predication versus branching.
1622  unsigned UnpredCost = Probability.getNumerator() * NumCycles;
1623  UnpredCost /= Probability.getDenominator();
1624  UnpredCost += 1; // The branch itself
1625  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1626
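  // A worked example with illustrative numbers: NumCycles = 4,
  // ExtraPredCycles = 1, Probability = 1/2, misprediction penalty = 20:
  //   UnpredCost = (1 * 4) / 2 + 1 + 20 / 10 = 5
  // and the predicated cost is 4 + 1 = 5, so if-conversion is (just)
  // considered profitable.
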
1627  return (NumCycles + ExtraPredCycles) <= UnpredCost;
1628}
1629
1630bool ARMBaseInstrInfo::
1631isProfitableToIfCvt(MachineBasicBlock &TMBB,
1632                    unsigned TCycles, unsigned TExtra,
1633                    MachineBasicBlock &FMBB,
1634                    unsigned FCycles, unsigned FExtra,
1635                    const BranchProbability &Probability) const {
1636  if (!TCycles || !FCycles)
1637    return false;
1638
1639  // Attempt to estimate the relative costs of predication versus branching.
1640  unsigned TUnpredCost = Probability.getNumerator() * TCycles;
1641  TUnpredCost /= Probability.getDenominator();
1642
1643  uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
1644  unsigned FUnpredCost = Comp * FCycles;
1645  FUnpredCost /= Probability.getDenominator();
1646
1647  unsigned UnpredCost = TUnpredCost + FUnpredCost;
1648  UnpredCost += 1; // The branch itself
1649  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1650
1651  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
1652}
1653
1654bool
1655ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1656                                            MachineBasicBlock &FMBB) const {
1657  // Reduce false anti-dependencies to let Swift's out-of-order execution
1658  // engine do its thing.
1659  return Subtarget.isSwift();
1660}
1661
1662/// getInstrPredicate - If instruction is predicated, returns its predicate
1663/// condition, otherwise returns AL. It also returns the condition code
1664/// register by reference.
1665ARMCC::CondCodes
1666llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
1667  int PIdx = MI->findFirstPredOperandIdx();
1668  if (PIdx == -1) {
1669    PredReg = 0;
1670    return ARMCC::AL;
1671  }
1672
1673  PredReg = MI->getOperand(PIdx+1).getReg();
1674  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
1675}
1676
1677
1678int llvm::getMatchingCondBranchOpcode(int Opc) {
1679  if (Opc == ARM::B)
1680    return ARM::Bcc;
1681  if (Opc == ARM::tB)
1682    return ARM::tBcc;
1683  if (Opc == ARM::t2B)
1684    return ARM::t2Bcc;
1685
1686  llvm_unreachable("Unknown unconditional branch opcode!");
1687}
1688
1689/// commuteInstruction - Handle commutable instructions.
1690MachineInstr *
1691ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
1692  switch (MI->getOpcode()) {
1693  case ARM::MOVCCr:
1694  case ARM::t2MOVCCr: {
1695    // MOVCC can be commuted by inverting the condition.
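    // Schematically, %d = MOVCCr %a, %b, pred:EQ, pred:%CPSR becomes
    // %d = MOVCCr %b, %a, pred:NE, pred:%CPSR, which selects the same value.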
1696    unsigned PredReg = 0;
1697    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
1698    // MOVCC AL can't be inverted. Shouldn't happen.
1699    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
1700      return NULL;
1701    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
1702    if (!MI)
1703      return NULL;
1704    // After swapping the MOVCC operands, also invert the condition.
1705    MI->getOperand(MI->findFirstPredOperandIdx())
1706      .setImm(ARMCC::getOppositeCondition(CC));
1707    return MI;
1708  }
1709  }
1710  return TargetInstrInfo::commuteInstruction(MI, NewMI);
1711}
1712
1713/// Identify instructions that can be folded into a MOVCC instruction, and
1714/// return the defining instruction.
1715static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
1716                                      const MachineRegisterInfo &MRI,
1717                                      const TargetInstrInfo *TII) {
1718  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1719    return 0;
1720  if (!MRI.hasOneNonDBGUse(Reg))
1721    return 0;
1722  MachineInstr *MI = MRI.getVRegDef(Reg);
1723  if (!MI)
1724    return 0;
1725  // MI is folded into the MOVCC by predicating it.
1726  if (!MI->isPredicable())
1727    return 0;
1728  // Check if MI has any non-dead defs or physreg uses. This also detects
1729  // predicated instructions which will be reading CPSR.
1730  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
1731    const MachineOperand &MO = MI->getOperand(i);
1732    // Reject frame index operands, PEI can't handle the predicated pseudos.
1733    if (MO.isFI() || MO.isCPI() || MO.isJTI())
1734      return 0;
1735    if (!MO.isReg())
1736      continue;
1737    // MI can't have any tied operands, that would conflict with predication.
1738    if (MO.isTied())
1739      return 0;
1740    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1741      return 0;
1742    if (MO.isDef() && !MO.isDead())
1743      return 0;
1744  }
1745  bool DontMoveAcrossStores = true;
1746  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
1747    return 0;
1748  return MI;
1749}
1750
1751bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
1752                                     SmallVectorImpl<MachineOperand> &Cond,
1753                                     unsigned &TrueOp, unsigned &FalseOp,
1754                                     bool &Optimizable) const {
1755  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
1756         "Unknown select instruction");
1757  // MOVCC operands:
1758  // 0: Def.
1759  // 1: True use.
1760  // 2: False use.
1761  // 3: Condition code.
1762  // 4: CPSR use.
1763  TrueOp = 1;
1764  FalseOp = 2;
1765  Cond.push_back(MI->getOperand(3));
1766  Cond.push_back(MI->getOperand(4));
1767  // We can always fold a def.
1768  Optimizable = true;
1769  return false;
1770}
1771
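// A schematic sketch of the optimizeSelect rewrite below (not exact MI
// syntax; ADDri stands in for any foldable defining instruction):
//   %x = ADDri %y, 1, pred:14, pred:%noreg
//   %d = MOVCCr %a, %x, pred:EQ, pred:%CPSR
// becomes a single predicated add whose result is %a when EQ is false:
//   %d = ADDri %y, 1, pred:EQ, pred:%CPSR, opt:%noreg, %a<imp-use,tied0>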
1772MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
1773                                               bool PreferFalse) const {
1774  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
1775         "Unknown select instruction");
1776  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1777  MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
1778  bool Invert = !DefMI;
1779  if (!DefMI)
1780    DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
1781  if (!DefMI)
1782    return 0;
1783
1784  // Create a new predicated version of DefMI.
1785  // Rfalse is the first use.
1786  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1787                                      DefMI->getDesc(),
1788                                      MI->getOperand(0).getReg());
1789
1790  // Copy all the DefMI operands, excluding its (null) predicate.
1791  const MCInstrDesc &DefDesc = DefMI->getDesc();
1792  for (unsigned i = 1, e = DefDesc.getNumOperands();
1793       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
1794    NewMI.addOperand(DefMI->getOperand(i));
1795
1796  unsigned CondCode = MI->getOperand(3).getImm();
1797  if (Invert)
1798    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
1799  else
1800    NewMI.addImm(CondCode);
1801  NewMI.addOperand(MI->getOperand(4));
1802
1803  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
1804  if (NewMI->hasOptionalDef())
1805    AddDefaultCC(NewMI);
1806
1807  // The output register value when the predicate is false is an implicit
1808  // register operand tied to the first def.
1809  // The tie makes the register allocator ensure the FalseReg is allocated the
1810  // same register as operand 0.
1811  MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
1812  FalseReg.setImplicit();
1813  NewMI.addOperand(FalseReg);
1814  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
1815
1816  // The caller will erase MI, but not DefMI.
1817  DefMI->eraseFromParent();
1818  return NewMI;
1819}
1820
1821/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
1822/// instruction is encoded with an 'S' bit is determined by the optional CPSR
1823/// def operand.
1824///
1825/// This will go away once we can teach tblgen how to set the optional CPSR def
1826/// operand itself.
1827struct AddSubFlagsOpcodePair {
1828  uint16_t PseudoOpc;
1829  uint16_t MachineOpc;
1830};
1831
1832static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
1833  {ARM::ADDSri, ARM::ADDri},
1834  {ARM::ADDSrr, ARM::ADDrr},
1835  {ARM::ADDSrsi, ARM::ADDrsi},
1836  {ARM::ADDSrsr, ARM::ADDrsr},
1837
1838  {ARM::SUBSri, ARM::SUBri},
1839  {ARM::SUBSrr, ARM::SUBrr},
1840  {ARM::SUBSrsi, ARM::SUBrsi},
1841  {ARM::SUBSrsr, ARM::SUBrsr},
1842
1843  {ARM::RSBSri, ARM::RSBri},
1844  {ARM::RSBSrsi, ARM::RSBrsi},
1845  {ARM::RSBSrsr, ARM::RSBrsr},
1846
1847  {ARM::t2ADDSri, ARM::t2ADDri},
1848  {ARM::t2ADDSrr, ARM::t2ADDrr},
1849  {ARM::t2ADDSrs, ARM::t2ADDrs},
1850
1851  {ARM::t2SUBSri, ARM::t2SUBri},
1852  {ARM::t2SUBSrr, ARM::t2SUBrr},
1853  {ARM::t2SUBSrs, ARM::t2SUBrs},
1854
1855  {ARM::t2RSBSri, ARM::t2RSBri},
1856  {ARM::t2RSBSrs, ARM::t2RSBrs},
1857};
1858
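// For example, convertAddSubFlagsOpcode(ARM::ADDSri) returns ARM::ADDri,
// and an opcode with no entry in the table above returns 0.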
1859unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
1860  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
1861    if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
1862      return AddSubFlagsOpcodeMap[i].MachineOpc;
1863  return 0;
1864}
1865
1866void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
1867                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
1868                               unsigned DestReg, unsigned BaseReg, int NumBytes,
1869                               ARMCC::CondCodes Pred, unsigned PredReg,
1870                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
1871  bool isSub = NumBytes < 0;
1872  if (isSub) NumBytes = -NumBytes;
1873
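  // Peel off one valid so_imm chunk (an 8-bit value rotated right by an even
  // amount) per iteration. For instance NumBytes = 0x101 is not encodable as
  // a single so_imm, so this emits roughly (the exact split depends on
  // getSOImmValRotate):
  //   add rD, rN, #0x1
  //   add rD, rD, #0x100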
1874  while (NumBytes) {
1875    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
1876    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
1877    assert(ThisVal && "Didn't extract field correctly");
1878
1879    // We will handle these bits from the offset; clear them.
1880    NumBytes &= ~ThisVal;
1881
1882    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
1883
1884    // Build the new ADD / SUB.
1885    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
1886    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
1887      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
1888      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
1889      .setMIFlags(MIFlags);
1890    BaseReg = DestReg;
1891  }
1892}
1893
1894bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
1895                                unsigned FrameReg, int &Offset,
1896                                const ARMBaseInstrInfo &TII) {
1897  unsigned Opcode = MI.getOpcode();
1898  const MCInstrDesc &Desc = MI.getDesc();
1899  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
1900  bool isSub = false;
1901
1902  // Memory operands in inline assembly always use AddrMode2.
1903  if (Opcode == ARM::INLINEASM)
1904    AddrMode = ARMII::AddrMode2;
1905
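  // Schematic example: ADDri %d, <fi#N>, #0 collapses to MOVr %d, sp once the
  // frame index resolves to sp with a zero net offset.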
1906  if (Opcode == ARM::ADDri) {
1907    Offset += MI.getOperand(FrameRegIdx+1).getImm();
1908    if (Offset == 0) {
1909      // Turn it into a move.
1910      MI.setDesc(TII.get(ARM::MOVr));
1911      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1912      MI.RemoveOperand(FrameRegIdx+1);
1913      Offset = 0;
1914      return true;
1915    } else if (Offset < 0) {
1916      Offset = -Offset;
1917      isSub = true;
1918      MI.setDesc(TII.get(ARM::SUBri));
1919    }
1920
1921    // Common case: small offset, fits into instruction.
1922    if (ARM_AM::getSOImmVal(Offset) != -1) {
1923      // Replace the FrameIndex with sp / fp
1924      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1925      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
1926      Offset = 0;
1927      return true;
1928    }
1929
1930    // Otherwise, pull as much of the immediate into this ADDri/SUBri
1931    // as possible.
1932    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
1933    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
1934
1935    // We will handle these bits from the offset; clear them.
1936    Offset &= ~ThisImmVal;
1937
1938    // Get the properly encoded SOImmVal field.
1939    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
1940           "Bit extraction didn't work?");
1941    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
1942  } else {
1943    unsigned ImmIdx = 0;
1944    int InstrOffs = 0;
1945    unsigned NumBits = 0;
1946    unsigned Scale = 1;
1947    switch (AddrMode) {
1948    case ARMII::AddrMode_i12: {
1949      ImmIdx = FrameRegIdx + 1;
1950      InstrOffs = MI.getOperand(ImmIdx).getImm();
1951      NumBits = 12;
1952      break;
1953    }
1954    case ARMII::AddrMode2: {
1955      ImmIdx = FrameRegIdx+2;
1956      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
1957      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1958        InstrOffs *= -1;
1959      NumBits = 12;
1960      break;
1961    }
1962    case ARMII::AddrMode3: {
1963      ImmIdx = FrameRegIdx+2;
1964      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
1965      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1966        InstrOffs *= -1;
1967      NumBits = 8;
1968      break;
1969    }
1970    case ARMII::AddrMode4:
1971    case ARMII::AddrMode6:
1972      // Can't fold any offset even if it's zero.
1973      return false;
1974    case ARMII::AddrMode5: {
1975      ImmIdx = FrameRegIdx+1;
1976      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
1977      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1978        InstrOffs *= -1;
1979      NumBits = 8;
1980      Scale = 4;
1981      break;
1982    }
1983    default:
1984      llvm_unreachable("Unsupported addressing mode!");
1985    }
1986
1987    Offset += InstrOffs * Scale;
1988    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
1989    if (Offset < 0) {
1990      Offset = -Offset;
1991      isSub = true;
1992    }
1993
1994    // Attempt to fold the address computation if the opcode has offset bits.
1995    if (NumBits > 0) {
1996      // Common case: small offset, fits into instruction.
1997      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
1998      int ImmedOffset = Offset / Scale;
1999      unsigned Mask = (1 << NumBits) - 1;
2000      if ((unsigned)Offset <= Mask * Scale) {
2001        // Replace the FrameIndex with sp
2002        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2003        // FIXME: When addrmode2 goes away, this will simplify (like the
2004        // T2 version), as the LDR.i12 versions don't need the encoding
2005        // tricks for the offset value.
2006        if (isSub) {
2007          if (AddrMode == ARMII::AddrMode_i12)
2008            ImmedOffset = -ImmedOffset;
2009          else
2010            ImmedOffset |= 1 << NumBits;
2011        }
2012        ImmOp.ChangeToImmediate(ImmedOffset);
2013        Offset = 0;
2014        return true;
2015      }
2016
2017      // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
2018      ImmedOffset = ImmedOffset & Mask;
2019      if (isSub) {
2020        if (AddrMode == ARMII::AddrMode_i12)
2021          ImmedOffset = -ImmedOffset;
2022        else
2023          ImmedOffset |= 1 << NumBits;
2024      }
2025      ImmOp.ChangeToImmediate(ImmedOffset);
2026      Offset &= ~(Mask*Scale);
2027    }
2028  }
2029
2030  Offset = (isSub) ? -Offset : Offset;
2031  return Offset == 0;
2032}
2033
2034/// analyzeCompare - For a comparison instruction, return the source registers
2035/// in SrcReg and SrcReg2 if it has two register operands, and the value it
2036/// compares against in CmpValue. Return true if the comparison instruction
2037/// can be analyzed.
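///
/// For example, CMPri %r0, 42 yields SrcReg = %r0, SrcReg2 = 0, CmpMask = ~0,
/// CmpValue = 42 and a return value of true.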
2038bool ARMBaseInstrInfo::
2039analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
2040               int &CmpMask, int &CmpValue) const {
2041  switch (MI->getOpcode()) {
2042  default: break;
2043  case ARM::CMPri:
2044  case ARM::t2CMPri:
2045    SrcReg = MI->getOperand(0).getReg();
2046    SrcReg2 = 0;
2047    CmpMask = ~0;
2048    CmpValue = MI->getOperand(1).getImm();
2049    return true;
2050  case ARM::CMPrr:
2051  case ARM::t2CMPrr:
2052    SrcReg = MI->getOperand(0).getReg();
2053    SrcReg2 = MI->getOperand(1).getReg();
2054    CmpMask = ~0;
2055    CmpValue = 0;
2056    return true;
2057  case ARM::TSTri:
2058  case ARM::t2TSTri:
2059    SrcReg = MI->getOperand(0).getReg();
2060    SrcReg2 = 0;
2061    CmpMask = MI->getOperand(1).getImm();
2062    CmpValue = 0;
2063    return true;
2064  }
2065
2066  return false;
2067}
2068
2069/// isSuitableForMask - Identify a suitable 'and' instruction that
2070/// operates on the given source register and applies the same mask
2071/// as a 'tst' instruction. Provide a limited look-through for copies.
2072/// When successful, MI will hold the found instruction.
2073static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2074                              int CmpMask, bool CommonUse) {
2075  switch (MI->getOpcode()) {
2076    case ARM::ANDri:
2077    case ARM::t2ANDri:
2078      if (CmpMask != MI->getOperand(2).getImm())
2079        return false;
2080      if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2081        return true;
2082      break;
2083    case ARM::COPY: {
2084      // Walk down one instruction which is potentially an 'and'.
2085      const MachineInstr &Copy = *MI;
2086      MachineBasicBlock::iterator AND(
2087        llvm::next(MachineBasicBlock::iterator(MI)));
2088      if (AND == MI->getParent()->end()) return false;
2089      MI = AND;
2090      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
2091                               CmpMask, true);
2092    }
2093  }
2094
2095  return false;
2096}
2097
2098/// getSwappedCondition - assuming the flags are set by MI(a,b), return
2099/// the condition code to use if we modify the instructions such that the
2100/// flags are instead set by MI(b,a).
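/// For example, if CMP(a,b) feeding a HS user is replaced by flags from
/// SUB(b,a), the user must be rewritten to test LS.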
2101inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2102  switch (CC) {
2103  default: return ARMCC::AL;
2104  case ARMCC::EQ: return ARMCC::EQ;
2105  case ARMCC::NE: return ARMCC::NE;
2106  case ARMCC::HS: return ARMCC::LS;
2107  case ARMCC::LO: return ARMCC::HI;
2108  case ARMCC::HI: return ARMCC::LO;
2109  case ARMCC::LS: return ARMCC::HS;
2110  case ARMCC::GE: return ARMCC::LE;
2111  case ARMCC::LT: return ARMCC::GT;
2112  case ARMCC::GT: return ARMCC::LT;
2113  case ARMCC::LE: return ARMCC::GE;
2114  }
2115}
2116
2117/// isRedundantFlagInstr - check whether the first instruction, whose only
2118/// purpose is to update flags, can be made redundant.
2119/// CMPrr can be made redundant by SUBrr if the operands are the same.
2120/// CMPri can be made redundant by SUBri if the operands are the same.
2121/// This function can be extended later on.
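/// For example (schematic), given
///   sub r2, r0, r1
///   cmp r0, r1
/// the cmp can be dropped once the sub is rewritten as a flag-setting subs.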
2122inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
2123                                        unsigned SrcReg2, int ImmValue,
2124                                        MachineInstr *OI) {
2125  if ((CmpI->getOpcode() == ARM::CMPrr ||
2126       CmpI->getOpcode() == ARM::t2CMPrr) &&
2127      (OI->getOpcode() == ARM::SUBrr ||
2128       OI->getOpcode() == ARM::t2SUBrr) &&
2129      ((OI->getOperand(1).getReg() == SrcReg &&
2130        OI->getOperand(2).getReg() == SrcReg2) ||
2131       (OI->getOperand(1).getReg() == SrcReg2 &&
2132        OI->getOperand(2).getReg() == SrcReg)))
2133    return true;
2134
2135  if ((CmpI->getOpcode() == ARM::CMPri ||
2136       CmpI->getOpcode() == ARM::t2CMPri) &&
2137      (OI->getOpcode() == ARM::SUBri ||
2138       OI->getOpcode() == ARM::t2SUBri) &&
2139      OI->getOperand(1).getReg() == SrcReg &&
2140      OI->getOperand(2).getImm() == ImmValue)
2141    return true;
2142  return false;
2143}
2144
2145/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2146/// comparison into one that sets the zero bit in the flags register;
2147/// Remove a redundant Compare instruction if an earlier instruction can set the
2148/// flags in the same way as Compare.
2149/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2150/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2151/// condition code of instructions which use the flags.
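/// A schematic before/after for the swapped-operand case:
///   sub r2, r1, r0           subs r2, r1, r0
///   cmp r0, r1          =>
///   bge L                    ble L   ; GE rewritten via getSwappedCondition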
2152bool ARMBaseInstrInfo::
2153optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
2154                     int CmpMask, int CmpValue,
2155                     const MachineRegisterInfo *MRI) const {
2156  // Get the unique definition of SrcReg.
2157  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2158  if (!MI) return false;
2159
2160  // Masked compares sometimes use the same register as the corresponding 'and'.
2161  if (CmpMask != ~0) {
2162    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
2163      MI = 0;
2164      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
2165           UE = MRI->use_end(); UI != UE; ++UI) {
2166        if (UI->getParent() != CmpInstr->getParent()) continue;
2167        MachineInstr *PotentialAND = &*UI;
2168        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2169            isPredicated(PotentialAND))
2170          continue;
2171        MI = PotentialAND;
2172        break;
2173      }
2174      if (!MI) return false;
2175    }
2176  }
2177
2178  // Get ready to iterate backward from CmpInstr.
2179  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2180                              B = CmpInstr->getParent()->begin();
2181
2182  // Early exit if CmpInstr is at the beginning of the BB.
2183  if (I == B) return false;
2184
2185  // There are two possible candidates which can be changed to set CPSR:
2186  // One is MI, the other is a SUB instruction.
2187  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2188  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2189  MachineInstr *Sub = NULL;
2190  if (SrcReg2 != 0)
2191    // MI is not a candidate for CMPrr.
2192    MI = NULL;
2193  else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
2194    // Conservatively refuse to convert an instruction which isn't in the same
2195    // BB as the comparison.
2196    // For CMPri, we need to check Sub, thus we can't return here.
2197    if (CmpInstr->getOpcode() == ARM::CMPri ||
2198       CmpInstr->getOpcode() == ARM::t2CMPri)
2199      MI = NULL;
2200    else
2201      return false;
2202  }
2203
2204  // Check that CPSR isn't set between the comparison instruction and the one we
2205  // want to change. At the same time, search for Sub.
2206  const TargetRegisterInfo *TRI = &getRegisterInfo();
2207  --I;
2208  for (; I != E; --I) {
2209    const MachineInstr &Instr = *I;
2210
2211    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2212        Instr.readsRegister(ARM::CPSR, TRI))
2213      // This instruction modifies or uses CPSR after the one we want to
2214      // change. We can't do this transformation.
2215      return false;
2216
2217    // Check whether CmpInstr can be made redundant by the current instruction.
2218    if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
2219      Sub = &*I;
2220      break;
2221    }
2222
2223    if (I == B)
2224      // The 'and' is below the comparison instruction.
2225      return false;
2226  }
2227
2228  // Return false if no candidates exist.
2229  if (!MI && !Sub)
2230    return false;
2231
2232  // The single candidate is called MI.
2233  if (!MI) MI = Sub;
2234
2235  // We can't use a predicated instruction - it doesn't always write the flags.
2236  if (isPredicated(MI))
2237    return false;
2238
2239  switch (MI->getOpcode()) {
2240  default: break;
2241  case ARM::RSBrr:
2242  case ARM::RSBri:
2243  case ARM::RSCrr:
2244  case ARM::RSCri:
2245  case ARM::ADDrr:
2246  case ARM::ADDri:
2247  case ARM::ADCrr:
2248  case ARM::ADCri:
2249  case ARM::SUBrr:
2250  case ARM::SUBri:
2251  case ARM::SBCrr:
2252  case ARM::SBCri:
2253  case ARM::t2RSBri:
2254  case ARM::t2ADDrr:
2255  case ARM::t2ADDri:
2256  case ARM::t2ADCrr:
2257  case ARM::t2ADCri:
2258  case ARM::t2SUBrr:
2259  case ARM::t2SUBri:
2260  case ARM::t2SBCrr:
2261  case ARM::t2SBCri:
2262  case ARM::ANDrr:
2263  case ARM::ANDri:
2264  case ARM::t2ANDrr:
2265  case ARM::t2ANDri:
2266  case ARM::ORRrr:
2267  case ARM::ORRri:
2268  case ARM::t2ORRrr:
2269  case ARM::t2ORRri:
2270  case ARM::EORrr:
2271  case ARM::EORri:
2272  case ARM::t2EORrr:
2273  case ARM::t2EORri: {
2274    // Scan forward for uses of CPSR.
2275    // When checking against MI: if the condition code requires checking of
2276    // the V bit, then this is not safe to do.
2277    // It is safe to remove CmpInstr if CPSR is redefined or killed.
2278    // If we are done with the basic block, we need to check whether CPSR is
2279    // live-out.
2280    SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2281        OperandsToUpdate;
2282    bool isSafe = false;
2283    I = CmpInstr;
2284    E = CmpInstr->getParent()->end();
2285    while (!isSafe && ++I != E) {
2286      const MachineInstr &Instr = *I;
2287      for (unsigned IO = 0, EO = Instr.getNumOperands();
2288           !isSafe && IO != EO; ++IO) {
2289        const MachineOperand &MO = Instr.getOperand(IO);
2290        if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2291          isSafe = true;
2292          break;
2293        }
2294        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2295          continue;
2296        if (MO.isDef()) {
2297          isSafe = true;
2298          break;
2299        }
2300        // The condition code operand is the one just before the CPSR use.
2301        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
2302        if (Sub) {
2303          ARMCC::CondCodes NewCC = getSwappedCondition(CC);
2304          if (NewCC == ARMCC::AL)
2305            return false;
2306          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2307          // on CMP needs to be updated to be based on SUB.
2308          // Push the condition code operands to OperandsToUpdate.
2309          // If it is safe to remove CmpInstr, the condition code of these
2310          // operands will be modified.
2311          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2312              Sub->getOperand(2).getReg() == SrcReg)
2313            OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
2314                                                      NewCC));
2315        }
2316        else
2317          switch (CC) {
2318          default:
2319            // CPSR can be used multiple times; we should continue.
2320            break;
2321          case ARMCC::VS:
2322          case ARMCC::VC:
2323          case ARMCC::GE:
2324          case ARMCC::LT:
2325          case ARMCC::GT:
2326          case ARMCC::LE:
2327            return false;
2328          }
2329      }
2330    }
2331
2332    // If CPSR is neither killed nor re-defined, we should check whether it is
2333    // live-out. If it is live-out, do not optimize.
2334    if (!isSafe) {
2335      MachineBasicBlock *MBB = CmpInstr->getParent();
2336      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2337               SE = MBB->succ_end(); SI != SE; ++SI)
2338        if ((*SI)->isLiveIn(ARM::CPSR))
2339          return false;
2340    }
2341
2342    // Toggle the optional operand to CPSR.
2343    MI->getOperand(5).setReg(ARM::CPSR);
2344    MI->getOperand(5).setIsDef(true);
2345    assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
2346    CmpInstr->eraseFromParent();
2347
2348    // Modify the condition code of operands in OperandsToUpdate.
2349    // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2350    // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2351    for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2352      OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2353    return true;
2354  }
2355  }
2356
2357  return false;
2358}
2359
2360bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
2361                                     MachineInstr *DefMI, unsigned Reg,
2362                                     MachineRegisterInfo *MRI) const {
2363  // Fold large immediates into add, sub, or, xor.
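  // Schematic example (0x00F000F0 is assumed to be a two-part so_imm value):
  //   %c = MOVi32imm 0x00F000F0
  //   %d = ADDrr %a, %c
  // is rewritten into two immediate adds:
  //   %t = ADDri %a, 0x000000F0
  //   %d = ADDri %t, 0x00F00000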
2364  unsigned DefOpc = DefMI->getOpcode();
2365  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2366    return false;
2367  if (!DefMI->getOperand(1).isImm())
2368    // Could be t2MOVi32imm <ga:xx>
2369    return false;
2370
2371  if (!MRI->hasOneNonDBGUse(Reg))
2372    return false;
2373
2374  const MCInstrDesc &DefMCID = DefMI->getDesc();
2375  if (DefMCID.hasOptionalDef()) {
2376    unsigned NumOps = DefMCID.getNumOperands();
2377    const MachineOperand &MO = DefMI->getOperand(NumOps-1);
2378    if (MO.getReg() == ARM::CPSR && !MO.isDead())
2379      // If DefMI defines CPSR and it is not dead, it's obviously not safe
2380      // to delete DefMI.
2381      return false;
2382  }
2383
2384  const MCInstrDesc &UseMCID = UseMI->getDesc();
2385  if (UseMCID.hasOptionalDef()) {
2386    unsigned NumOps = UseMCID.getNumOperands();
2387    if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
2388      // If the instruction sets the flag, do not attempt this optimization
2389      // since it may change the semantics of the code.
2390      return false;
2391  }
2392
2393  unsigned UseOpc = UseMI->getOpcode();
2394  unsigned NewUseOpc = 0;
2395  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
2396  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
2397  bool Commute = false;
2398  switch (UseOpc) {
2399  default: return false;
2400  case ARM::SUBrr:
2401  case ARM::ADDrr:
2402  case ARM::ORRrr:
2403  case ARM::EORrr:
2404  case ARM::t2SUBrr:
2405  case ARM::t2ADDrr:
2406  case ARM::t2ORRrr:
2407  case ARM::t2EORrr: {
2408    Commute = UseMI->getOperand(2).getReg() != Reg;
2409    switch (UseOpc) {
2410    default: break;
2411    case ARM::SUBrr: {
2412      if (Commute)
2413        return false;
2414      ImmVal = -ImmVal;
2415      NewUseOpc = ARM::SUBri;
2416      // Fallthrough
2417    }
2418    case ARM::ADDrr:
2419    case ARM::ORRrr:
2420    case ARM::EORrr: {
2421      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
2422        return false;
2423      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
2424      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
2425      switch (UseOpc) {
2426      default: break;
2427      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
2428      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
2429      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
2430      }
2431      break;
2432    }
2433    case ARM::t2SUBrr: {
2434      if (Commute)
2435        return false;
2436      ImmVal = -ImmVal;
2437      NewUseOpc = ARM::t2SUBri;
2438      // Fallthrough
2439    }
2440    case ARM::t2ADDrr:
2441    case ARM::t2ORRrr:
2442    case ARM::t2EORrr: {
2443      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
2444        return false;
2445      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
2446      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
2447      switch (UseOpc) {
2448      default: break;
2449      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
2450      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
2451      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
2452      }
2453      break;
2454    }
2455    }
2456  }
2457  }
2458
2459  unsigned OpIdx = Commute ? 2 : 1;
2460  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
2461  bool isKill = UseMI->getOperand(OpIdx).isKill();
2462  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
2463  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
2464                                      UseMI, UseMI->getDebugLoc(),
2465                                      get(NewUseOpc), NewReg)
2466                              .addReg(Reg1, getKillRegState(isKill))
2467                              .addImm(SOImmValV1)));
2468  UseMI->setDesc(get(NewUseOpc));
2469  UseMI->getOperand(1).setReg(NewReg);
2470  UseMI->getOperand(1).setIsKill();
2471  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
2472  DefMI->eraseFromParent();
2473  return true;
2474}
2475
2476static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
2477                                        const MachineInstr *MI) {
2478  switch (MI->getOpcode()) {
2479  default: {
2480    const MCInstrDesc &Desc = MI->getDesc();
2481    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
2482    assert(UOps >= 0 && "bad # UOps");
2483    return UOps;
2484  }
2485
2486  case ARM::LDRrs:
2487  case ARM::LDRBrs:
2488  case ARM::STRrs:
2489  case ARM::STRBrs: {
2490    unsigned ShOpVal = MI->getOperand(3).getImm();
2491    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2492    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2493    if (!isSub &&
2494        (ShImm == 0 ||
2495         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2496          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2497      return 1;
2498    return 2;
2499  }
2500
2501  case ARM::LDRH:
2502  case ARM::STRH: {
2503    if (!MI->getOperand(2).getReg())
2504      return 1;
2505
2506    unsigned ShOpVal = MI->getOperand(3).getImm();
2507    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2508    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2509    if (!isSub &&
2510        (ShImm == 0 ||
2511         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2512          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2513      return 1;
2514    return 2;
2515  }
2516
2517  case ARM::LDRSB:
2518  case ARM::LDRSH:
2519    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
2520
2521  case ARM::LDRSB_POST:
2522  case ARM::LDRSH_POST: {
2523    unsigned Rt = MI->getOperand(0).getReg();
2524    unsigned Rm = MI->getOperand(3).getReg();
2525    return (Rt == Rm) ? 4 : 3;
2526  }
2527
2528  case ARM::LDR_PRE_REG:
2529  case ARM::LDRB_PRE_REG: {
2530    unsigned Rt = MI->getOperand(0).getReg();
2531    unsigned Rm = MI->getOperand(3).getReg();
2532    if (Rt == Rm)
2533      return 3;
2534    unsigned ShOpVal = MI->getOperand(4).getImm();
2535    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2536    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2537    if (!isSub &&
2538        (ShImm == 0 ||
2539         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2540          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2541      return 2;
2542    return 3;
2543  }
2544
2545  case ARM::STR_PRE_REG:
2546  case ARM::STRB_PRE_REG: {
2547    unsigned ShOpVal = MI->getOperand(4).getImm();
2548    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2549    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2550    if (!isSub &&
2551        (ShImm == 0 ||
2552         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2553          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2554      return 2;
2555    return 3;
2556  }
2557
2558  case ARM::LDRH_PRE:
2559  case ARM::STRH_PRE: {
2560    unsigned Rt = MI->getOperand(0).getReg();
2561    unsigned Rm = MI->getOperand(3).getReg();
2562    if (!Rm)
2563      return 2;
2564    if (Rt == Rm)
2565      return 3;
2566    return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
2567      ? 3 : 2;
2568  }
2569
2570  case ARM::LDR_POST_REG:
2571  case ARM::LDRB_POST_REG:
2572  case ARM::LDRH_POST: {
2573    unsigned Rt = MI->getOperand(0).getReg();
2574    unsigned Rm = MI->getOperand(3).getReg();
2575    return (Rt == Rm) ? 3 : 2;
2576  }
2577
2578  case ARM::LDR_PRE_IMM:
2579  case ARM::LDRB_PRE_IMM:
2580  case ARM::LDR_POST_IMM:
2581  case ARM::LDRB_POST_IMM:
2582  case ARM::STRB_POST_IMM:
2583  case ARM::STRB_POST_REG:
2584  case ARM::STRB_PRE_IMM:
2585  case ARM::STRH_POST:
2586  case ARM::STR_POST_IMM:
2587  case ARM::STR_POST_REG:
2588  case ARM::STR_PRE_IMM:
2589    return 2;
2590
2591  case ARM::LDRSB_PRE:
2592  case ARM::LDRSH_PRE: {
2593    unsigned Rm = MI->getOperand(3).getReg();
2594    if (Rm == 0)
2595      return 3;
2596    unsigned Rt = MI->getOperand(0).getReg();
2597    if (Rt == Rm)
2598      return 4;
2599    unsigned ShOpVal = MI->getOperand(4).getImm();
2600    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2601    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2602    if (!isSub &&
2603        (ShImm == 0 ||
2604         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2605          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2606      return 3;
2607    return 4;
2608  }
2609
2610  case ARM::LDRD: {
2611    unsigned Rt = MI->getOperand(0).getReg();
2612    unsigned Rn = MI->getOperand(2).getReg();
2613    unsigned Rm = MI->getOperand(3).getReg();
2614    if (Rm)
2615      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
2616    return (Rt == Rn) ? 3 : 2;
2617  }
2618
2619  case ARM::STRD: {
2620    unsigned Rm = MI->getOperand(3).getReg();
2621    if (Rm)
2622      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
2623    return 2;
2624  }
2625
2626  case ARM::LDRD_POST:
2627  case ARM::t2LDRD_POST:
2628    return 3;
2629
2630  case ARM::STRD_POST:
2631  case ARM::t2STRD_POST:
2632    return 4;
2633
2634  case ARM::LDRD_PRE: {
2635    unsigned Rt = MI->getOperand(0).getReg();
2636    unsigned Rn = MI->getOperand(3).getReg();
2637    unsigned Rm = MI->getOperand(4).getReg();
2638    if (Rm)
2639      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
2640    return (Rt == Rn) ? 4 : 3;
2641  }
2642
2643  case ARM::t2LDRD_PRE: {
2644    unsigned Rt = MI->getOperand(0).getReg();
2645    unsigned Rn = MI->getOperand(3).getReg();
2646    return (Rt == Rn) ? 4 : 3;
2647  }
2648
2649  case ARM::STRD_PRE: {
2650    unsigned Rm = MI->getOperand(4).getReg();
2651    if (Rm)
2652      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
2653    return 3;
2654  }
2655
2656  case ARM::t2STRD_PRE:
2657    return 3;
2658
2659  case ARM::t2LDR_POST:
2660  case ARM::t2LDRB_POST:
2661  case ARM::t2LDRB_PRE:
2662  case ARM::t2LDRSBi12:
2663  case ARM::t2LDRSBi8:
2664  case ARM::t2LDRSBpci:
2665  case ARM::t2LDRSBs:
2666  case ARM::t2LDRH_POST:
2667  case ARM::t2LDRH_PRE:
2668  case ARM::t2LDRSBT:
2669  case ARM::t2LDRSB_POST:
2670  case ARM::t2LDRSB_PRE:
2671  case ARM::t2LDRSH_POST:
2672  case ARM::t2LDRSH_PRE:
2673  case ARM::t2LDRSHi12:
2674  case ARM::t2LDRSHi8:
2675  case ARM::t2LDRSHpci:
2676  case ARM::t2LDRSHs:
2677    return 2;
2678
2679  case ARM::t2LDRDi8: {
2680    unsigned Rt = MI->getOperand(0).getReg();
2681    unsigned Rn = MI->getOperand(2).getReg();
2682    return (Rt == Rn) ? 3 : 2;
2683  }
2684
2685  case ARM::t2STRB_POST:
2686  case ARM::t2STRB_PRE:
2687  case ARM::t2STRBs:
2688  case ARM::t2STRDi8:
2689  case ARM::t2STRH_POST:
2690  case ARM::t2STRH_PRE:
2691  case ARM::t2STRHs:
2692  case ARM::t2STR_POST:
2693  case ARM::t2STR_PRE:
2694  case ARM::t2STRs:
2695    return 2;
2696  }
2697}
2698
2699// Return the number of 32-bit words loaded by LDM or stored by STM. If this
2700// can't be easily determined, return 0 (missing MachineMemOperand).
2701//
2702// FIXME: The current MachineInstr design does not support relying on machine
2703// mem operands to determine the width of a memory access. Instead, we expect
2704// the target to provide this information based on the instruction opcode and
2705// operands. However, using MachineMemOperand is the best solution for now, for
2706// two reasons:
2707//
2708// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
2709// operands. This is much more dangerous than using the MachineMemOperand
2710// sizes because CodeGen passes can insert/remove optional machine operands. In
2711// fact, it's totally incorrect for preRA passes and appears to be wrong for
2712// postRA passes as well.
2713//
2714// 2) getNumLDMAddresses is only used by the scheduling machine model and any
2715// machine model that calls this should handle the unknown (zero size) case.
2716//
2717// Long term, we should require a target hook that verifies MachineMemOperand
2718// sizes during MC lowering. That target hook should be local to MC lowering
2719// because we can't ensure that it is aware of other MI forms. Doing this will
2720// ensure that MachineMemOperands are correctly propagated through all passes.
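//
// For instance, an LDMIA carrying two 4-byte MachineMemOperands sums to
// (4 + 4) / 4 == 2 addresses below; with no memoperands the result is 0 and
// the machine model must treat the count as unknown.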
2721unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
2722  unsigned Size = 0;
2723  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
2724         E = MI->memoperands_end(); I != E; ++I) {
2725    Size += (*I)->getSize();
2726  }
2727  return Size / 4;
2728}
2729
2730unsigned
2731ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
2732                                 const MachineInstr *MI) const {
2733  if (!ItinData || ItinData->isEmpty())
2734    return 1;
2735
2736  const MCInstrDesc &Desc = MI->getDesc();
2737  unsigned Class = Desc.getSchedClass();
2738  int ItinUOps = ItinData->getNumMicroOps(Class);
2739  if (ItinUOps >= 0) {
2740    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
2741      return getNumMicroOpsSwiftLdSt(ItinData, MI);
2742
2743    return ItinUOps;
2744  }
2745
2746  unsigned Opc = MI->getOpcode();
2747  switch (Opc) {
2748  default:
2749    llvm_unreachable("Unexpected multi-uops instruction!");
2750  case ARM::VLDMQIA:
2751  case ARM::VSTMQIA:
2752    return 2;
2753
2754  // The number of uOps for load / store multiple is determined by the number
2755  // of registers.
2756  //
2757  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
2758  // same cycle. The scheduling for the first load / store must be done
2759  // separately by assuming the address is not 64-bit aligned.
2760  //
2761  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
2762  // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
2763  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
2764  case ARM::VLDMDIA:
2765  case ARM::VLDMDIA_UPD:
2766  case ARM::VLDMDDB_UPD:
2767  case ARM::VLDMSIA:
2768  case ARM::VLDMSIA_UPD:
2769  case ARM::VLDMSDB_UPD:
2770  case ARM::VSTMDIA:
2771  case ARM::VSTMDIA_UPD:
2772  case ARM::VSTMDDB_UPD:
2773  case ARM::VSTMSIA:
2774  case ARM::VSTMSIA_UPD:
2775  case ARM::VSTMSDB_UPD: {
2776    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
2777    return (NumRegs / 2) + (NumRegs % 2) + 1;
2778  }
2779
2780  case ARM::LDMIA_RET:
2781  case ARM::LDMIA:
2782  case ARM::LDMDA:
2783  case ARM::LDMDB:
2784  case ARM::LDMIB:
2785  case ARM::LDMIA_UPD:
2786  case ARM::LDMDA_UPD:
2787  case ARM::LDMDB_UPD:
2788  case ARM::LDMIB_UPD:
2789  case ARM::STMIA:
2790  case ARM::STMDA:
2791  case ARM::STMDB:
2792  case ARM::STMIB:
2793  case ARM::STMIA_UPD:
2794  case ARM::STMDA_UPD:
2795  case ARM::STMDB_UPD:
2796  case ARM::STMIB_UPD:
2797  case ARM::tLDMIA:
2798  case ARM::tLDMIA_UPD:
2799  case ARM::tSTMIA_UPD:
2800  case ARM::tPOP_RET:
2801  case ARM::tPOP:
2802  case ARM::tPUSH:
2803  case ARM::t2LDMIA_RET:
2804  case ARM::t2LDMIA:
2805  case ARM::t2LDMDB:
2806  case ARM::t2LDMIA_UPD:
2807  case ARM::t2LDMDB_UPD:
2808  case ARM::t2STMIA:
2809  case ARM::t2STMDB:
2810  case ARM::t2STMIA_UPD:
2811  case ARM::t2STMDB_UPD: {
2812    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
2813    if (Subtarget.isSwift()) {
2814      int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
2815      switch (Opc) {
2816      default: break;
2817      case ARM::VLDMDIA_UPD:
2818      case ARM::VLDMDDB_UPD:
2819      case ARM::VLDMSIA_UPD:
2820      case ARM::VLDMSDB_UPD:
2821      case ARM::VSTMDIA_UPD:
2822      case ARM::VSTMDDB_UPD:
2823      case ARM::VSTMSIA_UPD:
2824      case ARM::VSTMSDB_UPD:
2825      case ARM::LDMIA_UPD:
2826      case ARM::LDMDA_UPD:
2827      case ARM::LDMDB_UPD:
2828      case ARM::LDMIB_UPD:
2829      case ARM::STMIA_UPD:
2830      case ARM::STMDA_UPD:
2831      case ARM::STMDB_UPD:
2832      case ARM::STMIB_UPD:
2833      case ARM::tLDMIA_UPD:
2834      case ARM::tSTMIA_UPD:
2835      case ARM::t2LDMIA_UPD:
2836      case ARM::t2LDMDB_UPD:
2837      case ARM::t2STMIA_UPD:
2838      case ARM::t2STMDB_UPD:
2839        ++UOps; // One for base register writeback.
2840        break;
2841      case ARM::LDMIA_RET:
2842      case ARM::tPOP_RET:
2843      case ARM::t2LDMIA_RET:
2844        UOps += 2; // One for base reg wb, one for write to pc.
2845        break;
2846      }
2847      return UOps;
2848    } else if (Subtarget.isCortexA8()) {
2849      if (NumRegs < 4)
2850        return 2;
2851      // 4 registers would be issued: 2, 2.
2852      // 5 registers would be issued: 2, 2, 1.
2853      int A8UOps = (NumRegs / 2);
2854      if (NumRegs % 2)
2855        ++A8UOps;
2856      return A8UOps;
2857    } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
2858      int A9UOps = (NumRegs / 2);
2859      // If there is an odd number of registers or if it's not 64-bit aligned,
2860      // then it takes an extra AGU (Address Generation Unit) cycle.
2861      if ((NumRegs % 2) ||
2862          !MI->hasOneMemOperand() ||
2863          (*MI->memoperands_begin())->getAlignment() < 8)
2864        ++A9UOps;
2865      return A9UOps;
2866    } else {
2867      // Assume the worst.
2868      return NumRegs;
2869    }
2870  }
2871  }
2872}
2873
2874int
2875ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
2876                                  const MCInstrDesc &DefMCID,
2877                                  unsigned DefClass,
2878                                  unsigned DefIdx, unsigned DefAlign) const {
2879  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
2880  if (RegNo <= 0)
2881    // Def is the address writeback.
2882    return ItinData->getOperandCycle(DefClass, DefIdx);
2883
2884  int DefCycle;
2885  if (Subtarget.isCortexA8()) {
2886    // (regno / 2) + (regno % 2) + 1
2887    DefCycle = RegNo / 2 + 1;
2888    if (RegNo % 2)
2889      ++DefCycle;
2890  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
2891    DefCycle = RegNo;
2892    bool isSLoad = false;
2893
2894    switch (DefMCID.getOpcode()) {
2895    default: break;
2896    case ARM::VLDMSIA:
2897    case ARM::VLDMSIA_UPD:
2898    case ARM::VLDMSDB_UPD:
2899      isSLoad = true;
2900      break;
2901    }
2902
2903    // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
2904    // then it takes an extra cycle.
2905    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
2906      ++DefCycle;
2907  } else {
2908    // Assume the worst.
2909    DefCycle = RegNo + 2;
2910  }
2911
2912  return DefCycle;
2913}
2914
2915int
2916ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
2917                                 const MCInstrDesc &DefMCID,
2918                                 unsigned DefClass,
2919                                 unsigned DefIdx, unsigned DefAlign) const {
2920  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
2921  if (RegNo <= 0)
2922    // Def is the address writeback.
2923    return ItinData->getOperandCycle(DefClass, DefIdx);
2924
2925  int DefCycle;
2926  if (Subtarget.isCortexA8()) {
2927    // 4 registers would be issued: 1, 2, 1.
2928    // 5 registers would be issued: 1, 2, 2.
2929    DefCycle = RegNo / 2;
2930    if (DefCycle < 1)
2931      DefCycle = 1;
2932    // Result latency is issue cycle + 2: E2.
2933    DefCycle += 2;
2934  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
2935    DefCycle = (RegNo / 2);
2936    // If there is an odd number of registers or if it's not 64-bit aligned,
2937    // then it takes an extra AGU (Address Generation Unit) cycle.
2938    if ((RegNo % 2) || DefAlign < 8)
2939      ++DefCycle;
2940    // Result latency is AGU cycles + 2.
2941    DefCycle += 2;
2942  } else {
2943    // Assume the worst.
2944    DefCycle = RegNo + 2;
2945  }
2946
2947  return DefCycle;
2948}
2949
2950int
2951ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
2952                                  const MCInstrDesc &UseMCID,
2953                                  unsigned UseClass,
2954                                  unsigned UseIdx, unsigned UseAlign) const {
2955  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
2956  if (RegNo <= 0)
2957    return ItinData->getOperandCycle(UseClass, UseIdx);
2958
2959  int UseCycle;
2960  if (Subtarget.isCortexA8()) {
2961    // (regno / 2) + (regno % 2) + 1
2962    UseCycle = RegNo / 2 + 1;
2963    if (RegNo % 2)
2964      ++UseCycle;
2965  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
2966    UseCycle = RegNo;
2967    bool isSStore = false;
2968
2969    switch (UseMCID.getOpcode()) {
2970    default: break;
2971    case ARM::VSTMSIA:
2972    case ARM::VSTMSIA_UPD:
2973    case ARM::VSTMSDB_UPD:
2974      isSStore = true;
2975      break;
2976    }
2977
2978    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
2979    // then it takes an extra cycle.
2980    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
2981      ++UseCycle;
2982  } else {
2983    // Assume the worst.
2984    UseCycle = RegNo + 2;
2985  }
2986
2987  return UseCycle;
2988}
2989
2990int
2991ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
2992                                 const MCInstrDesc &UseMCID,
2993                                 unsigned UseClass,
2994                                 unsigned UseIdx, unsigned UseAlign) const {
2995  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
2996  if (RegNo <= 0)
2997    return ItinData->getOperandCycle(UseClass, UseIdx);
2998
2999  int UseCycle;
3000  if (Subtarget.isCortexA8()) {
3001    UseCycle = RegNo / 2;
3002    if (UseCycle < 2)
3003      UseCycle = 2;
3004    // Read in E3.
3005    UseCycle += 2;
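    // For example, the 6th register in the list is read at
    // UseCycle = max(6/2, 2) + 2 = 5.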
3006  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3007    UseCycle = (RegNo / 2);
3008    // If there is an odd number of registers or if it's not 64-bit aligned,
3009    // then it takes an extra AGU (Address Generation Unit) cycle.
3010    if ((RegNo % 2) || UseAlign < 8)
3011      ++UseCycle;
3012  } else {
3013    // Assume the worst.
3014    UseCycle = 1;
3015  }
3016  return UseCycle;
3017}
3018
3019int
3020ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3021                                    const MCInstrDesc &DefMCID,
3022                                    unsigned DefIdx, unsigned DefAlign,
3023                                    const MCInstrDesc &UseMCID,
3024                                    unsigned UseIdx, unsigned UseAlign) const {
3025  unsigned DefClass = DefMCID.getSchedClass();
3026  unsigned UseClass = UseMCID.getSchedClass();
3027
3028  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3029    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3030
3031  // This may be a def / use of a variable_ops instruction, in which case the
3032  // operand latency might only be determinable dynamically. Let the target
3033  // try to figure it out.
3034  int DefCycle = -1;
3035  bool LdmBypass = false;
3036  switch (DefMCID.getOpcode()) {
3037  default:
3038    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3039    break;
3040
3041  case ARM::VLDMDIA:
3042  case ARM::VLDMDIA_UPD:
3043  case ARM::VLDMDDB_UPD:
3044  case ARM::VLDMSIA:
3045  case ARM::VLDMSIA_UPD:
3046  case ARM::VLDMSDB_UPD:
3047    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3048    break;
3049
3050  case ARM::LDMIA_RET:
3051  case ARM::LDMIA:
3052  case ARM::LDMDA:
3053  case ARM::LDMDB:
3054  case ARM::LDMIB:
3055  case ARM::LDMIA_UPD:
3056  case ARM::LDMDA_UPD:
3057  case ARM::LDMDB_UPD:
3058  case ARM::LDMIB_UPD:
3059  case ARM::tLDMIA:
3060  case ARM::tLDMIA_UPD:
3061  case ARM::tPUSH:
3062  case ARM::t2LDMIA_RET:
3063  case ARM::t2LDMIA:
3064  case ARM::t2LDMDB:
3065  case ARM::t2LDMIA_UPD:
3066  case ARM::t2LDMDB_UPD:
3067    LdmBypass = true;
3068    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3069    break;
3070  }
3071
3072  if (DefCycle == -1)
3073    // We can't determine the result latency of the def, so assume it's 2.
3074    DefCycle = 2;
3075
3076  int UseCycle = -1;
3077  switch (UseMCID.getOpcode()) {
3078  default:
3079    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3080    break;
3081
3082  case ARM::VSTMDIA:
3083  case ARM::VSTMDIA_UPD:
3084  case ARM::VSTMDDB_UPD:
3085  case ARM::VSTMSIA:
3086  case ARM::VSTMSIA_UPD:
3087  case ARM::VSTMSDB_UPD:
3088    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3089    break;
3090
3091  case ARM::STMIA:
3092  case ARM::STMDA:
3093  case ARM::STMDB:
3094  case ARM::STMIB:
3095  case ARM::STMIA_UPD:
3096  case ARM::STMDA_UPD:
3097  case ARM::STMDB_UPD:
3098  case ARM::STMIB_UPD:
3099  case ARM::tSTMIA_UPD:
3100  case ARM::tPOP_RET:
3101  case ARM::tPOP:
3102  case ARM::t2STMIA:
3103  case ARM::t2STMDB:
3104  case ARM::t2STMIA_UPD:
3105  case ARM::t2STMDB_UPD:
3106    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3107    break;
3108  }
3109
3110  if (UseCycle == -1)
3111    // Assume it's read in the first stage.
3112    UseCycle = 1;
3113
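  // Operand latency spans from the cycle the def becomes available to the
  // cycle the use reads it, e.g. DefCycle = 4 and UseCycle = 2 give a latency
  // of 4 - 2 + 1 = 3 cycles.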
3114  UseCycle = DefCycle - UseCycle + 1;
3115  if (UseCycle > 0) {
3116    if (LdmBypass) {
3117      // It's a variable_ops instruction, so we can't use DefIdx here. Just
3118      // use the first def operand.
3119      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3120                                          UseClass, UseIdx))
3121        --UseCycle;
3122    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3123                                               UseClass, UseIdx)) {
3124      --UseCycle;
3125    }
3126  }
3127
3128  return UseCycle;
3129}
3130
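/// getBundledDefMI - Walk backwards from the last instruction of the bundle
/// headed by MI, looking for the instruction that defines Reg. On return,
/// DefIdx is the operand index of the def and Dist is the number of
/// instructions walked back within the bundle.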
3131static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3132                                           const MachineInstr *MI, unsigned Reg,
3133                                           unsigned &DefIdx, unsigned &Dist) {
3134  Dist = 0;
3135
3136  MachineBasicBlock::const_iterator I = MI; ++I;
3137  MachineBasicBlock::const_instr_iterator II =
3138    llvm::prior(I.getInstrIterator());
3139  assert(II->isInsideBundle() && "Empty bundle?");
3140
3141  int Idx = -1;
3142  while (II->isInsideBundle()) {
3143    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3144    if (Idx != -1)
3145      break;
3146    --II;
3147    ++Dist;
3148  }
3149
3150  assert(Idx != -1 && "Cannot find bundled definition!");
3151  DefIdx = Idx;
3152  return II;
3153}
3154
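/// getBundledUseMI - Walk forwards through the bundle headed by MI, looking
/// for the first instruction that uses Reg. On success, UseIdx is the operand
/// index of the use and Dist counts the non-IT instructions skipped; returns
/// null (with Dist = 0) if the bundle contains no use of Reg.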
3155static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3156                                           const MachineInstr *MI, unsigned Reg,
3157                                           unsigned &UseIdx, unsigned &Dist) {
3158  Dist = 0;
3159
3160  MachineBasicBlock::const_instr_iterator II = MI; ++II;
3161  assert(II->isInsideBundle() && "Empty bundle?");
3162  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
3163
3164  // FIXME: This doesn't properly handle multiple uses.
3165  int Idx = -1;
3166  while (II != E && II->isInsideBundle()) {
3167    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3168    if (Idx != -1)
3169      break;
3170    if (II->getOpcode() != ARM::t2IT)
3171      ++Dist;
3172    ++II;
3173  }
3174
3175  if (Idx == -1) {
3176    Dist = 0;
3177    return 0;
3178  }
3179
3180  UseIdx = Idx;
3181  return II;
3182}
3183
3184/// Return the number of cycles to add to (or subtract from) the static
3185/// itinerary based on the def opcode and alignment. The caller will ensure that
3186/// adjusted latency is at least one cycle.
3187static int adjustDefLatency(const ARMSubtarget &Subtarget,
3188                            const MachineInstr *DefMI,
3189                            const MCInstrDesc *DefMCID, unsigned DefAlign) {
3190  int Adjust = 0;
3191  if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
3192    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3193    // variants are one cycle cheaper.
3194    switch (DefMCID->getOpcode()) {
3195    default: break;
3196    case ARM::LDRrs:
3197    case ARM::LDRBrs: {
3198      unsigned ShOpVal = DefMI->getOperand(3).getImm();
3199      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3200      if (ShImm == 0 ||
3201          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3202        --Adjust;
3203      break;
3204    }
3205    case ARM::t2LDRs:
3206    case ARM::t2LDRBs:
3207    case ARM::t2LDRHs:
3208    case ARM::t2LDRSHs: {
3209      // Thumb2 mode: lsl only.
3210      unsigned ShAmt = DefMI->getOperand(3).getImm();
3211      if (ShAmt == 0 || ShAmt == 2)
3212        --Adjust;
3213      break;
3214    }
3215    }
3216  } else if (Subtarget.isSwift()) {
3217    // FIXME: Properly handle all of the latency adjustments for address
3218    // writeback.
3219    switch (DefMCID->getOpcode()) {
3220    default: break;
3221    case ARM::LDRrs:
3222    case ARM::LDRBrs: {
3223      unsigned ShOpVal = DefMI->getOperand(3).getImm();
3224      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3225      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3226      if (!isSub &&
3227          (ShImm == 0 ||
3228           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3229            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3230        Adjust -= 2;
3231      else if (!isSub &&
3232               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3233        --Adjust;
3234      break;
3235    }
3236    case ARM::t2LDRs:
3237    case ARM::t2LDRBs:
3238    case ARM::t2LDRHs:
3239    case ARM::t2LDRSHs: {
3240      // Thumb2 mode: lsl only.
3241      unsigned ShAmt = DefMI->getOperand(3).getImm();
3242      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3243        Adjust -= 2;
3244      break;
3245    }
3246    }
3247  }
3248
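  // On A9-like cores the NEON load instructions below pay a one-cycle penalty
  // when the address is not 64-bit aligned (see the comment at the end of the
  // switch).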
3249  if (DefAlign < 8 && Subtarget.isLikeA9()) {
3250    switch (DefMCID->getOpcode()) {
3251    default: break;
3252    case ARM::VLD1q8:
3253    case ARM::VLD1q16:
3254    case ARM::VLD1q32:
3255    case ARM::VLD1q64:
3256    case ARM::VLD1q8wb_fixed:
3257    case ARM::VLD1q16wb_fixed:
3258    case ARM::VLD1q32wb_fixed:
3259    case ARM::VLD1q64wb_fixed:
3260    case ARM::VLD1q8wb_register:
3261    case ARM::VLD1q16wb_register:
3262    case ARM::VLD1q32wb_register:
3263    case ARM::VLD1q64wb_register:
3264    case ARM::VLD2d8:
3265    case ARM::VLD2d16:
3266    case ARM::VLD2d32:
3267    case ARM::VLD2q8:
3268    case ARM::VLD2q16:
3269    case ARM::VLD2q32:
3270    case ARM::VLD2d8wb_fixed:
3271    case ARM::VLD2d16wb_fixed:
3272    case ARM::VLD2d32wb_fixed:
3273    case ARM::VLD2q8wb_fixed:
3274    case ARM::VLD2q16wb_fixed:
3275    case ARM::VLD2q32wb_fixed:
3276    case ARM::VLD2d8wb_register:
3277    case ARM::VLD2d16wb_register:
3278    case ARM::VLD2d32wb_register:
3279    case ARM::VLD2q8wb_register:
3280    case ARM::VLD2q16wb_register:
3281    case ARM::VLD2q32wb_register:
3282    case ARM::VLD3d8:
3283    case ARM::VLD3d16:
3284    case ARM::VLD3d32:
3285    case ARM::VLD1d64T:
3286    case ARM::VLD3d8_UPD:
3287    case ARM::VLD3d16_UPD:
3288    case ARM::VLD3d32_UPD:
3289    case ARM::VLD1d64Twb_fixed:
3290    case ARM::VLD1d64Twb_register:
3291    case ARM::VLD3q8_UPD:
3292    case ARM::VLD3q16_UPD:
3293    case ARM::VLD3q32_UPD:
3294    case ARM::VLD4d8:
3295    case ARM::VLD4d16:
3296    case ARM::VLD4d32:
3297    case ARM::VLD1d64Q:
3298    case ARM::VLD4d8_UPD:
3299    case ARM::VLD4d16_UPD:
3300    case ARM::VLD4d32_UPD:
3301    case ARM::VLD1d64Qwb_fixed:
3302    case ARM::VLD1d64Qwb_register:
3303    case ARM::VLD4q8_UPD:
3304    case ARM::VLD4q16_UPD:
3305    case ARM::VLD4q32_UPD:
3306    case ARM::VLD1DUPq8:
3307    case ARM::VLD1DUPq16:
3308    case ARM::VLD1DUPq32:
3309    case ARM::VLD1DUPq8wb_fixed:
3310    case ARM::VLD1DUPq16wb_fixed:
3311    case ARM::VLD1DUPq32wb_fixed:
3312    case ARM::VLD1DUPq8wb_register:
3313    case ARM::VLD1DUPq16wb_register:
3314    case ARM::VLD1DUPq32wb_register:
3315    case ARM::VLD2DUPd8:
3316    case ARM::VLD2DUPd16:
3317    case ARM::VLD2DUPd32:
3318    case ARM::VLD2DUPd8wb_fixed:
3319    case ARM::VLD2DUPd16wb_fixed:
3320    case ARM::VLD2DUPd32wb_fixed:
3321    case ARM::VLD2DUPd8wb_register:
3322    case ARM::VLD2DUPd16wb_register:
3323    case ARM::VLD2DUPd32wb_register:
3324    case ARM::VLD4DUPd8:
3325    case ARM::VLD4DUPd16:
3326    case ARM::VLD4DUPd32:
3327    case ARM::VLD4DUPd8_UPD:
3328    case ARM::VLD4DUPd16_UPD:
3329    case ARM::VLD4DUPd32_UPD:
3330    case ARM::VLD1LNd8:
3331    case ARM::VLD1LNd16:
3332    case ARM::VLD1LNd32:
3333    case ARM::VLD1LNd8_UPD:
3334    case ARM::VLD1LNd16_UPD:
3335    case ARM::VLD1LNd32_UPD:
3336    case ARM::VLD2LNd8:
3337    case ARM::VLD2LNd16:
3338    case ARM::VLD2LNd32:
3339    case ARM::VLD2LNq16:
3340    case ARM::VLD2LNq32:
3341    case ARM::VLD2LNd8_UPD:
3342    case ARM::VLD2LNd16_UPD:
3343    case ARM::VLD2LNd32_UPD:
3344    case ARM::VLD2LNq16_UPD:
3345    case ARM::VLD2LNq32_UPD:
3346    case ARM::VLD4LNd8:
3347    case ARM::VLD4LNd16:
3348    case ARM::VLD4LNd32:
3349    case ARM::VLD4LNq16:
3350    case ARM::VLD4LNq32:
3351    case ARM::VLD4LNd8_UPD:
3352    case ARM::VLD4LNd16_UPD:
3353    case ARM::VLD4LNd32_UPD:
3354    case ARM::VLD4LNq16_UPD:
3355    case ARM::VLD4LNq32_UPD:
3356      // If the address is not 64-bit aligned, the latencies of these
3357      // instructions increase by one.
3358      ++Adjust;
3359      break;
3360    }
3361  }
3362  return Adjust;
3363}
3364
3365
3366
3367int
3368ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3369                                    const MachineInstr *DefMI, unsigned DefIdx,
3370                                    const MachineInstr *UseMI,
3371                                    unsigned UseIdx) const {
3372  // No operand latency. The caller may fall back to getInstrLatency.
3373  if (!ItinData || ItinData->isEmpty())
3374    return -1;
3375
3376  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
3377  unsigned Reg = DefMO.getReg();
3378  const MCInstrDesc *DefMCID = &DefMI->getDesc();
3379  const MCInstrDesc *UseMCID = &UseMI->getDesc();
3380
3381  unsigned DefAdj = 0;
3382  if (DefMI->isBundle()) {
3383    DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
3384    DefMCID = &DefMI->getDesc();
3385  }
3386  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
3387      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
3388    return 1;
3389  }
3390
3391  unsigned UseAdj = 0;
3392  if (UseMI->isBundle()) {
3393    unsigned NewUseIdx;
3394    const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
3395                                                   Reg, NewUseIdx, UseAdj);
3396    if (!NewUseMI)
3397      return -1;
3398
3399    UseMI = NewUseMI;
3400    UseIdx = NewUseIdx;
3401    UseMCID = &UseMI->getDesc();
3402  }
3403
3404  if (Reg == ARM::CPSR) {
3405    if (DefMI->getOpcode() == ARM::FMSTAT) {
3406      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
3407      return Subtarget.isLikeA9() ? 1 : 20;
3408    }
3409
3410    // CPSR set and branch can be paired in the same cycle.
3411    if (UseMI->isBranch())
3412      return 0;
3413
3414    // Otherwise it takes the instruction latency (generally one).
3415    unsigned Latency = getInstrLatency(ItinData, DefMI);
3416
3417    // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction close
3418    // to its uses. Instructions which end up scheduled between them may incur a
3419    // code size penalty (they are not able to use the CPSR-setting 16-bit
3420    // instructions).
3421    if (Latency > 0 && Subtarget.isThumb2()) {
3422      const MachineFunction *MF = DefMI->getParent()->getParent();
3423      if (MF->getFunction()->getAttributes().
3424            hasAttribute(AttributeSet::FunctionIndex,
3425                         Attribute::OptimizeForSize))
3426        --Latency;
3427    }
3428    return Latency;
3429  }
3430
3431  if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
3432    return -1;
3433
3434  unsigned DefAlign = DefMI->hasOneMemOperand()
3435    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
3436  unsigned UseAlign = UseMI->hasOneMemOperand()
3437    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
3438
3439  // Get the itinerary's latency if possible, and handle variable_ops.
3440  int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
3441                                  *UseMCID, UseIdx, UseAlign);
3442  // Unable to find operand latency. The caller may resort to getInstrLatency.
3443  if (Latency < 0)
3444    return Latency;
3445
3446  // Adjust for IT block position.
3447  int Adj = DefAdj + UseAdj;
3448
3449  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
3450  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
3451  if (Adj >= 0 || (int)Latency > -Adj) {
3452    return Latency + Adj;
3453  }
3454  // Return the itinerary latency, which may be zero but never negative.
3455  return Latency;
3456}
3457
3458int
3459ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3460                                    SDNode *DefNode, unsigned DefIdx,
3461                                    SDNode *UseNode, unsigned UseIdx) const {
3462  if (!DefNode->isMachineOpcode())
3463    return 1;
3464
3465  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
3466
3467  if (isZeroCost(DefMCID.Opcode))
3468    return 0;
3469
3470  if (!ItinData || ItinData->isEmpty())
3471    return DefMCID.mayLoad() ? 3 : 1;
3472
3473  if (!UseNode->isMachineOpcode()) {
3474    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
3475    if (Subtarget.isLikeA9() || Subtarget.isSwift())
3476      return Latency <= 2 ? 1 : Latency - 1;
3477    else
3478      return Latency <= 3 ? 1 : Latency - 2;
3479  }
3480
3481  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
3482  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
3483  unsigned DefAlign = !DefMN->memoperands_empty()
3484    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
3485  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
3486  unsigned UseAlign = !UseMN->memoperands_empty()
3487    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
3488  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
3489                                  UseMCID, UseIdx, UseAlign);
3490
3491  if (Latency > 1 &&
3492      (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
3493    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3494    // variants are one cycle cheaper.
3495    switch (DefMCID.getOpcode()) {
3496    default: break;
3497    case ARM::LDRrs:
3498    case ARM::LDRBrs: {
3499      unsigned ShOpVal =
3500        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3501      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3502      if (ShImm == 0 ||
3503          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3504        --Latency;
3505      break;
3506    }
3507    case ARM::t2LDRs:
3508    case ARM::t2LDRBs:
3509    case ARM::t2LDRHs:
3510    case ARM::t2LDRSHs: {
3511      // Thumb2 mode: lsl only.
3512      unsigned ShAmt =
3513        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3514      if (ShAmt == 0 || ShAmt == 2)
3515        --Latency;
3516      break;
3517    }
3518    }
3519  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
3520    // FIXME: Properly handle all of the latency adjustments for address
3521    // writeback.
3522    switch (DefMCID.getOpcode()) {
3523    default: break;
3524    case ARM::LDRrs:
3525    case ARM::LDRBrs: {
3526      unsigned ShOpVal =
3527        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3528      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3529      if (ShImm == 0 ||
3530          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3531           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3532        Latency -= 2;
3533      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3534        --Latency;
3535      break;
3536    }
3537    case ARM::t2LDRs:
3538    case ARM::t2LDRBs:
3539    case ARM::t2LDRHs:
3540    case ARM::t2LDRSHs: {
3541      // Thumb2 mode: lsl 0-3 only.
3542      Latency -= 2;
3543      break;
3544    }
3545    }
3546  }
3547
3548  if (DefAlign < 8 && Subtarget.isLikeA9())
3549    switch (DefMCID.getOpcode()) {
3550    default: break;
3551    case ARM::VLD1q8:
3552    case ARM::VLD1q16:
3553    case ARM::VLD1q32:
3554    case ARM::VLD1q64:
3555    case ARM::VLD1q8wb_register:
3556    case ARM::VLD1q16wb_register:
3557    case ARM::VLD1q32wb_register:
3558    case ARM::VLD1q64wb_register:
3559    case ARM::VLD1q8wb_fixed:
3560    case ARM::VLD1q16wb_fixed:
3561    case ARM::VLD1q32wb_fixed:
3562    case ARM::VLD1q64wb_fixed:
3563    case ARM::VLD2d8:
3564    case ARM::VLD2d16:
3565    case ARM::VLD2d32:
3566    case ARM::VLD2q8Pseudo:
3567    case ARM::VLD2q16Pseudo:
3568    case ARM::VLD2q32Pseudo:
3569    case ARM::VLD2d8wb_fixed:
3570    case ARM::VLD2d16wb_fixed:
3571    case ARM::VLD2d32wb_fixed:
3572    case ARM::VLD2q8PseudoWB_fixed:
3573    case ARM::VLD2q16PseudoWB_fixed:
3574    case ARM::VLD2q32PseudoWB_fixed:
3575    case ARM::VLD2d8wb_register:
3576    case ARM::VLD2d16wb_register:
3577    case ARM::VLD2d32wb_register:
3578    case ARM::VLD2q8PseudoWB_register:
3579    case ARM::VLD2q16PseudoWB_register:
3580    case ARM::VLD2q32PseudoWB_register:
3581    case ARM::VLD3d8Pseudo:
3582    case ARM::VLD3d16Pseudo:
3583    case ARM::VLD3d32Pseudo:
3584    case ARM::VLD1d64TPseudo:
3585    case ARM::VLD3d8Pseudo_UPD:
3586    case ARM::VLD3d16Pseudo_UPD:
3587    case ARM::VLD3d32Pseudo_UPD:
3588    case ARM::VLD3q8Pseudo_UPD:
3589    case ARM::VLD3q16Pseudo_UPD:
3590    case ARM::VLD3q32Pseudo_UPD:
3591    case ARM::VLD3q8oddPseudo:
3592    case ARM::VLD3q16oddPseudo:
3593    case ARM::VLD3q32oddPseudo:
3594    case ARM::VLD3q8oddPseudo_UPD:
3595    case ARM::VLD3q16oddPseudo_UPD:
3596    case ARM::VLD3q32oddPseudo_UPD:
3597    case ARM::VLD4d8Pseudo:
3598    case ARM::VLD4d16Pseudo:
3599    case ARM::VLD4d32Pseudo:
3600    case ARM::VLD1d64QPseudo:
3601    case ARM::VLD4d8Pseudo_UPD:
3602    case ARM::VLD4d16Pseudo_UPD:
3603    case ARM::VLD4d32Pseudo_UPD:
3604    case ARM::VLD4q8Pseudo_UPD:
3605    case ARM::VLD4q16Pseudo_UPD:
3606    case ARM::VLD4q32Pseudo_UPD:
3607    case ARM::VLD4q8oddPseudo:
3608    case ARM::VLD4q16oddPseudo:
3609    case ARM::VLD4q32oddPseudo:
3610    case ARM::VLD4q8oddPseudo_UPD:
3611    case ARM::VLD4q16oddPseudo_UPD:
3612    case ARM::VLD4q32oddPseudo_UPD:
3613    case ARM::VLD1DUPq8:
3614    case ARM::VLD1DUPq16:
3615    case ARM::VLD1DUPq32:
3616    case ARM::VLD1DUPq8wb_fixed:
3617    case ARM::VLD1DUPq16wb_fixed:
3618    case ARM::VLD1DUPq32wb_fixed:
3619    case ARM::VLD1DUPq8wb_register:
3620    case ARM::VLD1DUPq16wb_register:
3621    case ARM::VLD1DUPq32wb_register:
3622    case ARM::VLD2DUPd8:
3623    case ARM::VLD2DUPd16:
3624    case ARM::VLD2DUPd32:
3625    case ARM::VLD2DUPd8wb_fixed:
3626    case ARM::VLD2DUPd16wb_fixed:
3627    case ARM::VLD2DUPd32wb_fixed:
3628    case ARM::VLD2DUPd8wb_register:
3629    case ARM::VLD2DUPd16wb_register:
3630    case ARM::VLD2DUPd32wb_register:
3631    case ARM::VLD4DUPd8Pseudo:
3632    case ARM::VLD4DUPd16Pseudo:
3633    case ARM::VLD4DUPd32Pseudo:
3634    case ARM::VLD4DUPd8Pseudo_UPD:
3635    case ARM::VLD4DUPd16Pseudo_UPD:
3636    case ARM::VLD4DUPd32Pseudo_UPD:
3637    case ARM::VLD1LNq8Pseudo:
3638    case ARM::VLD1LNq16Pseudo:
3639    case ARM::VLD1LNq32Pseudo:
3640    case ARM::VLD1LNq8Pseudo_UPD:
3641    case ARM::VLD1LNq16Pseudo_UPD:
3642    case ARM::VLD1LNq32Pseudo_UPD:
3643    case ARM::VLD2LNd8Pseudo:
3644    case ARM::VLD2LNd16Pseudo:
3645    case ARM::VLD2LNd32Pseudo:
3646    case ARM::VLD2LNq16Pseudo:
3647    case ARM::VLD2LNq32Pseudo:
3648    case ARM::VLD2LNd8Pseudo_UPD:
3649    case ARM::VLD2LNd16Pseudo_UPD:
3650    case ARM::VLD2LNd32Pseudo_UPD:
3651    case ARM::VLD2LNq16Pseudo_UPD:
3652    case ARM::VLD2LNq32Pseudo_UPD:
3653    case ARM::VLD4LNd8Pseudo:
3654    case ARM::VLD4LNd16Pseudo:
3655    case ARM::VLD4LNd32Pseudo:
3656    case ARM::VLD4LNq16Pseudo:
3657    case ARM::VLD4LNq32Pseudo:
3658    case ARM::VLD4LNd8Pseudo_UPD:
3659    case ARM::VLD4LNd16Pseudo_UPD:
3660    case ARM::VLD4LNd32Pseudo_UPD:
3661    case ARM::VLD4LNq16Pseudo_UPD:
3662    case ARM::VLD4LNq32Pseudo_UPD:
3663      // If the address is not 64-bit aligned, the latencies of these
3664      // instructions increase by one.
3665      ++Latency;
3666      break;
3667    }
3668
3669  return Latency;
3670}
3671
3672unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
3673                                           const MachineInstr *MI,
3674                                           unsigned *PredCost) const {
3675  if (MI->isCopyLike() || MI->isInsertSubreg() ||
3676      MI->isRegSequence() || MI->isImplicitDef())
3677    return 1;
3678
3679  // An instruction scheduler typically runs on unbundled instructions; however,
3680  // other passes may query the latency of a bundled instruction.
3681  if (MI->isBundle()) {
3682    unsigned Latency = 0;
3683    MachineBasicBlock::const_instr_iterator I = MI;
3684    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
3685    while (++I != E && I->isInsideBundle()) {
3686      if (I->getOpcode() != ARM::t2IT)
3687        Latency += getInstrLatency(ItinData, I, PredCost);
3688    }
3689    return Latency;
3690  }
3691
3692  const MCInstrDesc &MCID = MI->getDesc();
3693  if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
3694    // When predicated, CPSR is an additional source operand for CPSR updating
3695    // instructions, which apparently increases their latencies.
3696    *PredCost = 1;
3697  }
3698  // Be sure to call getStageLatency for an empty itinerary in case it has a
3699  // valid MinLatency property.
3700  if (!ItinData)
3701    return MI->mayLoad() ? 3 : 1;
3702
3703  unsigned Class = MCID.getSchedClass();
3704
3705  // For instructions with variable uops, use uops as latency.
3706  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
3707    return getNumMicroOps(ItinData, MI);
3708
3709  // For the common case, fall back on the itinerary's latency.
3710  unsigned Latency = ItinData->getStageLatency(Class);
3711
3712  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
3713  unsigned DefAlign = MI->hasOneMemOperand()
3714    ? (*MI->memoperands_begin())->getAlignment() : 0;
3715  int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
3716  if (Adj >= 0 || (int)Latency > -Adj) {
3717    return Latency + Adj;
3718  }
3719  return Latency;
3720}
3721
3722int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
3723                                      SDNode *Node) const {
3724  if (!Node->isMachineOpcode())
3725    return 1;
3726
3727  if (!ItinData || ItinData->isEmpty())
3728    return 1;
3729
3730  unsigned Opcode = Node->getMachineOpcode();
3731  switch (Opcode) {
3732  default:
3733    return ItinData->getStageLatency(get(Opcode).getSchedClass());
3734  case ARM::VLDMQIA:
3735  case ARM::VSTMQIA:
3736    return 2;
3737  }
3738}
3739
3740bool ARMBaseInstrInfo::
3741hasHighOperandLatency(const InstrItineraryData *ItinData,
3742                      const MachineRegisterInfo *MRI,
3743                      const MachineInstr *DefMI, unsigned DefIdx,
3744                      const MachineInstr *UseMI, unsigned UseIdx) const {
3745  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
3746  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
3747  if (Subtarget.isCortexA8() &&
3748      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
3749    // CortexA8 VFP instructions are not pipelined.
3750    return true;
3751
3752  // Hoist VFP / NEON instructions with 4 or higher latency.
3753  int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
3754  if (Latency < 0)
3755    Latency = getInstrLatency(ItinData, DefMI);
3756  if (Latency <= 3)
3757    return false;
3758  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
3759         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
3760}
3761
3762bool ARMBaseInstrInfo::
3763hasLowDefLatency(const InstrItineraryData *ItinData,
3764                 const MachineInstr *DefMI, unsigned DefIdx) const {
3765  if (!ItinData || ItinData->isEmpty())
3766    return false;
3767
3768  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
3769  if (DDomain == ARMII::DomainGeneral) {
3770    unsigned DefClass = DefMI->getDesc().getSchedClass();
3771    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3772    return (DefCycle != -1 && DefCycle <= 2);
3773  }
3774  return false;
3775}
3776
3777bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
3778                                         StringRef &ErrInfo) const {
3779  if (convertAddSubFlagsOpcode(MI->getOpcode())) {
3780    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
3781    return false;
3782  }
3783  return true;
3784}
3785
3786bool
3787ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
3788                                     unsigned &AddSubOpc,
3789                                     bool &NegAcc, bool &HasLane) const {
3790  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
3791  if (I == MLxEntryMap.end())
3792    return false;
3793
3794  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
3795  MulOpc = Entry.MulOpc;
3796  AddSubOpc = Entry.AddSubOpc;
3797  NegAcc = Entry.NegAcc;
3798  HasLane = Entry.HasLane;
3799  return true;
3800}
3801
3802//===----------------------------------------------------------------------===//
3803// Execution domains.
3804//===----------------------------------------------------------------------===//
3805//
3806// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
3807// and some can go down both.  The vmov instructions go down the VFP pipeline,
3808// but they can be changed to vorr equivalents that are executed by the NEON
3809// pipeline.
3810//
3811// We use the following execution domain numbering:
3812//
3813enum ARMExeDomain {
3814  ExeGeneric = 0,
3815  ExeVFP = 1,
3816  ExeNEON = 2
3817};
3818//
3819// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
3820//
3821std::pair<uint16_t, uint16_t>
3822ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
3823  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
3824  // if they are not predicated.
3825  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
3826    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
3827
3828  // CortexA9 is particularly picky about mixing the two and wants these
3829  // converted.
3830  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
3831      (MI->getOpcode() == ARM::VMOVRS ||
3832       MI->getOpcode() == ARM::VMOVSR ||
3833       MI->getOpcode() == ARM::VMOVS))
3834    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
3835
3836  // No other instructions can be swizzled, so just determine their domain.
3837  unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
3838
3839  if (Domain & ARMII::DomainNEON)
3840    return std::make_pair(ExeNEON, 0);
3841
3842  // Certain instructions can go either way on Cortex-A8.
3843  // Treat them as NEON instructions.
3844  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
3845    return std::make_pair(ExeNEON, 0);
3846
3847  if (Domain & ARMII::DomainVFP)
3848    return std::make_pair(ExeVFP, 0);
3849
3850  return std::make_pair(ExeGeneric, 0);
3851}
3852
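/// getCorrespondingDRegAndLane - Map an S-register to its containing
/// D-register and lane: Sn corresponds to D(n/2) with lane n%2, e.g. S5 is
/// lane 1 of D2.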
3853static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
3854                                            unsigned SReg, unsigned &Lane) {
3855  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
3856  Lane = 0;
3857
3858  if (DReg != ARM::NoRegister)
3859    return DReg;
3860
3861  Lane = 1;
3862  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
3863
3864  assert(DReg && "S-register with no D super-register?");
3865  return DReg;
3866}
3867
3868/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set
3869/// ImplicitSReg to the register number that must be marked as implicit-use,
3870/// or to zero if no register needs to be marked as implicit-use.
3871///
3872/// If the function cannot determine whether an SPR should be marked as
3873/// implicit-use, it returns false.
3874///
3875/// This function handles cases where an instruction is being modified from
3876/// taking an SPR to taking DPR[Lane]. A use of the DPR is being added, which
3877/// may conflict with an earlier def of an SPR corresponding to DPR[Lane^1]
3878/// (i.e. the other lane of the DPR).
3879///
3880/// If the other SPR is defined, an implicit-use of it should be added.
3881/// Otherwise (including when the DPR itself is defined), it should not.
3882///
3883static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
3884                                       MachineInstr *MI,
3885                                       unsigned DReg, unsigned Lane,
3886                                       unsigned &ImplicitSReg) {
3887  // If the DPR is defined or used already, the other SPR lane will be chained
3888  // correctly, so there is nothing to be done.
3889  if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
3890    ImplicitSReg = 0;
3891    return true;
3892  }
3893
3894  // Otherwise we need to go searching to see if the SPR is set explicitly.
3895  ImplicitSReg = TRI->getSubReg(DReg,
3896                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
3897  MachineBasicBlock::LivenessQueryResult LQR =
3898    MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
3899
3900  if (LQR == MachineBasicBlock::LQR_Live)
3901    return true;
3902  else if (LQR == MachineBasicBlock::LQR_Unknown)
3903    return false;
3904
3905  // If the register is known not to be live, there is no need to add an
3906  // implicit-use.
3907  ImplicitSReg = 0;
3908  return true;
3909}
3910
3911void
3912ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
3913  unsigned DstReg, SrcReg, DReg;
3914  unsigned Lane;
3915  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
3916  const TargetRegisterInfo *TRI = &getRegisterInfo();
3917  switch (MI->getOpcode()) {
3918    default:
3919      llvm_unreachable("cannot handle opcode!");
3920      break;
3921    case ARM::VMOVD:
3922      if (Domain != ExeNEON)
3923        break;
3924
3925      // Zap the predicate operands.
3926      assert(!isPredicated(MI) && "Cannot predicate a VORRd");
3927
3928      // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
3929      DstReg = MI->getOperand(0).getReg();
3930      SrcReg = MI->getOperand(1).getReg();
3931
3932      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
3933        MI->RemoveOperand(i-1);
3934
3935      // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
3936      MI->setDesc(get(ARM::VORRd));
3937      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
3938                        .addReg(SrcReg)
3939                        .addReg(SrcReg));
3940      break;
3941    case ARM::VMOVRS:
3942      if (Domain != ExeNEON)
3943        break;
3944      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
3945
3946      // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
3947      DstReg = MI->getOperand(0).getReg();
3948      SrcReg = MI->getOperand(1).getReg();
3949
3950      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
3951        MI->RemoveOperand(i-1);
3952
3953      DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
3954
3955      // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
3956      // Note that DSrc has been widened and the other lane may be undef, which
3957      // contaminates the entire register.
3958      MI->setDesc(get(ARM::VGETLNi32));
3959      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
3960                        .addReg(DReg, RegState::Undef)
3961                        .addImm(Lane));
3962
3963      // The old source should be an implicit use; otherwise we might think it
3964      // was dead before this point.
3965      MIB.addReg(SrcReg, RegState::Implicit);
3966      break;
3967    case ARM::VMOVSR: {
3968      if (Domain != ExeNEON)
3969        break;
3970      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
3971
3972      // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
3973      DstReg = MI->getOperand(0).getReg();
3974      SrcReg = MI->getOperand(1).getReg();
3975
3976      DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
3977
3978      unsigned ImplicitSReg;
3979      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
3980        break;
3981
3982      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
3983        MI->RemoveOperand(i-1);
3984
3985      // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
3986      // Again DDst may be undefined at the beginning of this instruction.
3987      MI->setDesc(get(ARM::VSETLNi32));
3988      MIB.addReg(DReg, RegState::Define)
3989         .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
3990         .addReg(SrcReg)
3991         .addImm(Lane);
3992      AddDefaultPred(MIB);
3993
3994      // The narrower destination must be marked as set to keep previous chains
3995      // in place.
3996      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
3997      if (ImplicitSReg != 0)
3998        MIB.addReg(ImplicitSReg, RegState::Implicit);
3999      break;
4000    }
4001    case ARM::VMOVS: {
4002      if (Domain != ExeNEON)
4003        break;
4004
4005      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4006      DstReg = MI->getOperand(0).getReg();
4007      SrcReg = MI->getOperand(1).getReg();
4008
4009      unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4010      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4011      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4012
4013      unsigned ImplicitSReg;
4014      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4015        break;
4016
4017      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4018        MI->RemoveOperand(i-1);
4019
4020      if (DSrc == DDst) {
4021        // Destination can be:
4022        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4023        MI->setDesc(get(ARM::VDUPLN32d));
4024        MIB.addReg(DDst, RegState::Define)
4025           .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
4026           .addImm(SrcLane);
4027        AddDefaultPred(MIB);
4028
4029        // Neither the source nor the destination is naturally represented any
4030        // more, so add them back in manually.
4031        MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4032        MIB.addReg(SrcReg, RegState::Implicit);
4033        if (ImplicitSReg != 0)
4034          MIB.addReg(ImplicitSReg, RegState::Implicit);
4035        break;
4036      }
4037
4038      // In general there's no single instruction that can perform an S <-> S
4039      // move in NEON space, but a pair of VEXT instructions *can* do the
4040      // job. It turns out that the VEXTs needed will only use DSrc once, with
4041      // the position based purely on the combination of lane-0 and lane-1
4042      // involved. For example
4043      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
4044      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
4045      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
4046      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
4047      //
4048      // Pattern of the MachineInstrs is:
4049      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4050      MachineInstrBuilder NewMIB;
4051      NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
4052                       get(ARM::VEXTd32), DDst);
4053
4054      // On the first instruction, both DSrc and DDst may be <undef> if present,
4055      // specifically when the original instruction didn't have them as an
4056      // <imp-use>.
4057      unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4058      bool CurUndef = !MI->readsRegister(CurReg, TRI);
4059      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4060
4061      CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4062      CurUndef = !MI->readsRegister(CurReg, TRI);
4063      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4064
4065      NewMIB.addImm(1);
4066      AddDefaultPred(NewMIB);
4067
4068      if (SrcLane == DstLane)
4069        NewMIB.addReg(SrcReg, RegState::Implicit);
4070
4071      MI->setDesc(get(ARM::VEXTd32));
4072      MIB.addReg(DDst, RegState::Define);
4073
4074      // On the second instruction, DDst has definitely been defined above, so
4075      // it is not <undef>. DSrc, if present, can be <undef> as above.
4076      CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4077      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
4078      MIB.addReg(CurReg, getUndefRegState(CurUndef));
4079
4080      CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4081      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
4082      MIB.addReg(CurReg, getUndefRegState(CurUndef));
4083
4084      MIB.addImm(1);
4085      AddDefaultPred(MIB);
4086
4087      if (SrcLane != DstLane)
4088        MIB.addReg(SrcReg, RegState::Implicit);
4089
4090      // As before, the original destination is no longer represented, add it
4091      // implicitly.
4092      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4093      if (ImplicitSReg != 0)
4094        MIB.addReg(ImplicitSReg, RegState::Implicit);
4095      break;
4096    }
4097  }
4098
4099}
4100
4101//===----------------------------------------------------------------------===//
4102// Partial register updates
4103//===----------------------------------------------------------------------===//
4104//
4105// Swift renames NEON registers with 64-bit granularity.  That means any
4106// instruction writing an S-reg implicitly reads the containing D-reg.  The
4107// problem is mostly avoided by translating f32 operations to v2f32 operations
4108// on D-registers, but f32 loads are still a problem.
4109//
4110// These instructions can load an f32 into a NEON register:
4111//
4112// VLDRS - Only writes S, partial D update.
4113// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4114// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4115//
4116// FCONSTD can be used as a dependency-breaking instruction.
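// For example, a VLDRS into S0 writes only half of D0, so a later read of D0
// would be serialized behind the load; clobbering all of D0 with FCONSTD
// first breaks that false dependency.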
4117unsigned ARMBaseInstrInfo::
4118getPartialRegUpdateClearance(const MachineInstr *MI,
4119                             unsigned OpNum,
4120                             const TargetRegisterInfo *TRI) const {
4121  if (!SwiftPartialUpdateClearance ||
4122      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
4123    return 0;
4124
4125  assert(TRI && "Need TRI instance");
4126
4127  const MachineOperand &MO = MI->getOperand(OpNum);
4128  if (MO.readsReg())
4129    return 0;
4130  unsigned Reg = MO.getReg();
4131  int UseOp = -1;
4132
4133  switch(MI->getOpcode()) {
4134    // Normal instructions writing only an S-register.
4135  case ARM::VLDRS:
4136  case ARM::FCONSTS:
4137  case ARM::VMOVSR:
4138  case ARM::VMOVv8i8:
4139  case ARM::VMOVv4i16:
4140  case ARM::VMOVv2i32:
4141  case ARM::VMOVv2f32:
4142  case ARM::VMOVv1i64:
4143    UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
4144    break;
4145
4146    // Explicitly reads the dependency.
4147  case ARM::VLD1LNd32:
4148    UseOp = 3;
4149    break;
4150  default:
4151    return 0;
4152  }
4153
4154  // If this instruction actually reads a value from Reg, there is no unwanted
4155  // dependency.
4156  if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
4157    return 0;
4158
4159  // We must be able to clobber the whole D-reg.
4160  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4161    // Virtual register must be a foo:ssub_0<def,undef> operand.
4162    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
4163      return 0;
4164  } else if (ARM::SPRRegClass.contains(Reg)) {
4165    // Physical register: MI must define the full D-reg.
4166    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4167                                             &ARM::DPRRegClass);
4168    if (!DReg || !MI->definesRegister(DReg, TRI))
4169      return 0;
4170  }
4171
4172  // MI has an unwanted D-register dependency.
4173  // Avoid defs in the previous N instructions.
4174  return SwiftPartialUpdateClearance;
4175}
4176
4177// Break a partial register dependency after getPartialRegUpdateClearance
4178// returned non-zero.
4179void ARMBaseInstrInfo::
4180breakPartialRegDependency(MachineBasicBlock::iterator MI,
4181                          unsigned OpNum,
4182                          const TargetRegisterInfo *TRI) const {
4183  assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
4184  assert(TRI && "Need TRI instance");
4185
4186  const MachineOperand &MO = MI->getOperand(OpNum);
4187  unsigned Reg = MO.getReg();
4188  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4189         "Can't break virtual register dependencies.");
4190  unsigned DReg = Reg;
4191
4192  // If MI defines an S-reg, find the corresponding D super-register.
4193  if (ARM::SPRRegClass.contains(Reg)) {
4194    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4195    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4196  }
4197
4198  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4199  assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4200
4201  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4202  // the full D-register by loading the same value to both lanes.  The
4203  // instruction is micro-coded with 2 uops, so don't do this until we can
4204  // properly schedule micro-coded instructions.  The dispatcher stalls cause
4205  // too big regressions.
4206
4207  // Insert the dependency-breaking FCONSTD before MI.
4208  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4209  AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
4210                         get(ARM::FCONSTD), DReg).addImm(96));
4211  MI->addRegisterKilled(DReg, TRI, true);
4212}
4213
4214bool ARMBaseInstrInfo::hasNOP() const {
4215  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
4216}
4217
4218bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4219  if (MI->getNumOperands() < 4)
4220    return true;
4221  unsigned ShOpVal = MI->getOperand(3).getImm();
4222  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4223  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
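  // e.g. a shifter operand of lsl #2 qualifies, but lsl #3 does not.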
4224  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4225      ((ShImm == 1 || ShImm == 2) &&
4226       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4227    return true;
4228
4229  return false;
4230}
4231