ARMBaseInstrInfo.cpp revision 8239daf7c83a65a189c352cce3191cdc3bbfe151
//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
    Subtarget(STI) {
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
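  // Operand layout assumed by the code below for indexed memory ops: loads
  // are <dst, writeback, base, ...> and stores are <writeback, value,
  // base, ...>, with the offset register, offset immediate, and predicate
  // as the final three operands.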
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const TargetInstrDesc &TID = MI->getDesc();
  unsigned NumOps = TID.getNumOperands();
  bool isLoad = !TID.mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default:
    assert(false && "Unknown indexed op!");
    return NULL;
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8 bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.getReg() &&
          TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MIs in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

bool
ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    bool isKill = true;

    // Add the callee-saved register as live-in unless it's LR and
    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
    // then it's already added to the function and entry block live-in sets.
    if (Reg == ARM::LR) {
      MachineFunction &MF = *MBB.getParent();
      if (MF.getFrameInfo()->isReturnAddressTaken() &&
          MF.getRegInfo().isLiveIn(Reg))
        isKill = false;
    }

    if (isKill)
      MBB.addLiveIn(Reg);

    // Insert the spill to the stack frame. The register is killed at the
    // spill.
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    storeRegToStackSlot(MBB, MI, Reg, isKill,
                        CSI[i].getFrameIdx(), RC, TRI);
  }
  return true;
}

// Branch analysis.
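// Note on the contract (per TargetInstrInfo): returning false means the
// terminators were understood and TBB/FBB/Cond describe them; returning true
// means the branch structure could not be analyzed.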
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(LastInst->getOperand(1));
      Cond.push_back(LastInst->getOperand(2));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only remaining terminator is an unconditional
        // branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(SecondLastInst->getOperand(1));
    Cond.push_back(SecondLastInst->getOperand(2));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with a branch table followed by an unconditional
  // branch. The branch folder can create these, and we must get rid of them
  // for correctness of Thumb constant islands.
  if ((isJumpTableBranchOpcode(SecondLastOpc) ||
       isIndirectBranchOpcode(SecondLastOpc)) &&
      isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (FBB == 0) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

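  // CC1 subsumes CC2 when every flags state that satisfies CC2 also
  // satisfies CC1, e.g. HI (C && !Z) implies HS (C), and LO (!C) or EQ (Z)
  // each imply LS (!C || Z).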
  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  // FIXME: This confuses implicit_def with optional CPSR def.
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
    return false;

  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isPredicable())
    return false;

  if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
    ARMFunctionInfo *AFI =
      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
    return AFI->isThumb2Function();
  }
  return true;
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  // Basic size info comes from the TSFlags field.
  const TargetInstrDesc &TID = MI->getDesc();
  uint64_t TSFlags = TID.TSFlags;

  unsigned Opc = MI->getOpcode();
  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
  default: {
    // If this machine instr is an inline asm, measure it.
    if (MI->getOpcode() == ARM::INLINEASM)
      return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
    if (MI->isLabel())
      return 0;
    switch (Opc) {
    default:
      llvm_unreachable("Unknown or unset size field for instr!");
    case TargetOpcode::IMPLICIT_DEF:
    case TargetOpcode::KILL:
    case TargetOpcode::PROLOG_LABEL:
    case TargetOpcode::EH_LABEL:
    case TargetOpcode::DBG_VALUE:
      return 0;
    }
    break;
  }
  case ARMII::Size8Bytes: return 8;          // ARM instruction x 2.
  case ARMII::Size4Bytes: return 4;          // ARM / Thumb2 instruction.
  case ARMII::Size2Bytes: return 2;          // Thumb1 instruction.
  case ARMII::SizeSpecial: {
    switch (Opc) {
    case ARM::MOVi32imm:
    case ARM::t2MOVi32imm:
      return 8;
    case ARM::CONSTPOOL_ENTRY:
      // If this machine instr is a constant pool entry, its size is recorded
      // as operand #2.
      return MI->getOperand(2).getImm();
    case ARM::Int_eh_sjlj_longjmp:
      return 16;
    case ARM::tInt_eh_sjlj_longjmp:
      return 10;
    case ARM::Int_eh_sjlj_setjmp:
    case ARM::Int_eh_sjlj_setjmp_nofp:
      return 20;
    case ARM::tInt_eh_sjlj_setjmp:
    case ARM::t2Int_eh_sjlj_setjmp:
    case ARM::t2Int_eh_sjlj_setjmp_nofp:
      return 12;
    case ARM::BR_JTr:
    case ARM::BR_JTm:
    case ARM::BR_JTadd:
    case ARM::tBR_JTr:
    case ARM::t2BR_JT:
    case ARM::t2TBB:
    case ARM::t2TBH: {
      // These are jumptable branches, i.e. a branch followed by an inlined
      // jumptable. The size is the branch plus one entry per destination:
      // entries are normally four bytes each, one byte for TBB, and two
      // bytes for TBH.
      unsigned EntrySize = (Opc == ARM::t2TBB)
        ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4);
      unsigned NumOps = TID.getNumOperands();
      MachineOperand JTOP =
        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
      unsigned JTI = JTOP.getIndex();
      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
      assert(MJTI != 0);
      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
      assert(JTI < JT.size());
      // Thumb instructions are 2-byte aligned, but JT entries are 4-byte
      // aligned. The assembler / linker may add 2 bytes of padding just
      // before the JT entries.  The size does not include this padding; the
      // constant islands pass does separate bookkeeping for it.
      // FIXME: If we know the size of the function is less than (1 << 16) * 2
      // bytes, we can use 16-bit entries instead. Then there won't be an
      // alignment issue.
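      // For example, a t2TBB with five targets is reported as the 4-byte TBB
      // plus 6 bytes of table (five 1-byte entries rounded up to keep the
      // following instruction 2-byte aligned).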
      unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
      unsigned NumEntries = getNumJTEntries(JT, JTI);
      if (Opc == ARM::t2TBB && (NumEntries & 1))
        // Make sure the instruction that follows TBB is 2-byte aligned.
        // FIXME: Constant island pass should insert an "ALIGN" instruction
        // instead.
        ++NumEntries;
      return NumEntries * EntrySize + InstSize;
    }
    default:
      // Otherwise, pseudo-instruction sizes are zero.
      return 0;
    }
  }
  }
  return 0; // Not reached
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                  .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQ;
  else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQQ;
  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQQQQ;
  else
    llvm_unreachable("Impossible reg-to-reg copy");

  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
  MIB.addReg(SrcReg, getKillRegState(KillSrc));
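  // Note: VMOVQQ / VMOVQQQQ take no predicate operands here (they are
  // pseudo-instructions expanded later), so only the other opcodes get the
  // default predicate appended.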
  if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
    AddDefaultPred(MIB);
}

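/// AddDReg - Add a D-subregister operand of Reg to MIB. For a physical
/// register the concrete sub-register is added directly; for a virtual
/// register the sub-register index is attached to the operand instead.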
static const
MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
                             unsigned Reg, unsigned SubIdx, unsigned State,
                             const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(
                                         PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is sometimes used in ARM instructions that need to avoid using
  // certain registers.  Just treat it as GPR here. Likewise, rGPR.
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      // FIXME: It's possible to only store part of the QQ register if the
      // spilled def has a sub-register index.
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
                       .addFrameIndex(FI)
                       .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
            AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
      .addMemOperand(MMO);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

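/// isStoreToStackSlot - If MI is a direct store of a register to a stack
/// slot, return the register stored and set FrameIndex to the slot;
/// otherwise return 0.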
unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSpill:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64Pseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQ:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia) &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is sometimes used in ARM instructions that need to avoid using
  // certain registers.  Just treat it as GPR here.
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
                       .addFrameIndex(FI)
                       .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
            AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
      .addMemOperand(MMO);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
    AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

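/// isLoadFromStackSlot - If MI is a direct load from a stack slot, return
/// the destination register and set FrameIndex to the slot; otherwise
/// return 0.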
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tRestore:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64Pseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQ:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia) &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

MachineInstr*
ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                           int FrameIx, uint64_t Offset,
                                           const MDNode *MDPtr,
                                           DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
    .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
  return &*MIB;
}

/// Create a copy of a const pool value. Update CPI to the new index and
/// return the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createConstPoolEntryUId();
  ARMConstantPoolValue *NewCPV = 0;
  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
                                      ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
                                      ACPV->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
                                      ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
                                      ARMCP::CPLSDA, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1) const {
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    ARMConstantPoolValue *ACPV0 =
      static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
    ARMConstantPoolValue *ACPV1 =
      static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
    return ACPV0->hasSameValue(ACPV1);
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
    return false;  // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}

bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI->isDebugValue())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any dbg_value instructions.
  while (++I != MBB->end() && I->isDebugValue())
    ;
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  if (MI->definesRegister(ARM::SP))
    return true;

  return false;
}

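/// isProfitableToIfCvt - Heuristic: predication is profitable when the cost
/// of executing the block unconditionally (plus any extra predication
/// overhead) is below the expected cost of the branch, which includes a
/// confidence-weighted misprediction penalty.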
bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                           unsigned NumCycles,
                                           unsigned ExtraPredCycles,
                                           float Probability,
                                           float Confidence) const {
  if (!NumCycles)
    return false;

  // Attempt to estimate the relative costs of predication versus branching.
  float UnpredCost = Probability * NumCycles;
  UnpredCost += 1.0; // The branch itself
  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();

  return (float)(NumCycles + ExtraPredCycles) < UnpredCost;
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TMBB,
                    unsigned TCycles, unsigned TExtra,
                    MachineBasicBlock &FMBB,
                    unsigned FCycles, unsigned FExtra,
                    float Probability, float Confidence) const {
  if (!TCycles || !FCycles)
    return false;

  // Attempt to estimate the relative costs of predication versus branching.
  float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
  UnpredCost += 1.0; // The branch itself
  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();

  return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
}

/// getInstrPredicate - If the instruction is predicated, returns its predicate
/// condition; otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes
llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}


int llvm::getMatchingCondBranchOpcode(int Opc) {
  if (Opc == ARM::B)
    return ARM::Bcc;
  else if (Opc == ARM::tB)
    return ARM::tBcc;
  else if (Opc == ARM::t2B)
    return ARM::t2Bcc;

  llvm_unreachable("Unknown unconditional branch opcode!");
  return 0;
}


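/// emitARMRegPlusImmediate - Emit a sequence of ADD/SUB instructions that
/// materializes BaseReg + NumBytes into DestReg, peeling the constant into
/// chunks that each fit an ARM so_imm (an 8-bit value rotated right by an
/// even amount).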
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                               unsigned DestReg, unsigned BaseReg, int NumBytes,
                               ARMCC::CondCodes Pred, unsigned PredReg,
                               const ARMBaseInstrInfo &TII) {
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
    BaseReg = DestReg;
  }
}

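/// rewriteARMFrameIndex - Rewrite the frame index operand of MI (at
/// FrameRegIdx) to use FrameReg, folding as much of Offset into the
/// instruction's immediate field as its addressing mode allows. Returns true
/// if the offset was folded completely; any remainder is passed back in
/// Offset.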
1292bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
1293                                unsigned FrameReg, int &Offset,
1294                                const ARMBaseInstrInfo &TII) {
1295  unsigned Opcode = MI.getOpcode();
1296  const TargetInstrDesc &Desc = MI.getDesc();
1297  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
1298  bool isSub = false;
1299
1300  // Memory operands in inline assembly always use AddrMode2.
1301  if (Opcode == ARM::INLINEASM)
1302    AddrMode = ARMII::AddrMode2;
1303
1304  if (Opcode == ARM::ADDri) {
1305    Offset += MI.getOperand(FrameRegIdx+1).getImm();
1306    if (Offset == 0) {
1307      // Turn it into a move.
1308      MI.setDesc(TII.get(ARM::MOVr));
1309      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1310      MI.RemoveOperand(FrameRegIdx+1);
1311      Offset = 0;
1312      return true;
1313    } else if (Offset < 0) {
1314      Offset = -Offset;
1315      isSub = true;
1316      MI.setDesc(TII.get(ARM::SUBri));
1317    }
1318
1319    // Common case: small offset, fits into instruction.
1320    if (ARM_AM::getSOImmVal(Offset) != -1) {
1321      // Replace the FrameIndex with sp / fp
1322      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1323      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
1324      Offset = 0;
1325      return true;
1326    }
1327
1328    // Otherwise, pull as much of the immedidate into this ADDri/SUBri
1329    // as possible.
1330    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
1331    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
1332
1333    // We will handle these bits from offset, clear them.
1334    Offset &= ~ThisImmVal;
1335
1336    // Get the properly encoded SOImmVal field.
1337    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
1338           "Bit extraction didn't work?");
1339    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
1340 } else {
1341    unsigned ImmIdx = 0;
1342    int InstrOffs = 0;
1343    unsigned NumBits = 0;
1344    unsigned Scale = 1;
1345    switch (AddrMode) {
1346    case ARMII::AddrMode_i12: {
1347      ImmIdx = FrameRegIdx + 1;
1348      InstrOffs = MI.getOperand(ImmIdx).getImm();
1349      NumBits = 12;
1350      break;
1351    }
1352    case ARMII::AddrMode2: {
1353      ImmIdx = FrameRegIdx+2;
1354      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
1355      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1356        InstrOffs *= -1;
1357      NumBits = 12;
1358      break;
1359    }
1360    case ARMII::AddrMode3: {
1361      ImmIdx = FrameRegIdx+2;
1362      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
1363      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1364        InstrOffs *= -1;
1365      NumBits = 8;
1366      break;
1367    }
1368    case ARMII::AddrMode4:
1369    case ARMII::AddrMode6:
1370      // Can't fold any offset even if it's zero.
1371      return false;
1372    case ARMII::AddrMode5: {
1373      ImmIdx = FrameRegIdx+1;
1374      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
1375      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1376        InstrOffs *= -1;
1377      NumBits = 8;
1378      Scale = 4;
1379      break;
1380    }
1381    default:
1382      llvm_unreachable("Unsupported addressing mode!");
1383      break;
1384    }
1385
1386    Offset += InstrOffs * Scale;
1387    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
1388    if (Offset < 0) {
1389      Offset = -Offset;
1390      isSub = true;
1391    }
1392
1393    // Attempt to fold address comp. if opcode has offset bits
1394    if (NumBits > 0) {
1395      // Common case: small offset, fits into instruction.
1396      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
1397      int ImmedOffset = Offset / Scale;
1398      unsigned Mask = (1 << NumBits) - 1;
1399      if ((unsigned)Offset <= Mask * Scale) {
1400        // Replace the FrameIndex with sp
1401        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1402        // FIXME: When addrmode2 goes away, this will simplify (like the
1403        // T2 version), as the LDR.i12 versions don't need the encoding
1404        // tricks for the offset value.
1405        if (isSub) {
1406          if (AddrMode == ARMII::AddrMode_i12)
1407            ImmedOffset = -ImmedOffset;
1408          else
1409            ImmedOffset |= 1 << NumBits;
1410        }
1411        ImmOp.ChangeToImmediate(ImmedOffset);
1412        Offset = 0;
1413        return true;
1414      }
1415
1416      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
1417      ImmedOffset = ImmedOffset & Mask;
1418      if (isSub) {
1419        if (AddrMode == ARMII::AddrMode_i12)
1420          ImmedOffset = -ImmedOffset;
1421        else
1422          ImmedOffset |= 1 << NumBits;
1423      }
1424      ImmOp.ChangeToImmediate(ImmedOffset);
1425      Offset &= ~(Mask*Scale);
1426    }
1427  }
1428
1429  Offset = (isSub) ? -Offset : Offset;
1430  return Offset == 0;
1431}
1432
1433bool ARMBaseInstrInfo::
1434AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
1435               int &CmpValue) const {
1436  switch (MI->getOpcode()) {
1437  default: break;
1438  case ARM::CMPri:
1439  case ARM::CMPzri:
1440  case ARM::t2CMPri:
1441  case ARM::t2CMPzri:
1442    SrcReg = MI->getOperand(0).getReg();
1443    CmpMask = ~0;
1444    CmpValue = MI->getOperand(1).getImm();
1445    return true;
1446  case ARM::TSTri:
1447  case ARM::t2TSTri:
1448    SrcReg = MI->getOperand(0).getReg();
1449    CmpMask = MI->getOperand(1).getImm();
1450    CmpValue = 0;
1451    return true;
1452  }
1453
1454  return false;
1455}

/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
    case ARM::ANDri:
    case ARM::t2ANDri:
      if (CmpMask != MI->getOperand(2).getImm())
        return false;
      if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
        return true;
      break;
    case ARM::COPY: {
      // Walk down one instruction which is potentially an 'and'.
      const MachineInstr &Copy = *MI;
      MachineBasicBlock::iterator AND(
        llvm::next(MachineBasicBlock::iterator(MI)));
      if (AND == MI->getParent()->end()) return false;
      MI = AND;
      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
                               CmpMask, true);
    }
  }

  return false;
}

/// OptimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
/// Update the iterator *only* if a transformation took place.
bool ARMBaseInstrInfo::
OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
                     int CmpValue, const MachineRegisterInfo *MRI,
                     MachineBasicBlock::iterator &MII) const {
  if (CmpValue != 0)
    return false;

  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
  if (llvm::next(DI) != MRI->def_end())
    // Only support one definition.
    return false;

  MachineInstr *MI = &*DI;

  // Masked compares sometimes use the same register as the corresponding
  // 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
      MI = 0;
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
           UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr->getParent()) continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Conservatively refuse to convert an instruction which isn't in the same
  // BB as the comparison.
  if (MI->getParent() != CmpInstr->getParent())
    return false;

  // Check that CPSR isn't set between the comparison instruction and the one
  // we want to change.
  MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
    B = MI->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  --I;
  for (; I != E; --I) {
    const MachineInstr &Instr = *I;

    for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (!MO.isReg()) continue;

      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      if (MO.getReg() == ARM::CPSR)
        return false;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Set the "zero" bit in CPSR.
  switch (MI->getOpcode()) {
  default: break;
  case ARM::ADDri:
  case ARM::ANDri:
  case ARM::t2ANDri:
  case ARM::SUBri:
  case ARM::t2ADDri:
  case ARM::t2SUBri:
    MI->RemoveOperand(5);
    MachineInstrBuilder(MI)
      .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
    MII = llvm::next(MachineBasicBlock::iterator(CmpInstr));
    CmpInstr->eraseFromParent();
    return true;
  }

  return false;
}

unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const TargetInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
  if (UOps)
    return UOps;

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
    break;
  case ARM::VLDMQ:
  case ARM::VSTMQ:
    return 2;

  // The number of uOps for load / store multiple is determined by the number
  // of registers.
  // On Cortex-A8, each pair of register loads / stores can be scheduled on
  // the same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the
  // address is not 64-bit aligned, then the AGU takes an extra cycle.
  // For VFP / NEON load / store multiple, the formula is
  // (#reg / 2) + (#reg % 2) + 1.
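  // E.g. a 5-register VLDMD: (5 / 2) + (5 % 2) + 1 == 4 uops.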
  case ARM::VLDMD:
  case ARM::VLDMS:
  case ARM::VLDMD_UPD:
  case ARM::VLDMS_UPD:
  case ARM::VSTMD:
  case ARM::VSTMS:
  case ARM::VSTMD_UPD:
  case ARM::VSTMS_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }
  case ARM::LDM_RET:
  case ARM::LDM:
  case ARM::LDM_UPD:
  case ARM::STM:
  case ARM::STM_UPD:
  case ARM::tLDM:
  case ARM::tLDM_UPD:
  case ARM::tSTM_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDM_RET:
  case ARM::t2LDM:
  case ARM::t2LDM_UPD:
  case ARM::t2STM:
  case ARM::t2STM_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    } else if (Subtarget.isCortexA9()) {
      UOps = (NumRegs / 2);
      // If there is an odd number of registers or if the address is not
      // 64-bit aligned, then it takes an extra AGU (Address Generation Unit)
      // cycle.
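      // E.g. an aligned 5-register LDM: 5 / 2 == 2 uops, plus 1 for the odd
      // register == 3 uops.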
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++UOps;
      return UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}

int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const TargetInstrDesc &DefTID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
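    // E.g. RegNo == 3: (3 / 2) + (3 % 2) + 1 == 3.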
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isCortexA9()) {
    DefCycle = RegNo;
    bool isSLoad = false;
    switch (DefTID.getOpcode()) {
    default: break;
    case ARM::VLDMS:
    case ARM::VLDMS_UPD:
      isSLoad = true;
      break;
    }
    // If there is an odd number of 'S' registers or if the address is not
    // 64-bit aligned, then it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
                                 const TargetInstrDesc &DefTID,
                                 unsigned DefClass,
                                 unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // 4 registers would be issued: 1, 2, 1.
    // 5 registers would be issued: 1, 2, 2.
    DefCycle = RegNo / 2;
    if (DefCycle < 1)
      DefCycle = 1;
    // Result latency is issue cycle + 2: E2.
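    // E.g. RegNo == 5: issue cycle 5 / 2 == 2, so DefCycle == 2 + 2 == 4.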
    DefCycle += 2;
  } else if (Subtarget.isCortexA9()) {
    DefCycle = (RegNo / 2);
    // If there is an odd number of registers or if the address is not 64-bit
    // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || DefAlign < 8)
      ++DefCycle;
    // Result latency is AGU cycles + 2.
    DefCycle += 2;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

int
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
                                  const TargetInstrDesc &UseTID,
                                  unsigned UseClass,
                                  unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
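    // E.g. RegNo == 3: (3 / 2) + (3 % 2) + 1 == 3.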
    UseCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++UseCycle;
  } else if (Subtarget.isCortexA9()) {
    UseCycle = RegNo;
    bool isSStore = false;
    switch (UseTID.getOpcode()) {
    default: break;
    case ARM::VSTMS:
    case ARM::VSTMS_UPD:
      isSStore = true;
      break;
    }
    // If there is an odd number of 'S' registers or if the address is not
    // 64-bit aligned, then it takes an extra cycle.
    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = RegNo + 2;
  }

  return UseCycle;
}

int
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
                                 const TargetInstrDesc &UseTID,
                                 unsigned UseClass,
                                 unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8()) {
    UseCycle = RegNo / 2;
    if (UseCycle < 2)
      UseCycle = 2;
    // Read in E3.
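    // E.g. RegNo == 5: max(5 / 2, 2) == 2, so UseCycle == 2 + 2 == 4.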
    UseCycle += 2;
  } else if (Subtarget.isCortexA9()) {
    UseCycle = (RegNo / 2);
    // If there is an odd number of registers or if the address is not 64-bit
    // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = 1;
  }
  return UseCycle;
}

int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const TargetInstrDesc &DefTID,
                                    unsigned DefIdx, unsigned DefAlign,
                                    const TargetInstrDesc &UseTID,
                                    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefTID.getSchedClass();
  unsigned UseClass = UseTID.getSchedClass();

  if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction; the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  int DefCycle = -1;
  bool LdmBypass = false;
  switch (DefTID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;
  case ARM::VLDMD:
  case ARM::VLDMS:
  case ARM::VLDMD_UPD:
  case ARM::VLDMS_UPD: {
    DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
    break;
  }
  case ARM::LDM_RET:
  case ARM::LDM:
  case ARM::LDM_UPD:
  case ARM::tLDM:
  case ARM::tLDM_UPD:
  case ARM::tPUSH:
  case ARM::t2LDM_RET:
  case ARM::t2LDM:
  case ARM::t2LDM_UPD: {
    LdmBypass = true;
    DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
    break;
  }
  }

  if (DefCycle == -1)
    // We can't seem to determine the result latency of the def; assume it's 2.
    DefCycle = 2;

  int UseCycle = -1;
  switch (UseTID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;
  case ARM::VSTMD:
  case ARM::VSTMS:
  case ARM::VSTMD_UPD:
  case ARM::VSTMS_UPD: {
    UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
    break;
  }
  case ARM::STM:
  case ARM::STM_UPD:
  case ARM::tSTM_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STM:
  case ARM::t2STM_UPD: {
    UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
    break;
  }
  }

  if (UseCycle == -1)
    // Assume it's read in the first stage.
    UseCycle = 1;

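  // Operand latency is def cycle - use cycle + 1: e.g. a result ready in
  // stage 5 feeding an operand read in stage 2 has a latency of 4.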
  UseCycle = DefCycle - UseCycle + 1;
  if (UseCycle > 0) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // the first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
                                          UseClass, UseIdx))
        --UseCycle;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx))
      --UseCycle;
  }

  return UseCycle;
}

int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                             const MachineInstr *DefMI, unsigned DefIdx,
                             const MachineInstr *UseMI, unsigned UseIdx) const {
  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
      DefMI->isRegSequence() || DefMI->isImplicitDef())
    return 1;

  const TargetInstrDesc &DefTID = DefMI->getDesc();
  if (!ItinData || ItinData->isEmpty())
    return DefTID.mayLoad() ? 3 : 1;

  const TargetInstrDesc &UseTID = UseMI->getDesc();
  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
  if (DefMO.getReg() == ARM::CPSR) {
    if (DefMI->getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isCortexA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseTID.isBranch())
      return 0;
  }

  unsigned DefAlign = DefMI->hasOneMemOperand()
    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
  unsigned UseAlign = UseMI->hasOneMemOperand()
    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
                                  UseTID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
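    // E.g. ldr r0, [r1, r2] and ldr r0, [r1, r2, lsl #2] are one cycle
    // cheaper here than, say, ldr r0, [r1, r2, lsl #1].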
    switch (DefTID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  }

  return Latency;
}

int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    SDNode *DefNode, unsigned DefIdx,
                                    SDNode *UseNode, unsigned UseIdx) const {
  if (!DefNode->isMachineOpcode())
    return 1;

  const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
  if (!ItinData || ItinData->isEmpty())
    return DefTID.mayLoad() ? 3 : 1;

  if (!UseNode->isMachineOpcode()) {
    int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
    if (Subtarget.isCortexA9())
      return Latency <= 2 ? 1 : Latency - 1;
    else
      return Latency <= 3 ? 1 : Latency - 2;
  }

  const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
  unsigned DefAlign = !DefMN->memoperands_empty()
    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
  unsigned UseAlign = !UseMN->memoperands_empty()
    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
                                  UseTID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefTID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  }

  return Latency;
}

int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      const MachineInstr *MI,
                                      unsigned *PredCost) const {
  if (MI->isCopyLike() || MI->isInsertSubreg() ||
      MI->isRegSequence() || MI->isImplicitDef())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  const TargetInstrDesc &TID = MI->getDesc();
  unsigned Class = TID.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
  if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
    // When predicated, CPSR is an additional source operand for CPSR-updating
    // instructions; this apparently increases their latencies.
    *PredCost = 1;
  if (UOps)
    return ItinData->getStageLatency(Class);
  return getNumMicroOps(ItinData, MI);
}

int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      SDNode *Node) const {
  if (!Node->isMachineOpcode())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  unsigned Opcode = Node->getMachineOpcode();
  switch (Opcode) {
  default:
    return ItinData->getStageLatency(get(Opcode).getSchedClass());
  case ARM::VLDMQ:
  case ARM::VSTMQ:
    return 2;
  }
}

bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
                      const MachineRegisterInfo *MRI,
                      const MachineInstr *DefMI, unsigned DefIdx,
                      const MachineInstr *UseMI, unsigned UseIdx) const {
  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
  if (Subtarget.isCortexA8() &&
      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    // Cortex-A8 VFP instructions are not pipelined.
    return true;

  // Hoist VFP / NEON instructions with 4 or higher latency.
  int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
  if (Latency <= 3)
    return false;
  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
}

bool ARMBaseInstrInfo::
hasLowDefLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *DefMI, unsigned DefIdx) const {
  if (!ItinData || ItinData->isEmpty())
    return false;

  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  if (DDomain == ARMII::DomainGeneral) {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    return (DefCycle != -1 && DefCycle <= 2);
  }
  return false;
}