// ARMBaseInstrInfo.cpp — revision 16884415db751c75f2133bd04921393c792b1158
//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>

#define GET_INSTRINFO_MC_DESC
#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"

using namespace llvm;
44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
// Hidden command-line flag (off unless explicitly enabled) that gates the
// experimental 2-address -> 3-address conversion performed by
// convertToThreeAddress() below.
static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));
48a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
/// ARM_MLxEntry - Record information about MLA / MLS instructions: the fused
/// multiply-accumulate opcode and the separate multiply + add/sub pair it can
/// be expanded into.
struct ARM_MLxEntry {
  unsigned MLxOpc;     // MLA / MLS opcode
  unsigned MulOpc;     // Expanded multiplication opcode
  unsigned AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the acc is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};
57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
// Expansion table for MLA / MLS instructions. The constructor below builds
// MLxEntryMap (MLx opcode -> row index) and MLxHazardOpcodes (the expanded
// mul and add/sub opcodes) from this table.
static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};
80a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
81a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
82a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
83a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    Subtarget(STI) {
84a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
85a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
86a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      assert(false && "Duplicated entries?");
87a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
88a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
89a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
90a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
91a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
92a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
93a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// currently defaults to no prepass hazard recognizer.
94a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)ScheduleHazardRecognizer *ARMBaseInstrInfo::
95a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)CreateTargetHazardRecognizer(const TargetMachine *TM,
96a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                             const ScheduleDAG *DAG) const {
97a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if (usePreRAHazardRecognizer()) {
98a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const InstrItineraryData *II = TM->getInstrItineraryData();
99a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
100a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
101a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
102a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
103a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
104a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)ScheduleHazardRecognizer *ARMBaseInstrInfo::
105a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
106a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                                   const ScheduleDAG *DAG) const {
107a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
108a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return (ScheduleHazardRecognizer *)
109a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
110a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
111a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
112a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
/// convertToThreeAddress - Split a pre/post-indexed load or store into an
/// un-indexed memory op plus a separate add/sub that performs the base-register
/// writeback. Returns the first of the two new instructions, or NULL if the
/// instruction cannot (or should not) be converted. Gated on the
/// -enable-arm-3-addr-conv flag.
MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  // Only pre- and post-indexed forms are candidates; anything else bails.
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MCID.mayStore();
  // Operand layout: loads put the writeback register at index 1, stores at
  // index 0; the base is operand 2, and the last three operands are the
  // offset register, offset immediate, and predicate.
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default:
    assert(false && "Unknown indexed op!");
    return NULL;
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      // Immediate offset: must fit in a so_imm operand or the split would
      // need an extra instruction.
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      // Register offset with a shift: use the shifted-register add/sub form.
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      // Plain register offset.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3 : {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  // Build the two replacement instructions in execution order: for
  // pre-indexed ops the memory access (through the updated base WBReg) comes
  // first; for post-indexed ops the update comes first.
  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          // The writeback def moved to UpdateMI; any other def stays with
          // the memory op.
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  // NewMIs was built in reverse insertion order; insert both before MBBI and
  // return the first instruction of the pair.
  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}
263a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
// Branch analysis.
// Returns false (= analyzable) with TBB/FBB/Cond filled in, or true when the
// terminator sequence cannot be understood. With AllowModify set, dead
// trailing branches may be erased as a side effect.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip trailing debug values; they are not terminators.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(LastInst->getOperand(1));
      Cond.push_back(LastInst->getOperand(2));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only remaining terminator is an unconditional
        // branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB =  SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(SecondLastInst->getOperand(1));
    Cond.push_back(SecondLastInst->getOperand(2));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with a branch table followed by an unconditional
  // branch. The branch folder can create these, and we must get rid of them for
  // correctness of Thumb constant islands.
  if ((isJumpTableBranchOpcode(SecondLastOpc) ||
       isIndirectBranchOpcode(SecondLastOpc)) &&
      isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
363
364
365unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
366  MachineBasicBlock::iterator I = MBB.end();
367  if (I == MBB.begin()) return 0;
368  --I;
369  while (I->isDebugValue()) {
370    if (I == MBB.begin())
371      return 0;
372    --I;
373  }
374  if (!isUncondBranchOpcode(I->getOpcode()) &&
375      !isCondBranchOpcode(I->getOpcode()))
376    return 0;
377
378  // Remove the branch.
379  I->eraseFromParent();
380
381  I = MBB.end();
382
383  if (I == MBB.begin()) return 1;
384  --I;
385  if (!isCondBranchOpcode(I->getOpcode()))
386    return 1;
387
388  // Remove the branch.
389  I->eraseFromParent();
390  return 2;
391}
392
393unsigned
394ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
395                               MachineBasicBlock *FBB,
396                               const SmallVectorImpl<MachineOperand> &Cond,
397                               DebugLoc DL) const {
398  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
399  int BOpc   = !AFI->isThumbFunction()
400    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
401  int BccOpc = !AFI->isThumbFunction()
402    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
403
404  // Shouldn't be a fall through.
405  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
406  assert((Cond.size() == 2 || Cond.size() == 0) &&
407         "ARM branch conditions have two components!");
408
409  if (FBB == 0) {
410    if (Cond.empty()) // Unconditional branch?
411      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
412    else
413      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
414        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
415    return 1;
416  }
417
418  // Two-way conditional branch.
419  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
420    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
421  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
422  return 2;
423}
424
425bool ARMBaseInstrInfo::
426ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
427  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
428  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
429  return false;
430}
431
432bool ARMBaseInstrInfo::
433PredicateInstruction(MachineInstr *MI,
434                     const SmallVectorImpl<MachineOperand> &Pred) const {
435  unsigned Opc = MI->getOpcode();
436  if (isUncondBranchOpcode(Opc)) {
437    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
438    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
439    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
440    return true;
441  }
442
443  int PIdx = MI->findFirstPredOperandIdx();
444  if (PIdx != -1) {
445    MachineOperand &PMO = MI->getOperand(PIdx);
446    PMO.setImm(Pred[0].getImm());
447    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
448    return true;
449  }
450  return false;
451}
452
453bool ARMBaseInstrInfo::
454SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
455                  const SmallVectorImpl<MachineOperand> &Pred2) const {
456  if (Pred1.size() > 2 || Pred2.size() > 2)
457    return false;
458
459  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
460  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
461  if (CC1 == CC2)
462    return true;
463
464  switch (CC1) {
465  default:
466    return false;
467  case ARMCC::AL:
468    return true;
469  case ARMCC::HS:
470    return CC2 == ARMCC::HI;
471  case ARMCC::LS:
472    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
473  case ARMCC::GE:
474    return CC2 == ARMCC::GT;
475  case ARMCC::LE:
476    return CC2 == ARMCC::LT;
477  }
478}
479
480bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
481                                    std::vector<MachineOperand> &Pred) const {
482  // FIXME: This confuses implicit_def with optional CPSR def.
483  const MCInstrDesc &MCID = MI->getDesc();
484  if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
485    return false;
486
487  bool Found = false;
488  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
489    const MachineOperand &MO = MI->getOperand(i);
490    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
491      Pred.push_back(MO);
492      Found = true;
493    }
494  }
495
496  return Found;
497}
498
499/// isPredicable - Return true if the specified instruction can be predicated.
500/// By default, this returns true for every instruction with a
501/// PredicateOperand.
502bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
503  const MCInstrDesc &MCID = MI->getDesc();
504  if (!MCID.isPredicable())
505    return false;
506
507  if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
508    ARMFunctionInfo *AFI =
509      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
510    return AFI->isThumb2Function();
511  }
512  return true;
513}
514
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
// The separate declaration plus out-of-line noinline definition is
// deliberate (see FIXME above); do not merge them or drop the attribute.
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  // Number of basic-block entries in jump table JTI.
  return JT[JTI].MBBs.size();
}
524
525/// GetInstSize - Return the size of the specified MachineInstr.
526///
527unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
528  const MachineBasicBlock &MBB = *MI->getParent();
529  const MachineFunction *MF = MBB.getParent();
530  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
531
532  const MCInstrDesc &MCID = MI->getDesc();
533  if (MCID.getSize())
534    return MCID.getSize();
535
536    // If this machine instr is an inline asm, measure it.
537    if (MI->getOpcode() == ARM::INLINEASM)
538      return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
539    if (MI->isLabel())
540      return 0;
541  unsigned Opc = MI->getOpcode();
542    switch (Opc) {
543    case TargetOpcode::IMPLICIT_DEF:
544    case TargetOpcode::KILL:
545    case TargetOpcode::PROLOG_LABEL:
546    case TargetOpcode::EH_LABEL:
547    case TargetOpcode::DBG_VALUE:
548      return 0;
549    case ARM::MOVi16_ga_pcrel:
550    case ARM::MOVTi16_ga_pcrel:
551    case ARM::t2MOVi16_ga_pcrel:
552    case ARM::t2MOVTi16_ga_pcrel:
553      return 4;
554    case ARM::MOVi32imm:
555    case ARM::t2MOVi32imm:
556      return 8;
557    case ARM::CONSTPOOL_ENTRY:
558      // If this machine instr is a constant pool entry, its size is recorded as
559      // operand #2.
560      return MI->getOperand(2).getImm();
561    case ARM::Int_eh_sjlj_longjmp:
562      return 16;
563    case ARM::tInt_eh_sjlj_longjmp:
564      return 10;
565    case ARM::Int_eh_sjlj_setjmp:
566    case ARM::Int_eh_sjlj_setjmp_nofp:
567      return 20;
568    case ARM::tInt_eh_sjlj_setjmp:
569    case ARM::t2Int_eh_sjlj_setjmp:
570    case ARM::t2Int_eh_sjlj_setjmp_nofp:
571      return 12;
572    case ARM::BR_JTr:
573    case ARM::BR_JTm:
574    case ARM::BR_JTadd:
575    case ARM::tBR_JTr:
576    case ARM::t2BR_JT:
577    case ARM::t2TBB_JT:
578    case ARM::t2TBH_JT: {
579      // These are jumptable branches, i.e. a branch followed by an inlined
580      // jumptable. The size is 4 + 4 * number of entries. For TBB, each
581      // entry is one byte; TBH two byte each.
582      unsigned EntrySize = (Opc == ARM::t2TBB_JT)
583        ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
584      unsigned NumOps = MCID.getNumOperands();
585      MachineOperand JTOP =
586        MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
587      unsigned JTI = JTOP.getIndex();
588      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
589      assert(MJTI != 0);
590      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
591      assert(JTI < JT.size());
592      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
593      // 4 aligned. The assembler / linker may add 2 byte padding just before
594      // the JT entries.  The size does not include this padding; the
595      // constant islands pass does separate bookkeeping for it.
596      // FIXME: If we know the size of the function is less than (1 << 16) *2
597      // bytes, we can use 16-bit entries instead. Then there won't be an
598      // alignment issue.
599      unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
600      unsigned NumEntries = getNumJTEntries(JT, JTI);
601      if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
602        // Make sure the instruction that follows TBB is 2-byte aligned.
603        // FIXME: Constant island pass should insert an "ALIGN" instruction
604        // instead.
605        ++NumEntries;
606      return NumEntries * EntrySize + InstSize;
607    }
608    default:
609      // Otherwise, pseudo-instruction sizes are zero.
610      return 0;
611    }
612  return 0; // Not reached
613}
614
615void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
616                                   MachineBasicBlock::iterator I, DebugLoc DL,
617                                   unsigned DestReg, unsigned SrcReg,
618                                   bool KillSrc) const {
619  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
620  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);
621
622  if (GPRDest && GPRSrc) {
623    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
624                                  .addReg(SrcReg, getKillRegState(KillSrc))));
625    return;
626  }
627
628  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
629  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);
630
631  unsigned Opc;
632  if (SPRDest && SPRSrc)
633    Opc = ARM::VMOVS;
634  else if (GPRDest && SPRSrc)
635    Opc = ARM::VMOVRS;
636  else if (SPRDest && GPRSrc)
637    Opc = ARM::VMOVSR;
638  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
639    Opc = ARM::VMOVD;
640  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
641    Opc = ARM::VMOVQ;
642  else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
643    Opc = ARM::VMOVQQ;
644  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
645    Opc = ARM::VMOVQQQQ;
646  else
647    llvm_unreachable("Impossible reg-to-reg copy");
648
649  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
650  MIB.addReg(SrcReg, getKillRegState(KillSrc));
651  if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
652    AddDefaultPred(MIB);
653}
654
655static const
656MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
657                             unsigned Reg, unsigned SubIdx, unsigned State,
658                             const TargetRegisterInfo *TRI) {
659  if (!SubIdx)
660    return MIB.addReg(Reg, State);
661
662  if (TargetRegisterInfo::isPhysicalRegister(Reg))
663    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
664  return MIB.addReg(Reg, State, SubIdx);
665}
666
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  // Spill SrcReg to frame index FI, selecting the store opcode from the
  // register class being spilled.
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  // Memory operand describing the store (size/alignment of the stack slot)
  // for the scheduler and alias analysis.
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(
                                         PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is used sometimes in ARM instructions that need to avoid using
  // certain registers.  Just treat it as GPR here. Likewise, rGPR.
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    // Core register: immediate-offset word store.
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    // Single-precision VFP register.
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    // Double-precision VFP register.
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    // Q register: use the 16-byte-aligned NEON store pseudo when the slot
    // alignment allows it; otherwise fall back to a VFP store-multiple.
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      // FIXME: It's possible to only store part of the QQ register if the
      // spilled def has a sub-register index.
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      // Store the four D sub-registers with a single VSTMDIA; only the
      // first operand carries the kill flag.
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
            AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    // QQQQ register: store all eight D sub-registers with one VSTMDIA.
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                     .addFrameIndex(FI))
      .addMemOperand(MMO);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}
763
764unsigned
765ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
766                                     int &FrameIndex) const {
767  switch (MI->getOpcode()) {
768  default: break;
769  case ARM::STRrs:
770  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
771    if (MI->getOperand(1).isFI() &&
772        MI->getOperand(2).isReg() &&
773        MI->getOperand(3).isImm() &&
774        MI->getOperand(2).getReg() == 0 &&
775        MI->getOperand(3).getImm() == 0) {
776      FrameIndex = MI->getOperand(1).getIndex();
777      return MI->getOperand(0).getReg();
778    }
779    break;
780  case ARM::STRi12:
781  case ARM::t2STRi12:
782  case ARM::tSTRspi:
783  case ARM::VSTRD:
784  case ARM::VSTRS:
785    if (MI->getOperand(1).isFI() &&
786        MI->getOperand(2).isImm() &&
787        MI->getOperand(2).getImm() == 0) {
788      FrameIndex = MI->getOperand(1).getIndex();
789      return MI->getOperand(0).getReg();
790    }
791    break;
792  case ARM::VST1q64Pseudo:
793    if (MI->getOperand(0).isFI() &&
794        MI->getOperand(2).getSubReg() == 0) {
795      FrameIndex = MI->getOperand(0).getIndex();
796      return MI->getOperand(2).getReg();
797    }
798    break;
799  case ARM::VSTMQIA:
800    if (MI->getOperand(1).isFI() &&
801        MI->getOperand(0).getSubReg() == 0) {
802      FrameIndex = MI->getOperand(1).getIndex();
803      return MI->getOperand(0).getReg();
804    }
805    break;
806  }
807
808  return 0;
809}
810
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  // Reload DestReg from frame index FI, selecting the load opcode from the
  // register class being restored.
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  // Memory operand describing the load (size/alignment of the stack slot)
  // for the scheduler and alias analysis.
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is used sometimes in ARM instructions that need to avoid using
  // certain registers.  Just treat it as GPR here.
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    // Core register: immediate-offset word load.
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    // Single-precision VFP register.
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    // Double-precision VFP register.
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    // Q register: use the 16-byte-aligned NEON load pseudo when the slot
    // alignment allows it; otherwise fall back to a VFP load-multiple.
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      // Reload the four D sub-registers with a single VLDMDIA; each
      // sub-register is added as a def operand.
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                       .addFrameIndex(FI))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
            AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    // QQQQ register: reload all eight D sub-registers with one VLDMDIA.
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                     .addFrameIndex(FI))
      .addMemOperand(MMO);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
    AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}
898
899unsigned
900ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
901                                      int &FrameIndex) const {
902  switch (MI->getOpcode()) {
903  default: break;
904  case ARM::LDRrs:
905  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
906    if (MI->getOperand(1).isFI() &&
907        MI->getOperand(2).isReg() &&
908        MI->getOperand(3).isImm() &&
909        MI->getOperand(2).getReg() == 0 &&
910        MI->getOperand(3).getImm() == 0) {
911      FrameIndex = MI->getOperand(1).getIndex();
912      return MI->getOperand(0).getReg();
913    }
914    break;
915  case ARM::LDRi12:
916  case ARM::t2LDRi12:
917  case ARM::tLDRspi:
918  case ARM::VLDRD:
919  case ARM::VLDRS:
920    if (MI->getOperand(1).isFI() &&
921        MI->getOperand(2).isImm() &&
922        MI->getOperand(2).getImm() == 0) {
923      FrameIndex = MI->getOperand(1).getIndex();
924      return MI->getOperand(0).getReg();
925    }
926    break;
927  case ARM::VLD1q64Pseudo:
928    if (MI->getOperand(1).isFI() &&
929        MI->getOperand(0).getSubReg() == 0) {
930      FrameIndex = MI->getOperand(1).getIndex();
931      return MI->getOperand(0).getReg();
932    }
933    break;
934  case ARM::VLDMQIA:
935    if (MI->getOperand(1).isFI() &&
936        MI->getOperand(0).getSubReg() == 0) {
937      FrameIndex = MI->getOperand(1).getIndex();
938      return MI->getOperand(0).getReg();
939    }
940    break;
941  }
942
943  return 0;
944}
945
946MachineInstr*
947ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
948                                           int FrameIx, uint64_t Offset,
949                                           const MDNode *MDPtr,
950                                           DebugLoc DL) const {
951  MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
952    .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
953  return &*MIB;
954}
955
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // The entry being cloned must be a target-specific (ARM) constant-pool
  // value; plain IR constants never reach this path.
  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  // Each copy gets its own PIC label UID so the PC-relative address
  // computation of each user refers to its own label.
  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = 0;
  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
                                      ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
                                      ACPV->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
                                      ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
                                      ARMCP::CPLSDA, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  // NOTE(review): NewCPV appears to be handed off to the constant pool here
  // (it is never deleted locally) — confirm getConstantPoolIndex takes
  // ownership. CPI is updated in place for the caller.
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}
992
993void ARMBaseInstrInfo::
994reMaterialize(MachineBasicBlock &MBB,
995              MachineBasicBlock::iterator I,
996              unsigned DestReg, unsigned SubIdx,
997              const MachineInstr *Orig,
998              const TargetRegisterInfo &TRI) const {
999  unsigned Opcode = Orig->getOpcode();
1000  switch (Opcode) {
1001  default: {
1002    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
1003    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
1004    MBB.insert(I, MI);
1005    break;
1006  }
1007  case ARM::tLDRpci_pic:
1008  case ARM::t2LDRpci_pic: {
1009    MachineFunction &MF = *MBB.getParent();
1010    unsigned CPI = Orig->getOperand(1).getIndex();
1011    unsigned PCLabelId = duplicateCPV(MF, CPI);
1012    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
1013                                      DestReg)
1014      .addConstantPoolIndex(CPI).addImm(PCLabelId);
1015    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
1016    break;
1017  }
1018  }
1019}
1020
1021MachineInstr *
1022ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
1023  MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
1024  switch(Orig->getOpcode()) {
1025  case ARM::tLDRpci_pic:
1026  case ARM::t2LDRpci_pic: {
1027    unsigned CPI = Orig->getOperand(1).getIndex();
1028    unsigned PCLabelId = duplicateCPV(MF, CPI);
1029    Orig->getOperand(1).setIndex(CPI);
1030    Orig->getOperand(2).setImm(PCLabelId);
1031    break;
1032  }
1033  }
1034  return MI;
1035}
1036
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1,
                                        const MachineRegisterInfo *MRI) const {
  // Decide whether MI0 and MI1 compute the same value. For constant-pool
  // loads and global-address materializations this looks through
  // constant-pool indices and PC labels, which legitimately differ between
  // copies of the same value.
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::MOV_ga_dyn ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_dyn ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    // Operand 1 is the constant-pool index (loads) or the global (moves).
    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::MOV_ga_dyn ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_dyn ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    // Constant-pool loads: compare the entries the two indices refer to.
    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      // Both are ARM-specific entries; use their own value comparison.
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      // Both are plain IR constants; pointer identity suffices.
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    // One ARM-specific entry and one plain constant never match.
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    // Compare the address registers, looking through virtual-register
    // definitions (SSA) when they differ.
    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded value, e.g. a constantpool of a global address, are
      // the same.
      if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    // Remaining operands (offset, predicate) must be identical.
    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  // Fallback: structural identity, ignoring which virtual register is
  // defined.
  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
1121
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  // Both nodes must be one of the simple load forms below; everything else
  // is rejected.
  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  // NOTE(review): this relies on all the whitelisted loads sharing the
  // operand layout base(0), offset(1), index(3), chain(4) — confirm against
  // the corresponding instruction patterns if the list is extended.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets. Only constant offsets are handled; anything else
  // means the addresses may differ by more than an offset.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}
1193
1194/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
1195/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1196/// be scheduled togther. On some targets if two loads are loading from
1197/// addresses in the same cache line, it's better if they are scheduled
1198/// together. This function takes two integers that represent the load offsets
1199/// from the common base address. It returns true if it decides it's desirable
1200/// to schedule the two loads together. "NumLoads" is the number of loads that
1201/// have already been scheduled after Load1.
1202bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1203                                               int64_t Offset1, int64_t Offset2,
1204                                               unsigned NumLoads) const {
1205  // Don't worry about Thumb: just ARM and Thumb2.
1206  if (Subtarget.isThumb1Only()) return false;
1207
1208  assert(Offset2 > Offset1);
1209
1210  if ((Offset2 - Offset1) / 8 > 64)
1211    return false;
1212
1213  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
1214    return false;  // FIXME: overly conservative?
1215
1216  // Four loads in a row should be sufficient.
1217  if (NumLoads >= 3)
1218    return false;
1219
1220  return true;
1221}
1222
1223bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
1224                                            const MachineBasicBlock *MBB,
1225                                            const MachineFunction &MF) const {
1226  // Debug info is never a scheduling boundary. It's necessary to be explicit
1227  // due to the special treatment of IT instructions below, otherwise a
1228  // dbg_value followed by an IT will result in the IT instruction being
1229  // considered a scheduling hazard, which is wrong. It should be the actual
1230  // instruction preceding the dbg_value instruction(s), just like it is
1231  // when debug info is not present.
1232  if (MI->isDebugValue())
1233    return false;
1234
1235  // Terminators and labels can't be scheduled around.
1236  if (MI->getDesc().isTerminator() || MI->isLabel())
1237    return true;
1238
1239  // Treat the start of the IT block as a scheduling boundary, but schedule
1240  // t2IT along with all instructions following it.
1241  // FIXME: This is a big hammer. But the alternative is to add all potential
1242  // true and anti dependencies to IT block instructions as implicit operands
1243  // to the t2IT instruction. The added compile time and complexity does not
1244  // seem worth it.
1245  MachineBasicBlock::const_iterator I = MI;
1246  // Make sure to skip any dbg_value instructions
1247  while (++I != MBB->end() && I->isDebugValue())
1248    ;
1249  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1250    return true;
1251
1252  // Don't attempt to schedule around any instruction that defines
1253  // a stack-oriented pointer, as it's unlikely to be profitable. This
1254  // saves compile time, because it doesn't require every single
1255  // stack slot reference to depend on the instruction that does the
1256  // modification.
1257  if (MI->definesRegister(ARM::SP))
1258    return true;
1259
1260  return false;
1261}
1262
1263bool ARMBaseInstrInfo::
1264isProfitableToIfCvt(MachineBasicBlock &MBB,
1265                    unsigned NumCycles, unsigned ExtraPredCycles,
1266                    const BranchProbability &Probability) const {
1267  if (!NumCycles)
1268    return false;
1269
1270  // Attempt to estimate the relative costs of predication versus branching.
1271  unsigned UnpredCost = Probability.getNumerator() * NumCycles;
1272  UnpredCost /= Probability.getDenominator();
1273  UnpredCost += 1; // The branch itself
1274  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1275
1276  return (NumCycles + ExtraPredCycles) <= UnpredCost;
1277}
1278
1279bool ARMBaseInstrInfo::
1280isProfitableToIfCvt(MachineBasicBlock &TMBB,
1281                    unsigned TCycles, unsigned TExtra,
1282                    MachineBasicBlock &FMBB,
1283                    unsigned FCycles, unsigned FExtra,
1284                    const BranchProbability &Probability) const {
1285  if (!TCycles || !FCycles)
1286    return false;
1287
1288  // Attempt to estimate the relative costs of predication versus branching.
1289  unsigned TUnpredCost = Probability.getNumerator() * TCycles;
1290  TUnpredCost /= Probability.getDenominator();
1291
1292  uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
1293  unsigned FUnpredCost = Comp * FCycles;
1294  FUnpredCost /= Probability.getDenominator();
1295
1296  unsigned UnpredCost = TUnpredCost + FUnpredCost;
1297  UnpredCost += 1; // The branch itself
1298  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1299
1300  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
1301}
1302
1303/// getInstrPredicate - If instruction is predicated, returns its predicate
1304/// condition, otherwise returns AL. It also returns the condition code
1305/// register by reference.
1306ARMCC::CondCodes
1307llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
1308  int PIdx = MI->findFirstPredOperandIdx();
1309  if (PIdx == -1) {
1310    PredReg = 0;
1311    return ARMCC::AL;
1312  }
1313
1314  PredReg = MI->getOperand(PIdx+1).getReg();
1315  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
1316}
1317
1318
1319int llvm::getMatchingCondBranchOpcode(int Opc) {
1320  if (Opc == ARM::B)
1321    return ARM::Bcc;
1322  else if (Opc == ARM::tB)
1323    return ARM::tBcc;
1324  else if (Opc == ARM::t2B)
1325      return ARM::t2Bcc;
1326
1327  llvm_unreachable("Unknown unconditional branch opcode!");
1328  return 0;
1329}
1330
1331
1332void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
1333                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
1334                               unsigned DestReg, unsigned BaseReg, int NumBytes,
1335                               ARMCC::CondCodes Pred, unsigned PredReg,
1336                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
1337  bool isSub = NumBytes < 0;
1338  if (isSub) NumBytes = -NumBytes;
1339
1340  while (NumBytes) {
1341    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
1342    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
1343    assert(ThisVal && "Didn't extract field correctly");
1344
1345    // We will handle these bits from offset, clear them.
1346    NumBytes &= ~ThisVal;
1347
1348    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
1349
1350    // Build the new ADD / SUB.
1351    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
1352    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
1353      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
1354      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
1355      .setMIFlags(MIFlags);
1356    BaseReg = DestReg;
1357  }
1358}
1359
// rewriteARMFrameIndex - Rewrite MI's frame-index operand (at FrameRegIdx) to
// use FrameReg instead, folding as much of Offset into the instruction's
// immediate field as its addressing mode allows. On return, Offset holds the
// part that could not be folded; returns true iff the entire offset was
// absorbed into the instruction.
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  // The addressing mode is encoded in the target-specific flags.
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.RemoveOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      // Negative offset: flip the ADD into a SUB of the positive magnitude.
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
 } else {
    // Load/store: decode the existing immediate offset according to the
    // instruction's addressing mode, then try to re-encode the combined
    // offset into the available bits.
    unsigned ImmIdx = 0;         // Operand index of the immediate field.
    int InstrOffs = 0;           // Signed offset currently in the instruction.
    unsigned NumBits = 0;        // Width of the immediate field.
    unsigned Scale = 1;          // Scale factor applied to the immediate.
    switch (AddrMode) {
    case ARMII::AddrMode_i12: {
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode2: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      // The add/sub direction is encoded inside the AM2 immediate.
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode3: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    }
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5: {
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;  // AM5 immediates are in words.
      break;
    }
    default:
      llvm_unreachable("Unsupported addressing mode!");
      break;
    }

    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold address comp. if opcode has offset bits
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            // Legacy addressing modes encode the direction as an extra bit.
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      // The caller is left to handle whatever part did not fit.
      Offset &= ~(Mask*Scale);
    }
  }

  // Report the leftover offset with the correct sign.
  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}
1500
1501bool ARMBaseInstrInfo::
1502AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
1503               int &CmpValue) const {
1504  switch (MI->getOpcode()) {
1505  default: break;
1506  case ARM::CMPri:
1507  case ARM::t2CMPri:
1508    SrcReg = MI->getOperand(0).getReg();
1509    CmpMask = ~0;
1510    CmpValue = MI->getOperand(1).getImm();
1511    return true;
1512  case ARM::TSTri:
1513  case ARM::t2TSTri:
1514    SrcReg = MI->getOperand(0).getReg();
1515    CmpMask = MI->getOperand(1).getImm();
1516    CmpValue = 0;
1517    return true;
1518  }
1519
1520  return false;
1521}
1522
1523/// isSuitableForMask - Identify a suitable 'and' instruction that
1524/// operates on the given source register and applies the same mask
1525/// as a 'tst' instruction. Provide a limited look-through for copies.
1526/// When successful, MI will hold the found instruction.
1527static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
1528                              int CmpMask, bool CommonUse) {
1529  switch (MI->getOpcode()) {
1530    case ARM::ANDri:
1531    case ARM::t2ANDri:
1532      if (CmpMask != MI->getOperand(2).getImm())
1533        return false;
1534      if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
1535        return true;
1536      break;
1537    case ARM::COPY: {
1538      // Walk down one instruction which is potentially an 'and'.
1539      const MachineInstr &Copy = *MI;
1540      MachineBasicBlock::iterator AND(
1541        llvm::next(MachineBasicBlock::iterator(MI)));
1542      if (AND == MI->getParent()->end()) return false;
1543      MI = AND;
1544      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
1545                               CmpMask, true);
1546    }
1547  }
1548
1549  return false;
1550}
1551
/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register.
/// Only compares against zero are handled. On success the compare is deleted
/// and the defining instruction is switched to its flag-setting form.
bool ARMBaseInstrInfo::
OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
                     int CmpValue, const MachineRegisterInfo *MRI) const {
  // Only a compare against zero can be replaced by an S-bit def.
  if (CmpValue != 0)
    return false;

  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
  if (llvm::next(DI) != MRI->def_end())
    // Only support one definition.
    return false;

  MachineInstr *MI = &*DI;

  // Masked compares sometimes use the same register as the corresponding 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
      // The def itself isn't a matching 'and'; look for one among the other
      // uses of SrcReg in the compare's block.
      MI = 0;
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
           UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr->getParent()) continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
          continue;
        MI = PotentialAND;
        break;
      }
      // No suitable 'and' found anywhere; give up.
      if (!MI) return false;
    }
  }

  // Conservatively refuse to convert an instruction which isn't in the same BB
  // as the comparison.
  if (MI->getParent() != CmpInstr->getParent())
    return false;

  // Check that CPSR isn't set between the comparison instruction and the one we
  // want to change.
  MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
    B = MI->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  // Walk backwards from just above the compare to just below MI.
  --I;
  for (; I != E; --I) {
    const MachineInstr &Instr = *I;

    for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (!MO.isReg()) continue;

      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      if (MO.getReg() == ARM::CPSR)
        return false;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Set the "zero" bit in CPSR.
  // Only the data-processing opcodes below are known to have an optional
  // CPSR-def operand that can be toggled (see operand 5 below).
  switch (MI->getOpcode()) {
  default: break;
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::t2EORrr:
  case ARM::t2EORri: {
    // Scan forward for the use of CPSR, if it's a conditional code requires
    // checking of V bit, then this is not safe to do. If we can't find the
    // CPSR use (i.e. used in another block), then it's not safe to perform
    // the optimization.
    bool isSafe = false;
    I = CmpInstr;
    E = MI->getParent()->end();
    while (!isSafe && ++I != E) {
      const MachineInstr &Instr = *I;
      for (unsigned IO = 0, EO = Instr.getNumOperands();
           !isSafe && IO != EO; ++IO) {
        const MachineOperand &MO = Instr.getOperand(IO);
        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
          continue;
        if (MO.isDef()) {
          // CPSR is redefined before any problematic read: safe.
          isSafe = true;
          break;
        }
        // Condition code is after the operand before CPSR.
        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
        switch (CC) {
        default:
          isSafe = true;
          break;
        // These condition codes read the V (or C-and-Z interplay via signed
        // comparisons) flag, which the flag-setting ALU op computes
        // differently than CMP; bail out.
        case ARMCC::VS:
        case ARMCC::VC:
        case ARMCC::GE:
        case ARMCC::LT:
        case ARMCC::GT:
        case ARMCC::LE:
          return false;
        }
      }
    }

    if (!isSafe)
      return false;

    // Toggle the optional operand to CPSR. For the opcodes listed above,
    // operand 5 is the optional flag-setting (cc_out) operand; redirecting
    // it to a CPSR def makes the instruction set the flags.
    MI->getOperand(5).setReg(ARM::CPSR);
    MI->getOperand(5).setIsDef(true);
    CmpInstr->eraseFromParent();
    return true;
  }
  }

  return false;
}
1700
1701bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
1702                                     MachineInstr *DefMI, unsigned Reg,
1703                                     MachineRegisterInfo *MRI) const {
1704  // Fold large immediates into add, sub, or, xor.
1705  unsigned DefOpc = DefMI->getOpcode();
1706  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
1707    return false;
1708  if (!DefMI->getOperand(1).isImm())
1709    // Could be t2MOVi32imm <ga:xx>
1710    return false;
1711
1712  if (!MRI->hasOneNonDBGUse(Reg))
1713    return false;
1714
1715  unsigned UseOpc = UseMI->getOpcode();
1716  unsigned NewUseOpc = 0;
1717  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
1718  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
1719  bool Commute = false;
1720  switch (UseOpc) {
1721  default: return false;
1722  case ARM::SUBrr:
1723  case ARM::ADDrr:
1724  case ARM::ORRrr:
1725  case ARM::EORrr:
1726  case ARM::t2SUBrr:
1727  case ARM::t2ADDrr:
1728  case ARM::t2ORRrr:
1729  case ARM::t2EORrr: {
1730    Commute = UseMI->getOperand(2).getReg() != Reg;
1731    switch (UseOpc) {
1732    default: break;
1733    case ARM::SUBrr: {
1734      if (Commute)
1735        return false;
1736      ImmVal = -ImmVal;
1737      NewUseOpc = ARM::SUBri;
1738      // Fallthrough
1739    }
1740    case ARM::ADDrr:
1741    case ARM::ORRrr:
1742    case ARM::EORrr: {
1743      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
1744        return false;
1745      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
1746      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
1747      switch (UseOpc) {
1748      default: break;
1749      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
1750      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
1751      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
1752      }
1753      break;
1754    }
1755    case ARM::t2SUBrr: {
1756      if (Commute)
1757        return false;
1758      ImmVal = -ImmVal;
1759      NewUseOpc = ARM::t2SUBri;
1760      // Fallthrough
1761    }
1762    case ARM::t2ADDrr:
1763    case ARM::t2ORRrr:
1764    case ARM::t2EORrr: {
1765      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
1766        return false;
1767      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
1768      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
1769      switch (UseOpc) {
1770      default: break;
1771      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
1772      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
1773      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
1774      }
1775      break;
1776    }
1777    }
1778  }
1779  }
1780
1781  unsigned OpIdx = Commute ? 2 : 1;
1782  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
1783  bool isKill = UseMI->getOperand(OpIdx).isKill();
1784  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
1785  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
1786                                      *UseMI, UseMI->getDebugLoc(),
1787                                      get(NewUseOpc), NewReg)
1788                              .addReg(Reg1, getKillRegState(isKill))
1789                              .addImm(SOImmValV1)));
1790  UseMI->setDesc(get(NewUseOpc));
1791  UseMI->getOperand(1).setReg(NewReg);
1792  UseMI->getOperand(1).setIsKill();
1793  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
1794  DefMI->eraseFromParent();
1795  return true;
1796}
1797
/// getNumMicroOps - Return the number of micro-ops MI issues as. When the
/// scheduling itinerary reports zero (variable_ops load/store multiples),
/// compute the count from the length of the instruction's register list.
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  // Without itinerary data every instruction counts as one uop.
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
  // A non-zero itinerary entry is authoritative.
  if (UOps)
    return UOps;

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
    break;
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple are determined by the number
  // of registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    // Register-list length = total operands minus the fixed operands.
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    } else if (Subtarget.isCortexA9()) {
      UOps = (NumRegs / 2);
      // If there are odd number of registers or if it's not 64-bit aligned,
      // then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++UOps;
      return UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}
1904
1905int
1906ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
1907                                  const MCInstrDesc &DefMCID,
1908                                  unsigned DefClass,
1909                                  unsigned DefIdx, unsigned DefAlign) const {
1910  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
1911  if (RegNo <= 0)
1912    // Def is the address writeback.
1913    return ItinData->getOperandCycle(DefClass, DefIdx);
1914
1915  int DefCycle;
1916  if (Subtarget.isCortexA8()) {
1917    // (regno / 2) + (regno % 2) + 1
1918    DefCycle = RegNo / 2 + 1;
1919    if (RegNo % 2)
1920      ++DefCycle;
1921  } else if (Subtarget.isCortexA9()) {
1922    DefCycle = RegNo;
1923    bool isSLoad = false;
1924
1925    switch (DefMCID.getOpcode()) {
1926    default: break;
1927    case ARM::VLDMSIA:
1928    case ARM::VLDMSIA_UPD:
1929    case ARM::VLDMSDB_UPD:
1930      isSLoad = true;
1931      break;
1932    }
1933
1934    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
1935    // then it takes an extra cycle.
1936    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
1937      ++DefCycle;
1938  } else {
1939    // Assume the worst.
1940    DefCycle = RegNo + 2;
1941  }
1942
1943  return DefCycle;
1944}
1945
1946int
1947ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
1948                                 const MCInstrDesc &DefMCID,
1949                                 unsigned DefClass,
1950                                 unsigned DefIdx, unsigned DefAlign) const {
1951  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
1952  if (RegNo <= 0)
1953    // Def is the address writeback.
1954    return ItinData->getOperandCycle(DefClass, DefIdx);
1955
1956  int DefCycle;
1957  if (Subtarget.isCortexA8()) {
1958    // 4 registers would be issued: 1, 2, 1.
1959    // 5 registers would be issued: 1, 2, 2.
1960    DefCycle = RegNo / 2;
1961    if (DefCycle < 1)
1962      DefCycle = 1;
1963    // Result latency is issue cycle + 2: E2.
1964    DefCycle += 2;
1965  } else if (Subtarget.isCortexA9()) {
1966    DefCycle = (RegNo / 2);
1967    // If there are odd number of registers or if it's not 64-bit aligned,
1968    // then it takes an extra AGU (Address Generation Unit) cycle.
1969    if ((RegNo % 2) || DefAlign < 8)
1970      ++DefCycle;
1971    // Result latency is AGU cycles + 2.
1972    DefCycle += 2;
1973  } else {
1974    // Assume the worst.
1975    DefCycle = RegNo + 2;
1976  }
1977
1978  return DefCycle;
1979}
1980
1981int
1982ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
1983                                  const MCInstrDesc &UseMCID,
1984                                  unsigned UseClass,
1985                                  unsigned UseIdx, unsigned UseAlign) const {
1986  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
1987  if (RegNo <= 0)
1988    return ItinData->getOperandCycle(UseClass, UseIdx);
1989
1990  int UseCycle;
1991  if (Subtarget.isCortexA8()) {
1992    // (regno / 2) + (regno % 2) + 1
1993    UseCycle = RegNo / 2 + 1;
1994    if (RegNo % 2)
1995      ++UseCycle;
1996  } else if (Subtarget.isCortexA9()) {
1997    UseCycle = RegNo;
1998    bool isSStore = false;
1999
2000    switch (UseMCID.getOpcode()) {
2001    default: break;
2002    case ARM::VSTMSIA:
2003    case ARM::VSTMSIA_UPD:
2004    case ARM::VSTMSDB_UPD:
2005      isSStore = true;
2006      break;
2007    }
2008
2009    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
2010    // then it takes an extra cycle.
2011    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
2012      ++UseCycle;
2013  } else {
2014    // Assume the worst.
2015    UseCycle = RegNo + 2;
2016  }
2017
2018  return UseCycle;
2019}
2020
2021int
2022ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
2023                                 const MCInstrDesc &UseMCID,
2024                                 unsigned UseClass,
2025                                 unsigned UseIdx, unsigned UseAlign) const {
2026  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
2027  if (RegNo <= 0)
2028    return ItinData->getOperandCycle(UseClass, UseIdx);
2029
2030  int UseCycle;
2031  if (Subtarget.isCortexA8()) {
2032    UseCycle = RegNo / 2;
2033    if (UseCycle < 2)
2034      UseCycle = 2;
2035    // Read in E3.
2036    UseCycle += 2;
2037  } else if (Subtarget.isCortexA9()) {
2038    UseCycle = (RegNo / 2);
2039    // If there are odd number of registers or if it's not 64-bit aligned,
2040    // then it takes an extra AGU (Address Generation Unit) cycle.
2041    if ((RegNo % 2) || UseAlign < 8)
2042      ++UseCycle;
2043  } else {
2044    // Assume the worst.
2045    UseCycle = 1;
2046  }
2047  return UseCycle;
2048}
2049
/// getOperandLatency - Compute the latency, in cycles, between DefMCID's
/// operand DefIdx and UseMCID's operand UseIdx. Operands beyond the static
/// operand list (variable_ops register lists of load/store multiples) are
/// handled by position-dependent helpers; pipeline forwarding may shave a
/// cycle off the result.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MCInstrDesc &DefMCID,
                                    unsigned DefIdx, unsigned DefAlign,
                                    const MCInstrDesc &UseMCID,
                                    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  // Both operands inside the static lists: the itinerary answers directly.
  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  int DefCycle = -1;
  bool LdmBypass = false;
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    // VFP load-multiple: def cycle depends on register-list position.
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    // Integer load-multiple: also eligible for the LDM bypass check below.
    LdmBypass = 1;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (DefCycle == -1)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  int UseCycle = -1;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    // VFP store-multiple: use cycle depends on register-list position.
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (UseCycle == -1)
    // Assume it's read in the first stage.
    UseCycle = 1;

  // Latency = producer cycle - consumer read cycle + 1.
  UseCycle = DefCycle - UseCycle + 1;
  if (UseCycle > 0) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                          UseClass, UseIdx))
        --UseCycle;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      --UseCycle;
    }
  }

  return UseCycle;
}
2162
/// Compute the latency between DefMI's operand DefIdx and UseMI's operand
/// UseIdx, layering CPU-specific adjustments (Cortex-A8/A9 shifter-operand
/// discounts, Cortex-A9 misaligned-NEON-load penalties) on top of the
/// itinerary-based MCInstrDesc overload.
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                             const MachineInstr *DefMI, unsigned DefIdx,
                             const MachineInstr *UseMI, unsigned UseIdx) const {
  // Copies, subreg insertion, REG_SEQUENCE and IMPLICIT_DEF are treated as
  // unit latency; they typically lower to nothing or a simple move.
  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
      DefMI->isRegSequence() || DefMI->isImplicitDef())
    return 1;

  const MCInstrDesc &DefMCID = DefMI->getDesc();
  // Without itinerary data, assume loads take 3 cycles and everything else 1.
  if (!ItinData || ItinData->isEmpty())
    return DefMCID.mayLoad() ? 3 : 1;

  const MCInstrDesc &UseMCID = UseMI->getDesc();
  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
  if (DefMO.getReg() == ARM::CPSR) {
    if (DefMI->getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isCortexA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseMCID.isBranch())
      return 0;
  }

  // Alignment is only known when the instruction has exactly one memory
  // operand; otherwise conservatively report 0 (unknown).
  unsigned DefAlign = DefMI->hasOneMemOperand()
    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
  unsigned UseAlign = UseMI->hasOneMemOperand()
    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
  // Base latency from the itinerary (MCInstrDesc overload above).
  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
                                  UseMCID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      // Operand 3 holds the addrmode2 shifter-operand immediate.
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  }

  // Cortex-A9: NEON loads from addresses not known to be 64-bit aligned pay
  // an extra cycle (DefAlign == 0 means alignment is unknown, so the penalty
  // is applied conservatively).
  if (DefAlign < 8 && Subtarget.isCortexA9())
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
    case ARM::VLD1q8_UPD:
    case ARM::VLD1q16_UPD:
    case ARM::VLD1q32_UPD:
    case ARM::VLD1q64_UPD:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8:
    case ARM::VLD2q16:
    case ARM::VLD2q32:
    case ARM::VLD2d8_UPD:
    case ARM::VLD2d16_UPD:
    case ARM::VLD2d32_UPD:
    case ARM::VLD2q8_UPD:
    case ARM::VLD2q16_UPD:
    case ARM::VLD2q32_UPD:
    case ARM::VLD3d8:
    case ARM::VLD3d16:
    case ARM::VLD3d32:
    case ARM::VLD1d64T:
    case ARM::VLD3d8_UPD:
    case ARM::VLD3d16_UPD:
    case ARM::VLD3d32_UPD:
    case ARM::VLD1d64T_UPD:
    case ARM::VLD3q8_UPD:
    case ARM::VLD3q16_UPD:
    case ARM::VLD3q32_UPD:
    case ARM::VLD4d8:
    case ARM::VLD4d16:
    case ARM::VLD4d32:
    case ARM::VLD1d64Q:
    case ARM::VLD4d8_UPD:
    case ARM::VLD4d16_UPD:
    case ARM::VLD4d32_UPD:
    case ARM::VLD1d64Q_UPD:
    case ARM::VLD4q8_UPD:
    case ARM::VLD4q16_UPD:
    case ARM::VLD4q32_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
    case ARM::VLD1DUPq8_UPD:
    case ARM::VLD1DUPq16_UPD:
    case ARM::VLD1DUPq32_UPD:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
    case ARM::VLD2DUPd8_UPD:
    case ARM::VLD2DUPd16_UPD:
    case ARM::VLD2DUPd32_UPD:
    case ARM::VLD4DUPd8:
    case ARM::VLD4DUPd16:
    case ARM::VLD4DUPd32:
    case ARM::VLD4DUPd8_UPD:
    case ARM::VLD4DUPd16_UPD:
    case ARM::VLD4DUPd32_UPD:
    case ARM::VLD1LNd8:
    case ARM::VLD1LNd16:
    case ARM::VLD1LNd32:
    case ARM::VLD1LNd8_UPD:
    case ARM::VLD1LNd16_UPD:
    case ARM::VLD1LNd32_UPD:
    case ARM::VLD2LNd8:
    case ARM::VLD2LNd16:
    case ARM::VLD2LNd32:
    case ARM::VLD2LNq16:
    case ARM::VLD2LNq32:
    case ARM::VLD2LNd8_UPD:
    case ARM::VLD2LNd16_UPD:
    case ARM::VLD2LNd32_UPD:
    case ARM::VLD2LNq16_UPD:
    case ARM::VLD2LNq32_UPD:
    case ARM::VLD4LNd8:
    case ARM::VLD4LNd16:
    case ARM::VLD4LNd32:
    case ARM::VLD4LNq16:
    case ARM::VLD4LNq32:
    case ARM::VLD4LNd8_UPD:
    case ARM::VLD4LNd16_UPD:
    case ARM::VLD4LNd32_UPD:
    case ARM::VLD4LNq16_UPD:
    case ARM::VLD4LNq32_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increase by one.
      ++Latency;
      break;
    }

  return Latency;
}
2320
2321int
2322ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
2323                                    SDNode *DefNode, unsigned DefIdx,
2324                                    SDNode *UseNode, unsigned UseIdx) const {
2325  if (!DefNode->isMachineOpcode())
2326    return 1;
2327
2328  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
2329
2330  if (isZeroCost(DefMCID.Opcode))
2331    return 0;
2332
2333  if (!ItinData || ItinData->isEmpty())
2334    return DefMCID.mayLoad() ? 3 : 1;
2335
2336  if (!UseNode->isMachineOpcode()) {
2337    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
2338    if (Subtarget.isCortexA9())
2339      return Latency <= 2 ? 1 : Latency - 1;
2340    else
2341      return Latency <= 3 ? 1 : Latency - 2;
2342  }
2343
2344  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
2345  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
2346  unsigned DefAlign = !DefMN->memoperands_empty()
2347    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
2348  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
2349  unsigned UseAlign = !UseMN->memoperands_empty()
2350    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
2351  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
2352                                  UseMCID, UseIdx, UseAlign);
2353
2354  if (Latency > 1 &&
2355      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
2356    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
2357    // variants are one cycle cheaper.
2358    switch (DefMCID.getOpcode()) {
2359    default: break;
2360    case ARM::LDRrs:
2361    case ARM::LDRBrs: {
2362      unsigned ShOpVal =
2363        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
2364      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2365      if (ShImm == 0 ||
2366          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
2367        --Latency;
2368      break;
2369    }
2370    case ARM::t2LDRs:
2371    case ARM::t2LDRBs:
2372    case ARM::t2LDRHs:
2373    case ARM::t2LDRSHs: {
2374      // Thumb2 mode: lsl only.
2375      unsigned ShAmt =
2376        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
2377      if (ShAmt == 0 || ShAmt == 2)
2378        --Latency;
2379      break;
2380    }
2381    }
2382  }
2383
2384  if (DefAlign < 8 && Subtarget.isCortexA9())
2385    switch (DefMCID.getOpcode()) {
2386    default: break;
2387    case ARM::VLD1q8Pseudo:
2388    case ARM::VLD1q16Pseudo:
2389    case ARM::VLD1q32Pseudo:
2390    case ARM::VLD1q64Pseudo:
2391    case ARM::VLD1q8Pseudo_UPD:
2392    case ARM::VLD1q16Pseudo_UPD:
2393    case ARM::VLD1q32Pseudo_UPD:
2394    case ARM::VLD1q64Pseudo_UPD:
2395    case ARM::VLD2d8Pseudo:
2396    case ARM::VLD2d16Pseudo:
2397    case ARM::VLD2d32Pseudo:
2398    case ARM::VLD2q8Pseudo:
2399    case ARM::VLD2q16Pseudo:
2400    case ARM::VLD2q32Pseudo:
2401    case ARM::VLD2d8Pseudo_UPD:
2402    case ARM::VLD2d16Pseudo_UPD:
2403    case ARM::VLD2d32Pseudo_UPD:
2404    case ARM::VLD2q8Pseudo_UPD:
2405    case ARM::VLD2q16Pseudo_UPD:
2406    case ARM::VLD2q32Pseudo_UPD:
2407    case ARM::VLD3d8Pseudo:
2408    case ARM::VLD3d16Pseudo:
2409    case ARM::VLD3d32Pseudo:
2410    case ARM::VLD1d64TPseudo:
2411    case ARM::VLD3d8Pseudo_UPD:
2412    case ARM::VLD3d16Pseudo_UPD:
2413    case ARM::VLD3d32Pseudo_UPD:
2414    case ARM::VLD1d64TPseudo_UPD:
2415    case ARM::VLD3q8Pseudo_UPD:
2416    case ARM::VLD3q16Pseudo_UPD:
2417    case ARM::VLD3q32Pseudo_UPD:
2418    case ARM::VLD3q8oddPseudo:
2419    case ARM::VLD3q16oddPseudo:
2420    case ARM::VLD3q32oddPseudo:
2421    case ARM::VLD3q8oddPseudo_UPD:
2422    case ARM::VLD3q16oddPseudo_UPD:
2423    case ARM::VLD3q32oddPseudo_UPD:
2424    case ARM::VLD4d8Pseudo:
2425    case ARM::VLD4d16Pseudo:
2426    case ARM::VLD4d32Pseudo:
2427    case ARM::VLD1d64QPseudo:
2428    case ARM::VLD4d8Pseudo_UPD:
2429    case ARM::VLD4d16Pseudo_UPD:
2430    case ARM::VLD4d32Pseudo_UPD:
2431    case ARM::VLD1d64QPseudo_UPD:
2432    case ARM::VLD4q8Pseudo_UPD:
2433    case ARM::VLD4q16Pseudo_UPD:
2434    case ARM::VLD4q32Pseudo_UPD:
2435    case ARM::VLD4q8oddPseudo:
2436    case ARM::VLD4q16oddPseudo:
2437    case ARM::VLD4q32oddPseudo:
2438    case ARM::VLD4q8oddPseudo_UPD:
2439    case ARM::VLD4q16oddPseudo_UPD:
2440    case ARM::VLD4q32oddPseudo_UPD:
2441    case ARM::VLD1DUPq8Pseudo:
2442    case ARM::VLD1DUPq16Pseudo:
2443    case ARM::VLD1DUPq32Pseudo:
2444    case ARM::VLD1DUPq8Pseudo_UPD:
2445    case ARM::VLD1DUPq16Pseudo_UPD:
2446    case ARM::VLD1DUPq32Pseudo_UPD:
2447    case ARM::VLD2DUPd8Pseudo:
2448    case ARM::VLD2DUPd16Pseudo:
2449    case ARM::VLD2DUPd32Pseudo:
2450    case ARM::VLD2DUPd8Pseudo_UPD:
2451    case ARM::VLD2DUPd16Pseudo_UPD:
2452    case ARM::VLD2DUPd32Pseudo_UPD:
2453    case ARM::VLD4DUPd8Pseudo:
2454    case ARM::VLD4DUPd16Pseudo:
2455    case ARM::VLD4DUPd32Pseudo:
2456    case ARM::VLD4DUPd8Pseudo_UPD:
2457    case ARM::VLD4DUPd16Pseudo_UPD:
2458    case ARM::VLD4DUPd32Pseudo_UPD:
2459    case ARM::VLD1LNq8Pseudo:
2460    case ARM::VLD1LNq16Pseudo:
2461    case ARM::VLD1LNq32Pseudo:
2462    case ARM::VLD1LNq8Pseudo_UPD:
2463    case ARM::VLD1LNq16Pseudo_UPD:
2464    case ARM::VLD1LNq32Pseudo_UPD:
2465    case ARM::VLD2LNd8Pseudo:
2466    case ARM::VLD2LNd16Pseudo:
2467    case ARM::VLD2LNd32Pseudo:
2468    case ARM::VLD2LNq16Pseudo:
2469    case ARM::VLD2LNq32Pseudo:
2470    case ARM::VLD2LNd8Pseudo_UPD:
2471    case ARM::VLD2LNd16Pseudo_UPD:
2472    case ARM::VLD2LNd32Pseudo_UPD:
2473    case ARM::VLD2LNq16Pseudo_UPD:
2474    case ARM::VLD2LNq32Pseudo_UPD:
2475    case ARM::VLD4LNd8Pseudo:
2476    case ARM::VLD4LNd16Pseudo:
2477    case ARM::VLD4LNd32Pseudo:
2478    case ARM::VLD4LNq16Pseudo:
2479    case ARM::VLD4LNq32Pseudo:
2480    case ARM::VLD4LNd8Pseudo_UPD:
2481    case ARM::VLD4LNd16Pseudo_UPD:
2482    case ARM::VLD4LNd32Pseudo_UPD:
2483    case ARM::VLD4LNq16Pseudo_UPD:
2484    case ARM::VLD4LNq32Pseudo_UPD:
2485      // If the address is not 64-bit aligned, the latencies of these
2486      // instructions increases by one.
2487      ++Latency;
2488      break;
2489    }
2490
2491  return Latency;
2492}
2493
2494int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
2495                                      const MachineInstr *MI,
2496                                      unsigned *PredCost) const {
2497  if (MI->isCopyLike() || MI->isInsertSubreg() ||
2498      MI->isRegSequence() || MI->isImplicitDef())
2499    return 1;
2500
2501  if (!ItinData || ItinData->isEmpty())
2502    return 1;
2503
2504  const MCInstrDesc &MCID = MI->getDesc();
2505  unsigned Class = MCID.getSchedClass();
2506  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
2507  if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
2508    // When predicated, CPSR is an additional source operand for CPSR updating
2509    // instructions, this apparently increases their latencies.
2510    *PredCost = 1;
2511  if (UOps)
2512    return ItinData->getStageLatency(Class);
2513  return getNumMicroOps(ItinData, MI);
2514}
2515
2516int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
2517                                      SDNode *Node) const {
2518  if (!Node->isMachineOpcode())
2519    return 1;
2520
2521  if (!ItinData || ItinData->isEmpty())
2522    return 1;
2523
2524  unsigned Opcode = Node->getMachineOpcode();
2525  switch (Opcode) {
2526  default:
2527    return ItinData->getStageLatency(get(Opcode).getSchedClass());
2528  case ARM::VLDMQIA:
2529  case ARM::VSTMQIA:
2530    return 2;
2531  }
2532}
2533
2534bool ARMBaseInstrInfo::
2535hasHighOperandLatency(const InstrItineraryData *ItinData,
2536                      const MachineRegisterInfo *MRI,
2537                      const MachineInstr *DefMI, unsigned DefIdx,
2538                      const MachineInstr *UseMI, unsigned UseIdx) const {
2539  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
2540  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
2541  if (Subtarget.isCortexA8() &&
2542      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
2543    // CortexA8 VFP instructions are not pipelined.
2544    return true;
2545
2546  // Hoist VFP / NEON instructions with 4 or higher latency.
2547  int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
2548  if (Latency <= 3)
2549    return false;
2550  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
2551         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
2552}
2553
2554bool ARMBaseInstrInfo::
2555hasLowDefLatency(const InstrItineraryData *ItinData,
2556                 const MachineInstr *DefMI, unsigned DefIdx) const {
2557  if (!ItinData || ItinData->isEmpty())
2558    return false;
2559
2560  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
2561  if (DDomain == ARMII::DomainGeneral) {
2562    unsigned DefClass = DefMI->getDesc().getSchedClass();
2563    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
2564    return (DefCycle != -1 && DefCycle <= 2);
2565  }
2566  return false;
2567}
2568
2569bool
2570ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
2571                                     unsigned &AddSubOpc,
2572                                     bool &NegAcc, bool &HasLane) const {
2573  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
2574  if (I == MLxEntryMap.end())
2575    return false;
2576
2577  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
2578  MulOpc = Entry.MulOpc;
2579  AddSubOpc = Entry.AddSubOpc;
2580  NegAcc = Entry.NegAcc;
2581  HasLane = Entry.HasLane;
2582  return true;
2583}
2584