//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");

static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);

namespace {
  /// ReduceTable - A static table with information on mapping from wide
  /// opcodes to narrow ones.
  struct ReduceEntry {
    uint16_t WideOpc;      // Wide opcode
    uint16_t NarrowOpc1;   // Narrow opcode to transform to
    uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
    unsigned LowRegs1 : 1; // Only possible if low-registers are used
    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
    unsigned PredCC1  : 2; // 0 - If predicated, must not set CPSR;
                           //     if not predicated, must set CPSR.
                           // 1 - No cc field.
                           // 2 - Always set CPSR.
    unsigned PredCC2  : 2;
    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
    unsigned Special  : 1; // Needs to be dealt with specially
  };
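
  // Reading an entry, using the t2ADDri row below as an illustration:
  // t2ADDri narrows to tADDi3 (3-bit immediate) or, in two-address form,
  // to tADDi8 (8-bit immediate); both forms require low registers, and the
  // row is marked Special because an SP base needs the separate tADDrSPi
  // handling in ReduceSpecial(). A hypothetical before / after:
  //   add.w r0, r1, #7    =>  adds r0, r1, #7   (tADDi3)
  //   add.w r0, r0, #200  =>  adds r0, #200     (tADDi8, two-address)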

  static const ReduceEntry ReduceTable[] = {
    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, PF, S
    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0 },
    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,1 },
    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0 },
    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1 },
    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1 },
    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0 },
    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0 },
    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0 },
    // FIXME: Disable CMN, as CCodes are backwards from compare expectations.
    //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0,0 },
    { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,    1,   0,  2,0, 0,0 },
    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0,0 },
    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0,1 },
    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0 },
    // FIXME: adr.n immediate offset must be multiple of 4.
    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0,0 },
    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 1,0 },
    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0 },
    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0 },
    // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
    // likely to cause issues in the loop. As a size / performance workaround,
    // they are not marked as such.
    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,0 },
    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,1 },
    // FIXME: Do we need the 16-bit 'S' variant?
    { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,    0,   0,  1,0, 0,0 },
    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0 },
    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0 },
    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0 },
    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0,0 },
    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0,0 },
    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0,0 },
    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 1,0 },
    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 0,1 },
    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0 },
    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0 },
    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0,0 },
    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0 },
    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0,0 },
    { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0,1 },
    { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0,1 },
    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0,0 },
    { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0,1 },
    { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0,1 },

    // FIXME: Clean this up after splitting each Thumb load / store opcode
    // into multiple ones.
    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1 },
    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1 },

    // At this point it is safe to translate acquire loads to normal loads.
    // There is no risk of reordering loads.
    { ARM::ATOMIC_t2LDRi12,
                    ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1 },
    { ARM::ATOMIC_t2LDRs,
                    ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::ATOMIC_t2LDRBi12,
                    ARM::tLDRBi,  0,             5,   0,    1,   0,  0,0, 0,1 },
    { ARM::ATOMIC_t2LDRBs,
                    ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::ATOMIC_t2LDRHi12,
                    ARM::tLDRHi,  0,             5,   0,    1,   0,  0,0, 0,1 },
    { ARM::ATOMIC_t2LDRHs,
                    ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },

    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1 },
    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },

    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1 },
    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1 },
    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 0,1 },
    // ARM::t2STMIA (with no base register writeback) has no Thumb1 equivalent.
    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 0,1 },
    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1 },
  };

  class Thumb2SizeReduce : public MachineFunctionPass {
  public:
    static char ID;
    Thumb2SizeReduce();

    const Thumb2InstrInfo *TII;
    const ARMSubtarget *STI;

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const {
      return "Thumb2 instruction size reduction pass";
    }

  private:
    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
    DenseMap<unsigned, unsigned> ReduceOpcodeMap;

    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
                             bool IsSelfLoop);

    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                         bool is2Addr, ARMCC::CondCodes Pred,
                         bool LiveCPSR, bool &HasCC, bool &CCDead);

    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry);

    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry, bool LiveCPSR,
                       MachineInstr *CPSRDef, bool IsSelfLoop);

    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
    /// instruction.
    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry,
                       bool LiveCPSR, MachineInstr *CPSRDef,
                       bool IsSelfLoop);

    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
    /// non-two-address instruction.
    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry,
                        bool LiveCPSR, MachineInstr *CPSRDef,
                        bool IsSelfLoop);

    /// ReduceMBB - Reduce width of instructions in the specified basic block.
    bool ReduceMBB(MachineBasicBlock &MBB);
  };
  char Thumb2SizeReduce::ID = 0;
}

Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      assert(false && "Duplicated entries?");
  }
}

static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
    if (*Regs == ARM::CPSR)
      return true;
  return false;
}

/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// 16-bit instructions with the 's' bit set partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
/// instruction, which hurts the out-of-order execution engine's ability to
/// do register renaming magic.
/// This function checks if there is a read-after-write dependency between the
/// last instruction that defines the CPSR and the current instruction. If
/// there is, then there is no harm done since the instruction cannot be
/// retired before the CPSR-setting instruction anyway.
/// Note, we are not doing full dependency analysis here for the sake of
/// compile time. We're not looking for cases like:
/// r0 = muls ...
/// r1 = add.w r0, ...
/// ...
///    = mul.w r1
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w.
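/// Conversely, narrowing is rejected in a case like the following
/// (an illustrative sketch; the register numbers are arbitrary):
/// r0 = muls ...
/// r2 = add.w r3, r4
/// Here the add.w reads nothing the muls wrote, so narrowing it to adds
/// would create only a false CPSR dependency on the muls.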
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
                                      bool FirstInSelfLoop) {
  // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
  if (!STI->avoidCPSRPartialUpdate())
    return false;

  if (!Def)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
    return FirstInSelfLoop;

  SmallSet<unsigned, 2> Defs;
  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = Def->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    Defs.insert(Reg);
  }

  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = Use->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (Defs.count(Reg))
      return false;
  }

  // No read-after-write dependency. The narrowing would add a false
  // dependency.
  return true;
}

bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    // Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If the old opcode does not implicitly define CPSR, then it's not ok,
    // since the new opcode's CPSR def is not meant to be thrown away,
    // e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}
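
// For instance (an illustrative reading of the PredCC encodings above):
// for t2ADDri (PredCC == 0), the unpredicated narrow form always sets CPSR,
// so narrowing is only legal if the wide add already set CPSR or CPSR is
// dead at that point; t2CMPri (PredCC == 2) always sets CPSR, matching
// tCMPi8; and t2MOVr (PredCC == 1) maps to tMOVr, which has no cc field
// at all.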

static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA     ||
                 Opc == ARM::t2LDMDB     || Opc == ARM::t2LDMIA_UPD ||
                 Opc == ARM::t2LDMDB_UPD);
  bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}
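
// For example (illustrative): "ldr.w r0, [r8]" fails this check because r8
// is a high register, while "ldr.w r0, [sp, #8]" passes via the t2LDRi12
// special case above and can later be narrowed to the SP-relative tLDRspi.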

bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t  ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12: case ARM::ATOMIC_t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
      HasOffReg = false;
    }

    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12: case ARM::ATOMIC_t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs: case ARM::ATOMIC_t2LDRs:
  case ARM::t2LDRBs: case ARM::ATOMIC_t2LDRBs:
  case ARM::t2LDRHs: case ARM::ATOMIC_t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB: {
    unsigned BaseReg = MI->getOperand(0).getReg();
    if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
      return false;

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
      if (MI->getOperand(i).getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_RET: {
    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  if (HasShift) {
    OffsetReg  = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }
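
  // Worked example (illustrative): for t2LDRi12, Scale = 4 and ImmLimit = 5,
  // so MaxOffset = (2^5 - 1) * 4 = 124. "ldr.w r0, [r1, #124]" can be
  // narrowed (encoded immediate 124 / 4 = 31), while #126 is rejected as
  // misaligned and #128 as out of range.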

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
  if (!isLdStMul) {
    MIB.addOperand(MI->getOperand(0));
    MIB.addOperand(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
  }

  // Transfer the rest of the operands.
  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}

bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, MachineInstr *CPSRDef,
                                bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be a multiple of 4 and in range, the destination
    // register must be a low reg, the predicate must be "always", and the
    // condition flags must not be set.
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
                                      TII->get(ARM::tADDrSPi))
      .addOperand(MI->getOperand(0))
      .addOperand(MI->getOperand(1))
      .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
    AddDefaultPred(MIB);
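
    // Illustrative example: "add.w r0, sp, #16" becomes a tADDrSPi whose
    // machine immediate operand is 4 (16 / 4); the implied scale is what
    // limits the source immediate to a multiple of 4 no larger than 1020.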

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoad() || MI->mayStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    unsigned PredReg = 0;
    if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri: {
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
          return true;
        // fallthrough
      }
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the master table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr, ARM::tCMPr, 0, 0, 0, 1, 1, 2, 0, 0, 1 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
  }
  }
  return false;
}

bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, MachineInstr *CPSRDef,
                                bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    unsigned Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1, CommOpIdx2;
    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
    if (!CommutedMI)
      return false;
  }
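
  // Commuting example (illustrative): "and.w r0, r1, r0" has its tied
  // operand second, so it is first commuted to "and.w r0, r0, r1" and can
  // then be narrowed to the two-address "ands r0, r1".
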
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    unsigned Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update performed by
  // 16-bit instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.addOperand(MI->getOperand(0));
  if (NewMCID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of the operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.OpInfo[i].isPredicate())
      continue;
    MIB.addOperand(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}

bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, MachineInstr *CPSRDef,
                                 bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.OpInfo[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      unsigned Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() &&
               !MCID.OpInfo[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update performed by
  // 16-bit instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.addOperand(MI->getOperand(0));
  if (NewMCID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of the operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip the implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.addOperand(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    AddDefaultPred(MIB);

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}

static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be a live-in.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *CPSRDef = 0;
  MachineInstr *BundleMI = 0;

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
                                    E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      // Ignore "special" cases for now.
      if (Entry.Special) {
        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
          Modified = true;
          MachineBasicBlock::instr_iterator I = prior(NextMII);
          MI = &*I;
        }
        goto ProcessNext;
      }

      // Try to transform to a 16-bit two-address instruction.
      if (Entry.NarrowOpc2 &&
          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
        goto ProcessNext;
      }

      // Try to transform to a 16-bit non-two-address instruction.
      if (Entry.NarrowOpc1 &&
          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
        Modified = true;
        MachineBasicBlock::instr_iterator I = prior(NextMII);
        MI = &*I;
      }
    }

  ProcessNext:
    if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
      // FIXME: Since the post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR-defining instruction.
      CPSRDef = MI;
      IsSelfLoop = false;
    }
  }

  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  const TargetMachine &TM = MF.getTarget();
  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
  STI = &TM.getSubtarget<ARMSubtarget>();

  bool Modified = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
    Modified |= ReduceMBB(*I);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass() {
  return new Thumb2SizeReduce();
}