Thumb2SizeReduction.cpp revision 8442d00a0ea750200b34a7fa94b5b72033da65b3
1//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE "t2-reduce-size"
11#include "ARM.h"
12#include "ARMBaseRegisterInfo.h"
13#include "ARMBaseInstrInfo.h"
14#include "Thumb2InstrInfo.h"
15#include "llvm/CodeGen/MachineInstr.h"
16#include "llvm/CodeGen/MachineInstrBuilder.h"
17#include "llvm/CodeGen/MachineFunctionPass.h"
18#include "llvm/Support/CommandLine.h"
19#include "llvm/Support/Compiler.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/Statistic.h"
23using namespace llvm;
24
25STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
26STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
27
28static cl::opt<int> ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden);
29
30namespace {
31  /// ReduceTable - A static table with information on mapping from wide
32  /// opcodes to narrow
33  struct ReduceEntry {
34    unsigned WideOpc;      // Wide opcode
35    unsigned NarrowOpc1;   // Narrow opcode to transform to
36    unsigned NarrowOpc2;   // Narrow opcode when it's two-address
37    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
38    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
39    unsigned LowRegs1 : 1; // Only possible if low-registers are used
40    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
41    unsigned PredCC1  : 1; // 0 - If predicated, cc is on and vice versa.
42                           // 1 - No cc field.
43    unsigned PredCC2  : 1;
44    unsigned Special  : 1; // Needs to be dealt with specially
45  };
46
47  static const ReduceEntry ReduceTable[] = {
48    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, S
49    { ARM::t2ADCrr, ARM::tADC,    0,             0,   0,    1,   0,  0,0, 0 },
50    // FIXME: t2ADDS variants.
51    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
52    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
53    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 0 },
54    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 0 },
55    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 0 },
56    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 0 },
57    { ARM::t2CMNrr, ARM::tCMN,    0,             0,   0,    1,   0,  1,0, 0 },
58    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  1,0, 0 },
59    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  1,0, 0 },
60    { ARM::t2CMPzri,ARM::tCMPzi8, 0,             8,   0,    1,   0,  1,0, 0 },
61    { ARM::t2CMPzrr,ARM::tCMPzhir,0,             0,   0,    0,   0,  1,0, 0 },
62    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 0 },
63    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 0 },
64    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 0 },
65    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
66    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
67    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
68    // FIXME: Do we need the 16-bit 'S' variant?
69    // FIXME: t2MOVcc
70    { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
71    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 0 },
72    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0 },
73    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 0 },
74    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0 },
75    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0 },
76    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0 },
77    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 0 },
78    // FIXME: T2RSBri immediate must be zero. Also need entry for T2RSBS
79    //{ ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0 },
80    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0 },
81    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0 },
82    { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0 },
83    { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0 },
84    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  1,0, 0 },
85    { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0 },
86    { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0 }
87  };
88
89  class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
90  public:
91    static char ID;
92    Thumb2SizeReduce();
93
94    const TargetInstrInfo *TII;
95
96    virtual bool runOnMachineFunction(MachineFunction &MF);
97
98    virtual const char *getPassName() const {
99      return "Thumb2 instruction size reduction pass";
100    }
101
102  private:
103    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
104    DenseMap<unsigned, unsigned> ReduceOpcodeMap;
105
106    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
107    /// instruction.
108    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
109                       const ReduceEntry &Entry,
110                       bool LiveCPSR);
111
112    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
113    /// non-two-address instruction.
114    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
115                        const ReduceEntry &Entry,
116                        bool LiveCPSR);
117
118    /// ReduceMBB - Reduce width of instructions in the specified basic block.
119    bool ReduceMBB(MachineBasicBlock &MBB);
120  };
121  char Thumb2SizeReduce::ID = 0;
122}
123
124Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
125  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
126    unsigned FromOpc = ReduceTable[i].WideOpc;
127    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
128      assert(false && "Duplicated entries?");
129  }
130}
131
132static bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
133                            bool is2Addr, ARMCC::CondCodes Pred,
134                            bool LiveCPSR, bool &HasCC, bool &CCDead) {
135  if ((is2Addr  && Entry.PredCC2 == 0) ||
136      (!is2Addr && Entry.PredCC1 == 0)) {
137    if (Pred == ARMCC::AL) {
138      // Not predicated, must set CPSR.
139      if (!HasCC) {
140        // Original instruction was not setting CPSR, but CPSR is not
141        // currently live anyway. It's ok to set it. The CPSR def is
142        // dead though.
143        if (!LiveCPSR) {
144          HasCC = true;
145          CCDead = true;
146          return true;
147        }
148        return false;
149      }
150    } else {
151      // Predicated, must not set CPSR.
152      if (HasCC)
153        return false;
154    }
155  } else {
156    // 16-bit instruction does not set CPSR.
157    if (HasCC)
158      return false;
159  }
160
161  return true;
162}
163
164bool
165Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
166                                const ReduceEntry &Entry,
167                                bool LiveCPSR) {
168  const TargetInstrDesc &TID = MI->getDesc();
169  unsigned Reg0 = MI->getOperand(0).getReg();
170  unsigned Reg1 = MI->getOperand(1).getReg();
171  if (Reg0 != Reg1)
172    return false;
173  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
174    return false;
175  if (Entry.Imm2Limit) {
176    unsigned Imm = MI->getOperand(2).getImm();
177    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
178    if (Imm > Limit)
179      return false;
180  } else {
181    unsigned Reg2 = MI->getOperand(2).getReg();
182    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
183      return false;
184  }
185
186  // Check if it's possible / necessary to transfer the predicate.
187  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
188  unsigned PredReg = 0;
189  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
190  bool SkipPred = false;
191  if (Pred != ARMCC::AL) {
192    if (!NewTID.isPredicable())
193      // Can't transfer predicate, fail.
194      return false;
195  } else {
196    SkipPred = !NewTID.isPredicable();
197  }
198
199  bool HasCC = false;
200  bool CCDead = false;
201  if (TID.hasOptionalDef()) {
202    unsigned NumOps = TID.getNumOperands();
203    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
204    if (HasCC && MI->getOperand(NumOps-1).isDead())
205      CCDead = true;
206  }
207  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
208    return false;
209
210  // Add the 16-bit instruction.
211  DebugLoc dl = MI->getDebugLoc();
212  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc2));
213  MIB.addOperand(MI->getOperand(0));
214  if (HasCC)
215    AddDefaultT1CC(MIB, CCDead);
216
217  // Transfer the rest of operands.
218  unsigned NumOps = TID.getNumOperands();
219  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
220    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
221      continue;
222    if (SkipPred && TID.OpInfo[i].isPredicate())
223      continue;
224    MIB.addOperand(MI->getOperand(i));
225  }
226
227  DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;
228
229  MBB.erase(MI);
230  ++Num2Addrs;
231  return true;
232}
233
234bool
235Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
236                                 const ReduceEntry &Entry,
237                                 bool LiveCPSR) {
238  unsigned Limit = ~0U;
239  if (Entry.Imm1Limit)
240    Limit = (1 << Entry.Imm1Limit) - 1;
241
242  const TargetInstrDesc &TID = MI->getDesc();
243  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
244    if (TID.OpInfo[i].isPredicate())
245      continue;
246    const MachineOperand &MO = MI->getOperand(i);
247    if (MO.isReg()) {
248      unsigned Reg = MO.getReg();
249      if (!Reg || Reg == ARM::CPSR)
250        continue;
251      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
252        return false;
253    } else if (MO.isImm()) {
254      if (MO.getImm() > Limit)
255        return false;
256    }
257  }
258
259  // Check if it's possible / necessary to transfer the predicate.
260  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
261  unsigned PredReg = 0;
262  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
263  bool SkipPred = false;
264  if (Pred != ARMCC::AL) {
265    if (!NewTID.isPredicable())
266      // Can't transfer predicate, fail.
267      return false;
268  } else {
269    SkipPred = !NewTID.isPredicable();
270  }
271
272  bool HasCC = false;
273  bool CCDead = false;
274  if (TID.hasOptionalDef()) {
275    unsigned NumOps = TID.getNumOperands();
276    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
277    if (HasCC && MI->getOperand(NumOps-1).isDead())
278      CCDead = true;
279  }
280  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
281    return false;
282
283  // Add the 16-bit instruction.
284  DebugLoc dl = MI->getDebugLoc();
285  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1));
286  MIB.addOperand(MI->getOperand(0));
287  if (HasCC)
288    AddDefaultT1CC(MIB, CCDead);
289
290  // Transfer the rest of operands.
291  unsigned NumOps = TID.getNumOperands();
292  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
293    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
294      continue;
295    if (SkipPred && TID.OpInfo[i].isPredicate())
296      continue;
297    MIB.addOperand(MI->getOperand(i));
298  }
299
300
301  DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;
302
303  MBB.erase(MI);
304  ++NumNarrows;
305  return true;
306}
307
308static bool UpdateCPSRLiveness(MachineInstr &MI, bool LiveCPSR) {
309  bool HasDef = false;
310  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
311    const MachineOperand &MO = MI.getOperand(i);
312    if (!MO.isReg() || MO.isUndef())
313      continue;
314    if (MO.getReg() != ARM::CPSR)
315      continue;
316    if (MO.isDef()) {
317      if (!MO.isDead())
318        HasDef = true;
319      continue;
320    }
321
322    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
323    if (MO.isKill()) {
324      LiveCPSR = false;
325      break;
326    }
327  }
328
329  return HasDef || LiveCPSR;
330}
331
332bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
333  bool Modified = false;
334
335  bool LiveCPSR = false;
336  // Yes, CPSR could be livein.
337  for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
338         E = MBB.livein_end(); I != E; ++I) {
339    if (*I == ARM::CPSR) {
340      LiveCPSR = true;
341      break;
342    }
343  }
344
345  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
346  MachineBasicBlock::iterator NextMII = next(MII);
347  for (; MII != E; MII = NextMII) {
348    NextMII = next(MII);
349
350    MachineInstr *MI = &*MII;
351    unsigned Opcode = MI->getOpcode();
352    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
353    if (OPI != ReduceOpcodeMap.end()) {
354      const ReduceEntry &Entry = ReduceTable[OPI->second];
355      // Ignore "special" cases for now.
356      if (Entry.Special)
357        goto ProcessNext;
358
359      // Try to transform to a 16-bit two-address instruction.
360      if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
361        Modified = true;
362        MachineBasicBlock::iterator I = prior(NextMII);
363        MI = &*I;
364        goto ProcessNext;
365      }
366
367      // Try to transform ro a 16-bit non-two-address instruction.
368      if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR))
369        Modified = true;
370    }
371
372  ProcessNext:
373    LiveCPSR = UpdateCPSRLiveness(*MI, LiveCPSR);
374
375    if (ReduceLimit != -1 && ((int)(NumNarrows + Num2Addrs) > ReduceLimit))
376      break;
377  }
378
379  return Modified;
380}
381
382bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
383  const TargetMachine &TM = MF.getTarget();
384  TII = TM.getInstrInfo();
385
386  bool Modified = false;
387  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
388    Modified |= ReduceMBB(*I);
389  return Modified;
390}
391
392/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
393/// reduction pass.
394FunctionPass *llvm::createThumb2SizeReductionPass() {
395  return new Thumb2SizeReduce();
396}
397