Thumb2SizeReduction.cpp revision 2a7b41ba4d3eb3c6003f6768dc20b28d83eac265
147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//
347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//                     The LLVM Compiler Infrastructure
447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//
54ee451de366474b9c228b4e5fa573795a715216dChris Lattner// This file is distributed under the University of Illinois Open Source
64ee451de366474b9c228b4e5fa573795a715216dChris Lattner// License. See LICENSE.TXT for details.
747877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//
847877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//===----------------------------------------------------------------------===//
947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner
1047877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#define DEBUG_TYPE "t2-reduce-size"
1147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARM.h"
1247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMAddressingModes.h"
1347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMBaseRegisterInfo.h"
1447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMBaseInstrInfo.h"
1547877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMSubtarget.h"
1647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "Thumb2InstrInfo.h"
1731442f9dc5512b6a29cdb332b12ae09a1c9e8176Chris Lattner#include "llvm/CodeGen/MachineInstr.h"
180f54dcbf07c69e41ecaa6b4fbf0d94956d8e9ff5Devang Patel#include "llvm/CodeGen/MachineInstrBuilder.h"
1947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "llvm/CodeGen/MachineFunctionPass.h"
205a29c9eed157af51a8d338b5a225b146881819e8Gordon Henriksen#include "llvm/Support/CommandLine.h"
2147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "llvm/Support/Debug.h"
221532f3ddd77c362dd5f613af06b4de636e3c5b0eDale Johannesen#include "llvm/Support/raw_ostream.h"
2347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "llvm/ADT/DenseMap.h"
2431442f9dc5512b6a29cdb332b12ae09a1c9e8176Chris Lattner#include "llvm/ADT/Statistic.h"
25cb3718832375a581c5ea23f15918f3ea447a446cOwen Andersonusing namespace llvm;
2647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner
2747877050e7ea02c3514497aba54eef1d4cee8452Chris LattnerSTATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
282c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan GohmanSTATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
292c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan GohmanSTATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");
302c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan Gohman
312c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan Gohmanstatic cl::opt<int> ReduceLimit("t2-reduce-limit",
3285ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattner                                cl::init(-1), cl::Hidden);
3385ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattnerstatic cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
3485ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattner                                     cl::init(-1), cl::Hidden);
3585ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattnerstatic cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
368bd6035750f1b290832a3b1c90766d9b45ed8d6bEvan Cheng                                     cl::init(-1), cl::Hidden);
378bd6035750f1b290832a3b1c90766d9b45ed8d6bEvan Cheng
3893f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksennamespace {
3993f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen  /// ReduceTable - A static table with information on mapping from wide
4085ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattner  /// opcodes to narrow
41459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner  struct ReduceEntry {
42459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner    unsigned WideOpc;      // Wide opcode
43459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner    unsigned NarrowOpc1;   // Narrow opcode to transform to
44459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner    unsigned NarrowOpc2;   // Narrow opcode when it's two-address
45459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
46459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
47dc756858f92a397ed30362ba8251fec56479735fDan Gohman    unsigned LowRegs1 : 1; // Only possible if low-registers are used
48dc756858f92a397ed30362ba8251fec56479735fDan Gohman    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
49dc756858f92a397ed30362ba8251fec56479735fDan Gohman    unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
50eb0d6abee36c274cf081948795f4675d8f33fc6fDan Gohman                           // 1 - No cc field.
51dc756858f92a397ed30362ba8251fec56479735fDan Gohman                           // 2 - Always set CPSR.
52dc756858f92a397ed30362ba8251fec56479735fDan Gohman    unsigned PredCC2  : 2;
532c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan Gohman    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
5404523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    unsigned Special  : 1; // Needs to be dealt with specially
55bfae83139dcb4fffd50b939e1b1224b0126f04d4Dan Gohman  };
56cb3718832375a581c5ea23f15918f3ea447a446cOwen Anderson
5704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling  static const ReduceEntry ReduceTable[] = {
5898a366d547772010e94609e4584489b3e5ce0043Bill Wendling    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, PF, S
5902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0 },
60be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,1 },
6104523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0 },
6204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1 },
639d4209fb82cab74bae76511e3f21ef1c24ec948aJim Laskey    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1 },
649d4209fb82cab74bae76511e3f21ef1c24ec948aJim Laskey    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0 },
6502dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
6602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0 },
6704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0 },
6804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    //FIXME: Disable CMN, as CCodes are backwards from compare expectations
69be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0,0 },
7004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0,0 },
7104523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0,1 },
7298a366d547772010e94609e4584489b3e5ce0043Bill Wendling    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0 },
73bbf1db72133e9cf986e4da6260736335533067dbEvan Cheng    // FIXME: adr.n immediate offset must be multiple of 4.
74d703ed6aed98c8156829399efbafb13a3cca0b69Evan Cheng    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0,0 },
7547877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 1,0 },
7604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0 },
7704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
7804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0 },
79be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
8004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    // likely to cause issue in the loop. As a size / performance workaround,
8104523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    // they are not marked as such.
8204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,0 },
8304523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,1 },
8404523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    // FIXME: Do we need the 16-bit 'S' variant?
8504523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,    0,   0,  1,0, 0,0 },
8604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0,0 },
8747877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0,0 },
8804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0 },
8904523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0 },
9004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0 },
9102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0,0 },
9204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0,0 },
9304523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0,0 },
9404523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 1,0 },
95bfae83139dcb4fffd50b939e1b1224b0126f04d4Dan Gohman    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0,1 },
9604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 0,1 },
9798a366d547772010e94609e4584489b3e5ce0043Bill Wendling    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0 },
9804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0 },
99be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0,0 },
10002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0 },
1015eca075b74d62c621b160aa216b4cd50829a2cc7Gordon Henriksen    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0,0 },
10204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0,0 },
10347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0,0 },
10447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0,0 },
10504523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0,0 },
10647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0,0 },
10747877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner
10847877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    // FIXME: Clean this up after splitting each Thumb load / store opcode
10947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    // into multiple ones.
11047877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1 },
11147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
11247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
11347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
11447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
115bfae83139dcb4fffd50b939e1b1224b0126f04d4Dan Gohman    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
11647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1 },
11798a366d547772010e94609e4584489b3e5ce0043Bill Wendling    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1 },
11802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1 },
119be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
12002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
12102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
122be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
12302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
12402dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman
125be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1 },
12602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1 },
12702dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 0,1 },
12802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
12902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 0,1 },
13002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1 },
13102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman  };
13202dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman
13302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman  class Thumb2SizeReduce : public MachineFunctionPass {
13402dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman  public:
13598a366d547772010e94609e4584489b3e5ce0043Bill Wendling    static char ID;
13698a366d547772010e94609e4584489b3e5ce0043Bill Wendling    Thumb2SizeReduce();
13702dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman
138be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling    const Thumb2InstrInfo *TII;
13998a366d547772010e94609e4584489b3e5ce0043Bill Wendling    const ARMSubtarget *STI;
14047877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner
14102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    virtual bool runOnMachineFunction(MachineFunction &MF);
14247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner
14398a366d547772010e94609e4584489b3e5ce0043Bill Wendling    virtual const char *getPassName() const {
144c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner      return "Thumb2 instruction size reduction pass";
145c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner    }
1463b0da26e202cbbeb22508231f4278bda8e995391Daniel Dunbar
147c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner  private:
14802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
14993f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen    DenseMap<unsigned, unsigned> ReduceOpcodeMap;
15002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman
1511532f3ddd77c362dd5f613af06b4de636e3c5b0eDale Johannesen    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
152b6d5b1439047609c050576f3dc52b722e76bd30bDale Johannesen
15302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
15447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner                         bool is2Addr, ARMCC::CondCodes Pred,
15547877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner                         bool LiveCPSR, bool &HasCC, bool &CCDead);
15604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling
15798a366d547772010e94609e4584489b3e5ce0043Bill Wendling    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
158c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner                         const ReduceEntry &Entry);
159c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner
160e9e6bdf27fca46dc9eca2ebdf73e03747d1859abBill Wendling    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
1612b58ce5ab4e22e796303d68fb246d4031cb5d4caBill Wendling                       const ReduceEntry &Entry, bool LiveCPSR,
162c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner                       MachineInstr *CPSRDef);
163f4db3a51c7d806f7dcef5d9625e7cdf7f122dca9Daniel Dunbar
164f4db3a51c7d806f7dcef5d9625e7cdf7f122dca9Daniel Dunbar    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
1653b0da26e202cbbeb22508231f4278bda8e995391Daniel Dunbar    /// instruction.
166c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
16702dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman                       const ReduceEntry &Entry,
16802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman                       bool LiveCPSR, MachineInstr *CPSRDef);
169dc756858f92a397ed30362ba8251fec56479735fDan Gohman
170eb0d6abee36c274cf081948795f4675d8f33fc6fDan Gohman    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
17198a366d547772010e94609e4584489b3e5ce0043Bill Wendling    /// non-two-address instruction.
172dc756858f92a397ed30362ba8251fec56479735fDan Gohman    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
173dc756858f92a397ed30362ba8251fec56479735fDan Gohman                        const ReduceEntry &Entry,
17447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner                        bool LiveCPSR, MachineInstr *CPSRDef);
175be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling
17647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    /// ReduceMBB - Reduce width of instructions in the specified basic block.
17704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling    bool ReduceMBB(MachineBasicBlock &MBB);
17847877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner  };
17947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner  char Thumb2SizeReduce::ID = 0;
18004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling}
1810f940c95d4506f8d04fa2aeda8a79cadb3105fe3Bill Wendling
18298a366d547772010e94609e4584489b3e5ce0043Bill WendlingThumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
183cc8f603f531c906782e4966107ae29667eb6632cBill Wendling  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
1843c42f1211874665e8ea6eea55a45024b557afa61Chris Lattner    unsigned FromOpc = ReduceTable[i].WideOpc;
1858f0d99e463a2dcb5a40d14f0481a0e322bcf79e4Evan Cheng    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
1860f940c95d4506f8d04fa2aeda8a79cadb3105fe3Bill Wendling      assert(false && "Duplicated entries?");
187b013f5094c3bdb36472da996d63d5c0f75b6d4d3Anton Korobeynikov  }
188be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling}
189b013f5094c3bdb36472da996d63d5c0f75b6d4d3Anton Korobeynikov
190b013f5094c3bdb36472da996d63d5c0f75b6d4d3Anton Korobeynikovstatic bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
1913f32d65912b4da23793dab618d981be2ce11c331Evan Cheng  for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
19247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner    if (*Regs == ARM::CPSR)
1933f32d65912b4da23793dab618d981be2ce11c331Evan Cheng      return true;
1943f32d65912b4da23793dab618d981be2ce11c331Evan Cheng  return false;
19598a366d547772010e94609e4584489b3e5ce0043Bill Wendling}
1962c1d7726f25b6b219a1518b411351b99d25c1a02Bill Wendling
1972c1d7726f25b6b219a1518b411351b99d25c1a02Bill Wendling/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
1983f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// the 's' 16-bit instruction partially update CPSR. Abort the
19902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// transformation to avoid adding false dependency on last CPSR setting
20004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling/// instruction which hurts the ability for out-of-order execution engine
20102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// to do register renaming magic.
2023f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// This function checks if there is a read-of-write dependency between the
203be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling/// last instruction that defines the CPSR and the current instruction. If there
2043f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// is, then there is no harm done since the instruction cannot be retired
2053f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// before the CPSR setting instruction anyway.
20602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// Note, we are not doing full dependency analysis here for the sake of compile
2073f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// time. We're not looking for cases like:
20802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// r0 = muls ...
209ada779fb11eb411536aa8219a176ca0ce4d58fd1Christopher Lamb/// r1 = add.w r0, ...
21002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// ...
211ada779fb11eb411536aa8219a176ca0ce4d58fd1Christopher Lamb///    = mul.w r1
212ada779fb11eb411536aa8219a176ca0ce4d58fd1Christopher Lamb/// In this case it would have been ok to narrow the mul.w to muls since there
21304523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling/// are indirect RAW dependency between the muls and the mul.w
21447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattnerbool
21547877050e7ea02c3514497aba54eef1d4cee8452Chris LattnerThumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
21602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman  if (!Def || !STI->avoidCPSRPartialUpdate())
2173f32d65912b4da23793dab618d981be2ce11c331Evan Cheng    return false;
21804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling
21902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman  SmallSet<unsigned, 2> Defs;
220e7e7d0d7e39d0c7c659d26b97e8081fce0fcd749Dale Johannesen  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
22198a366d547772010e94609e4584489b3e5ce0043Bill Wendling    const MachineOperand &MO = Def->getOperand(i);
22272f159640382a16e036b63dcb9c0b427e6d5dc0aDale Johannesen    if (!MO.isReg() || MO.isUndef() || MO.isUse())
223e7e7d0d7e39d0c7c659d26b97e8081fce0fcd749Dale Johannesen      continue;
2245ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman    unsigned Reg = MO.getReg();
2255ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman    if (Reg == 0 || Reg == ARM::CPSR)
2265ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman      continue;
2275ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman    Defs.insert(Reg);
22823b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman  }
22998a366d547772010e94609e4584489b3e5ce0043Bill Wendling
23023b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
23123b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman    const MachineOperand &MO = Use->getOperand(i);
23223b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman    if (!MO.isReg() || MO.isUndef() || MO.isDef())
23323b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman      continue;
23423b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman    unsigned Reg = MO.getReg();
23593f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen    if (Defs.count(Reg))
2363f32d65912b4da23793dab618d981be2ce11c331Evan Cheng      return false;
23793f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen  }
23893f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen
23902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman  // No read-after-write dependency. The narrowing will add false dependency.
24093f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen  return true;
2415eca075b74d62c621b160aa216b4cd50829a2cc7Gordon Henriksen}
24204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling
24302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohmanbool
24447877050e7ea02c3514497aba54eef1d4cee8452Chris LattnerThumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
245                                  bool is2Addr, ARMCC::CondCodes Pred,
246                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
247  if ((is2Addr  && Entry.PredCC2 == 0) ||
248      (!is2Addr && Entry.PredCC1 == 0)) {
249    if (Pred == ARMCC::AL) {
250      // Not predicated, must set CPSR.
251      if (!HasCC) {
252        // Original instruction was not setting CPSR, but CPSR is not
253        // currently live anyway. It's ok to set it. The CPSR def is
254        // dead though.
255        if (!LiveCPSR) {
256          HasCC = true;
257          CCDead = true;
258          return true;
259        }
260        return false;
261      }
262    } else {
263      // Predicated, must not set CPSR.
264      if (HasCC)
265        return false;
266    }
267  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
268             (!is2Addr && Entry.PredCC1 == 2)) {
269    /// Old opcode has an optional def of CPSR.
270    if (HasCC)
271      return true;
272    // If old opcode does not implicitly define CPSR, then it's not ok since
273    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
274    if (!HasImplicitCPSRDef(MI->getDesc()))
275      return false;
276    HasCC = true;
277  } else {
278    // 16-bit instruction does not set CPSR.
279    if (HasCC)
280      return false;
281  }
282
283  return true;
284}
285
286static bool VerifyLowRegs(MachineInstr *MI) {
287  unsigned Opc = MI->getOpcode();
288  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA     ||
289                 Opc == ARM::t2LDMDB     || Opc == ARM::t2LDMIA_UPD ||
290                 Opc == ARM::t2LDMDB_UPD);
291  bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
292  bool isSPOk = isPCOk || isLROk;
293  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
294    const MachineOperand &MO = MI->getOperand(i);
295    if (!MO.isReg() || MO.isImplicit())
296      continue;
297    unsigned Reg = MO.getReg();
298    if (Reg == 0 || Reg == ARM::CPSR)
299      continue;
300    if (isPCOk && Reg == ARM::PC)
301      continue;
302    if (isLROk && Reg == ARM::LR)
303      continue;
304    if (Reg == ARM::SP) {
305      if (isSPOk)
306        continue;
307      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
308        // Special case for these ldr / str with sp as base register.
309        continue;
310    }
311    if (!isARMLowRegister(Reg))
312      return false;
313  }
314  return true;
315}
316
317bool
318Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
319                                  const ReduceEntry &Entry) {
320  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
321    return false;
322
323  unsigned Scale = 1;
324  bool HasImmOffset = false;
325  bool HasShift = false;
326  bool HasOffReg = true;
327  bool isLdStMul = false;
328  unsigned Opc = Entry.NarrowOpc1;
329  unsigned OpNum = 3; // First 'rest' of operands.
330  uint8_t  ImmLimit = Entry.Imm1Limit;
331
332  switch (Entry.WideOpc) {
333  default:
334    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
335  case ARM::t2LDRi12:
336  case ARM::t2STRi12:
337    if (MI->getOperand(1).getReg() == ARM::SP) {
338      Opc = Entry.NarrowOpc2;
339      ImmLimit = Entry.Imm2Limit;
340      HasOffReg = false;
341    }
342
343    Scale = 4;
344    HasImmOffset = true;
345    HasOffReg = false;
346    break;
347  case ARM::t2LDRBi12:
348  case ARM::t2STRBi12:
349    HasImmOffset = true;
350    HasOffReg = false;
351    break;
352  case ARM::t2LDRHi12:
353  case ARM::t2STRHi12:
354    Scale = 2;
355    HasImmOffset = true;
356    HasOffReg = false;
357    break;
358  case ARM::t2LDRs:
359  case ARM::t2LDRBs:
360  case ARM::t2LDRHs:
361  case ARM::t2LDRSBs:
362  case ARM::t2LDRSHs:
363  case ARM::t2STRs:
364  case ARM::t2STRBs:
365  case ARM::t2STRHs:
366    HasShift = true;
367    OpNum = 4;
368    break;
369  case ARM::t2LDMIA:
370  case ARM::t2LDMDB: {
371    unsigned BaseReg = MI->getOperand(0).getReg();
372    if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
373      return false;
374
375    // For the non-writeback version (this one), the base register must be
376    // one of the registers being loaded.
377    bool isOK = false;
378    for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
379      if (MI->getOperand(i).getReg() == BaseReg) {
380        isOK = true;
381        break;
382      }
383    }
384
385    if (!isOK)
386      return false;
387
388    OpNum = 0;
389    isLdStMul = true;
390    break;
391  }
392  case ARM::t2LDMIA_RET: {
393    unsigned BaseReg = MI->getOperand(1).getReg();
394    if (BaseReg != ARM::SP)
395      return false;
396    Opc = Entry.NarrowOpc2; // tPOP_RET
397    OpNum = 2;
398    isLdStMul = true;
399    break;
400  }
401  case ARM::t2LDMIA_UPD:
402  case ARM::t2LDMDB_UPD:
403  case ARM::t2STMIA_UPD:
404  case ARM::t2STMDB_UPD: {
405    OpNum = 0;
406
407    unsigned BaseReg = MI->getOperand(1).getReg();
408    if (BaseReg == ARM::SP &&
409        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
410         Entry.WideOpc == ARM::t2STMDB_UPD)) {
411      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
412      OpNum = 2;
413    } else if (!isARMLowRegister(BaseReg) ||
414               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
415                Entry.WideOpc != ARM::t2STMIA_UPD)) {
416      return false;
417    }
418
419    isLdStMul = true;
420    break;
421  }
422  }
423
424  unsigned OffsetReg = 0;
425  bool OffsetKill = false;
426  if (HasShift) {
427    OffsetReg  = MI->getOperand(2).getReg();
428    OffsetKill = MI->getOperand(2).isKill();
429
430    if (MI->getOperand(3).getImm())
431      // Thumb1 addressing mode doesn't support shift.
432      return false;
433  }
434
435  unsigned OffsetImm = 0;
436  if (HasImmOffset) {
437    OffsetImm = MI->getOperand(2).getImm();
438    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
439
440    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
441      // Make sure the immediate field fits.
442      return false;
443  }
444
445  // Add the 16-bit load / store instruction.
446  DebugLoc dl = MI->getDebugLoc();
447  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
448  if (!isLdStMul) {
449    MIB.addOperand(MI->getOperand(0));
450    MIB.addOperand(MI->getOperand(1));
451
452    if (HasImmOffset)
453      MIB.addImm(OffsetImm / Scale);
454
455    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
456
457    if (HasOffReg)
458      MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
459  }
460
461  // Transfer the rest of operands.
462  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
463    MIB.addOperand(MI->getOperand(OpNum));
464
465  // Transfer memoperands.
466  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
467
468  // Transfer MI flags.
469  MIB.setMIFlags(MI->getFlags());
470
471  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
472
473  MBB.erase(MI);
474  ++NumLdSts;
475  return true;
476}
477
478bool
479Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
480                                const ReduceEntry &Entry,
481                                bool LiveCPSR, MachineInstr *CPSRDef) {
482  unsigned Opc = MI->getOpcode();
483  if (Opc == ARM::t2ADDri) {
484    // If the source register is SP, try to reduce to tADDrSPi, otherwise
485    // it's a normal reduce.
486    if (MI->getOperand(1).getReg() != ARM::SP) {
487      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
488        return true;
489      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
490    }
491    // Try to reduce to tADDrSPi.
492    unsigned Imm = MI->getOperand(2).getImm();
493    // The immediate must be in range, the destination register must be a low
494    // reg, the predicate must be "always" and the condition flags must not
495    // be being set.
496    if (Imm & 3 || Imm > 1024)
497      return false;
498    if (!isARMLowRegister(MI->getOperand(0).getReg()))
499      return false;
500    if (MI->getOperand(3).getImm() != ARMCC::AL)
501      return false;
502    const MCInstrDesc &MCID = MI->getDesc();
503    if (MCID.hasOptionalDef() &&
504        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
505      return false;
506
507    MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
508                                      TII->get(ARM::tADDrSPi))
509      .addOperand(MI->getOperand(0))
510      .addOperand(MI->getOperand(1))
511      .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
512
513    // Transfer MI flags.
514    MIB.setMIFlags(MI->getFlags());
515
516    DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " <<*MIB);
517
518    MBB.erase(MI);
519    ++NumNarrows;
520    return true;
521  }
522
523  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
524    return false;
525
526  const MCInstrDesc &MCID = MI->getDesc();
527  if (MCID.mayLoad() || MCID.mayStore())
528    return ReduceLoadStore(MBB, MI, Entry);
529
530  switch (Opc) {
531  default: break;
532  case ARM::t2ADDSri:
533  case ARM::t2ADDSrr: {
534    unsigned PredReg = 0;
535    if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
536      switch (Opc) {
537      default: break;
538      case ARM::t2ADDSri: {
539        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
540          return true;
541        // fallthrough
542      }
543      case ARM::t2ADDSrr:
544        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
545      }
546    }
547    break;
548  }
549  case ARM::t2RSBri:
550  case ARM::t2RSBSri:
551    if (MI->getOperand(2).getImm() == 0)
552      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
553    break;
554  case ARM::t2MOVi16:
555    // Can convert only 'pure' immediate operands, not immediates obtained as
556    // globals' addresses.
557    if (MI->getOperand(1).isImm())
558      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
559    break;
560  case ARM::t2CMPrr: {
561    // Try to reduce to the lo-reg only version first. Why there are two
562    // versions of the instruction is a mystery.
563    // It would be nice to just have two entries in the master table that
564    // are prioritized, but the table assumes a unique entry for each
565    // source insn opcode. So for now, we hack a local entry record to use.
566    static const ReduceEntry NarrowEntry =
567      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
568    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
569      return true;
570    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
571  }
572  }
573  return false;
574}
575
576bool
577Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
578                                const ReduceEntry &Entry,
579                                bool LiveCPSR, MachineInstr *CPSRDef) {
580
581  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
582    return false;
583
584  unsigned Reg0 = MI->getOperand(0).getReg();
585  unsigned Reg1 = MI->getOperand(1).getReg();
586  if (Reg0 != Reg1) {
587    // Try to commute the operands to make it a 2-address instruction.
588    unsigned CommOpIdx1, CommOpIdx2;
589    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
590        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
591      return false;
592    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
593    if (!CommutedMI)
594      return false;
595  }
596  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
597    return false;
598  if (Entry.Imm2Limit) {
599    unsigned Imm = MI->getOperand(2).getImm();
600    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
601    if (Imm > Limit)
602      return false;
603  } else {
604    unsigned Reg2 = MI->getOperand(2).getReg();
605    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
606      return false;
607  }
608
609  // Check if it's possible / necessary to transfer the predicate.
610  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
611  unsigned PredReg = 0;
612  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
613  bool SkipPred = false;
614  if (Pred != ARMCC::AL) {
615    if (!NewMCID.isPredicable())
616      // Can't transfer predicate, fail.
617      return false;
618  } else {
619    SkipPred = !NewMCID.isPredicable();
620  }
621
622  bool HasCC = false;
623  bool CCDead = false;
624  const MCInstrDesc &MCID = MI->getDesc();
625  if (MCID.hasOptionalDef()) {
626    unsigned NumOps = MCID.getNumOperands();
627    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
628    if (HasCC && MI->getOperand(NumOps-1).isDead())
629      CCDead = true;
630  }
631  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
632    return false;
633
634  // Avoid adding a false dependency on partial flag update by some 16-bit
635  // instructions which has the 's' bit set.
636  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
637      canAddPseudoFlagDep(CPSRDef, MI))
638    return false;
639
640  // Add the 16-bit instruction.
641  DebugLoc dl = MI->getDebugLoc();
642  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
643  MIB.addOperand(MI->getOperand(0));
644  if (NewMCID.hasOptionalDef()) {
645    if (HasCC)
646      AddDefaultT1CC(MIB, CCDead);
647    else
648      AddNoT1CC(MIB);
649  }
650
651  // Transfer the rest of operands.
652  unsigned NumOps = MCID.getNumOperands();
653  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
654    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
655      continue;
656    if (SkipPred && MCID.OpInfo[i].isPredicate())
657      continue;
658    MIB.addOperand(MI->getOperand(i));
659  }
660
661  // Transfer MI flags.
662  MIB.setMIFlags(MI->getFlags());
663
664  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
665
666  MBB.erase(MI);
667  ++Num2Addrs;
668  return true;
669}
670
671bool
672Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
673                                 const ReduceEntry &Entry,
674                                 bool LiveCPSR, MachineInstr *CPSRDef) {
675  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
676    return false;
677
678  unsigned Limit = ~0U;
679  if (Entry.Imm1Limit)
680    Limit = (1 << Entry.Imm1Limit) - 1;
681
682  const MCInstrDesc &MCID = MI->getDesc();
683  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
684    if (MCID.OpInfo[i].isPredicate())
685      continue;
686    const MachineOperand &MO = MI->getOperand(i);
687    if (MO.isReg()) {
688      unsigned Reg = MO.getReg();
689      if (!Reg || Reg == ARM::CPSR)
690        continue;
691      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
692        return false;
693    } else if (MO.isImm() &&
694               !MCID.OpInfo[i].isPredicate()) {
695      if (((unsigned)MO.getImm()) > Limit)
696        return false;
697    }
698  }
699
700  // Check if it's possible / necessary to transfer the predicate.
701  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
702  unsigned PredReg = 0;
703  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
704  bool SkipPred = false;
705  if (Pred != ARMCC::AL) {
706    if (!NewMCID.isPredicable())
707      // Can't transfer predicate, fail.
708      return false;
709  } else {
710    SkipPred = !NewMCID.isPredicable();
711  }
712
713  bool HasCC = false;
714  bool CCDead = false;
715  if (MCID.hasOptionalDef()) {
716    unsigned NumOps = MCID.getNumOperands();
717    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
718    if (HasCC && MI->getOperand(NumOps-1).isDead())
719      CCDead = true;
720  }
721  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
722    return false;
723
724  // Avoid adding a false dependency on partial flag update by some 16-bit
725  // instructions which has the 's' bit set.
726  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
727      canAddPseudoFlagDep(CPSRDef, MI))
728    return false;
729
730  // Add the 16-bit instruction.
731  DebugLoc dl = MI->getDebugLoc();
732  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
733  MIB.addOperand(MI->getOperand(0));
734  if (NewMCID.hasOptionalDef()) {
735    if (HasCC)
736      AddDefaultT1CC(MIB, CCDead);
737    else
738      AddNoT1CC(MIB);
739  }
740
741  // Transfer the rest of operands.
742  unsigned NumOps = MCID.getNumOperands();
743  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
744    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
745      continue;
746    if ((MCID.getOpcode() == ARM::t2RSBSri ||
747         MCID.getOpcode() == ARM::t2RSBri) && i == 2)
748      // Skip the zero immediate operand, it's now implicit.
749      continue;
750    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
751    if (SkipPred && isPred)
752        continue;
753    const MachineOperand &MO = MI->getOperand(i);
754    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
755      // Skip implicit def of CPSR. Either it's modeled as an optional
756      // def now or it's already an implicit def on the new instruction.
757      continue;
758    MIB.addOperand(MO);
759  }
760  if (!MCID.isPredicable() && NewMCID.isPredicable())
761    AddDefaultPred(MIB);
762
763  // Transfer MI flags.
764  MIB.setMIFlags(MI->getFlags());
765
766  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
767
768  MBB.erase(MI);
769  ++NumNarrows;
770  return true;
771}
772
773static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
774  bool HasDef = false;
775  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
776    const MachineOperand &MO = MI.getOperand(i);
777    if (!MO.isReg() || MO.isUndef() || MO.isUse())
778      continue;
779    if (MO.getReg() != ARM::CPSR)
780      continue;
781
782    DefCPSR = true;
783    if (!MO.isDead())
784      HasDef = true;
785  }
786
787  return HasDef || LiveCPSR;
788}
789
790static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
791  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
792    const MachineOperand &MO = MI.getOperand(i);
793    if (!MO.isReg() || MO.isUndef() || MO.isDef())
794      continue;
795    if (MO.getReg() != ARM::CPSR)
796      continue;
797    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
798    if (MO.isKill()) {
799      LiveCPSR = false;
800      break;
801    }
802  }
803
804  return LiveCPSR;
805}
806
807bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
808  bool Modified = false;
809
810  // Yes, CPSR could be livein.
811  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
812  MachineInstr *CPSRDef = 0;
813
814  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
815  MachineBasicBlock::iterator NextMII;
816  for (; MII != E; MII = NextMII) {
817    NextMII = llvm::next(MII);
818
819    MachineInstr *MI = &*MII;
820    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
821
822    unsigned Opcode = MI->getOpcode();
823    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
824    if (OPI != ReduceOpcodeMap.end()) {
825      const ReduceEntry &Entry = ReduceTable[OPI->second];
826      // Ignore "special" cases for now.
827      if (Entry.Special) {
828        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
829          Modified = true;
830          MachineBasicBlock::iterator I = prior(NextMII);
831          MI = &*I;
832        }
833        goto ProcessNext;
834      }
835
836      // Try to transform to a 16-bit two-address instruction.
837      if (Entry.NarrowOpc2 &&
838          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
839        Modified = true;
840        MachineBasicBlock::iterator I = prior(NextMII);
841        MI = &*I;
842        goto ProcessNext;
843      }
844
845      // Try to transform to a 16-bit non-two-address instruction.
846      if (Entry.NarrowOpc1 &&
847          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
848        Modified = true;
849        MachineBasicBlock::iterator I = prior(NextMII);
850        MI = &*I;
851      }
852    }
853
854  ProcessNext:
855    bool DefCPSR = false;
856    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
857    if (MI->getDesc().isCall())
858      // Calls don't really set CPSR.
859      CPSRDef = 0;
860    else if (DefCPSR)
861      // This is the last CPSR defining instruction.
862      CPSRDef = MI;
863  }
864
865  return Modified;
866}
867
868bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
869  const TargetMachine &TM = MF.getTarget();
870  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
871  STI = &TM.getSubtarget<ARMSubtarget>();
872
873  bool Modified = false;
874  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
875    Modified |= ReduceMBB(*I);
876  return Modified;
877}
878
879/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
880/// reduction pass.
881FunctionPass *llvm::createThumb2SizeReductionPass() {
882  return new Thumb2SizeReduce();
883}
884