Thumb2SizeReduction.cpp revision 2a7b41ba4d3eb3c6003f6768dc20b28d83eac265
147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// 247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner// 347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner// The LLVM Compiler Infrastructure 447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner// 54ee451de366474b9c228b4e5fa573795a715216dChris Lattner// This file is distributed under the University of Illinois Open Source 64ee451de366474b9c228b4e5fa573795a715216dChris Lattner// License. See LICENSE.TXT for details. 747877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner// 847877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner//===----------------------------------------------------------------------===// 947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner 1047877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#define DEBUG_TYPE "t2-reduce-size" 1147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARM.h" 1247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMAddressingModes.h" 1347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMBaseRegisterInfo.h" 1447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMBaseInstrInfo.h" 1547877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "ARMSubtarget.h" 1647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "Thumb2InstrInfo.h" 1731442f9dc5512b6a29cdb332b12ae09a1c9e8176Chris Lattner#include "llvm/CodeGen/MachineInstr.h" 180f54dcbf07c69e41ecaa6b4fbf0d94956d8e9ff5Devang Patel#include "llvm/CodeGen/MachineInstrBuilder.h" 1947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "llvm/CodeGen/MachineFunctionPass.h" 205a29c9eed157af51a8d338b5a225b146881819e8Gordon Henriksen#include "llvm/Support/CommandLine.h" 2147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "llvm/Support/Debug.h" 221532f3ddd77c362dd5f613af06b4de636e3c5b0eDale Johannesen#include "llvm/Support/raw_ostream.h" 2347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner#include "llvm/ADT/DenseMap.h" 2431442f9dc5512b6a29cdb332b12ae09a1c9e8176Chris Lattner#include "llvm/ADT/Statistic.h" 25cb3718832375a581c5ea23f15918f3ea447a446cOwen Andersonusing namespace llvm; 2647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner 2747877050e7ea02c3514497aba54eef1d4cee8452Chris LattnerSTATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); 282c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan GohmanSTATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); 292c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan GohmanSTATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); 302c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan Gohman 312c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan Gohmanstatic cl::opt<int> ReduceLimit("t2-reduce-limit", 3285ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattner cl::init(-1), cl::Hidden); 3385ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattnerstatic cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", 3485ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattner cl::init(-1), cl::Hidden); 3585ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattnerstatic cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", 368bd6035750f1b290832a3b1c90766d9b45ed8d6bEvan Cheng cl::init(-1), cl::Hidden); 378bd6035750f1b290832a3b1c90766d9b45ed8d6bEvan Cheng 3893f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksennamespace { 3993f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen /// ReduceTable - A static table with information on mapping from wide 4085ef2546303fabe32de3f2519a978fa2a7fd5958Chris Lattner /// opcodes to narrow 41459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner struct ReduceEntry { 42459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner unsigned WideOpc; // Wide opcode 43459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner unsigned NarrowOpc1; // Narrow opcode to transform to 44459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner unsigned NarrowOpc2; // Narrow opcode when it's two-address 45459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner uint8_t Imm1Limit; // Limit of immediate field (bits) 46459525df1e003597077197b5f802bd5d9cd7d94cChris Lattner uint8_t Imm2Limit; // Limit of immediate field when it's two-address 47dc756858f92a397ed30362ba8251fec56479735fDan Gohman unsigned LowRegs1 : 1; // Only possible if low-registers are used 48dc756858f92a397ed30362ba8251fec56479735fDan Gohman unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) 49dc756858f92a397ed30362ba8251fec56479735fDan Gohman unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa. 50eb0d6abee36c274cf081948795f4675d8f33fc6fDan Gohman // 1 - No cc field. 51dc756858f92a397ed30362ba8251fec56479735fDan Gohman // 2 - Always set CPSR. 52dc756858f92a397ed30362ba8251fec56479735fDan Gohman unsigned PredCC2 : 2; 532c4bf119be8aa05cdc3dc88c57006353f07f0d2cDan Gohman unsigned PartFlag : 1; // 16-bit instruction does partial flag update 5404523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling unsigned Special : 1; // Needs to be dealt with specially 55bfae83139dcb4fffd50b939e1b1224b0126f04d4Dan Gohman }; 56cb3718832375a581c5ea23f15918f3ea447a446cOwen Anderson 5704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling static const ReduceEntry ReduceTable[] = { 5898a366d547772010e94609e4584489b3e5ce0043Bill Wendling // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S 5902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, 60be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 }, 6104523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, 6204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, 639d4209fb82cab74bae76511e3f21ef1c24ec948aJim Laskey { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, 649d4209fb82cab74bae76511e3f21ef1c24ec948aJim Laskey { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, 6502dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, 6602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 }, 6704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, 6804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling //FIXME: Disable CMN, as CCodes are backwards from compare expectations 69be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, 7004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, 7104523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 }, 7298a366d547772010e94609e4584489b3e5ce0043Bill Wendling { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, 73bbf1db72133e9cf986e4da6260736335533067dbEvan Cheng // FIXME: adr.n immediate offset must be multiple of 4. 74d703ed6aed98c8156829399efbafb13a3cca0b69Evan Cheng //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 }, 7547877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 }, 7604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 }, 7704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, 7804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 }, 79be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less 8004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling // likely to cause issue in the loop. As a size / performance workaround, 8104523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling // they are not marked as such. 8204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, 8304523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, 8404523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling // FIXME: Do we need the 16-bit 'S' variant? 8504523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 }, 8604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0,0 }, 8747877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0,0 }, 8804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 }, 8904523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 }, 9004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 }, 9102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 }, 9204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 }, 9304523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 }, 9404523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 }, 95bfae83139dcb4fffd50b939e1b1224b0126f04d4Dan Gohman { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, 9604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 }, 9798a366d547772010e94609e4584489b3e5ce0043Bill Wendling { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 }, 9804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 }, 99be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, 10002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, 1015eca075b74d62c621b160aa216b4cd50829a2cc7Gordon Henriksen { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, 10204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, 10347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, 10447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, 10504523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, 10647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, 10747877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner 10847877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner // FIXME: Clean this up after splitting each Thumb load / store opcode 10947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner // into multiple ones. 11047877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 }, 11147877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, 11247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, 11347877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, 11447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, 115bfae83139dcb4fffd50b939e1b1224b0126f04d4Dan Gohman { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, 11647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, 11798a366d547772010e94609e4584489b3e5ce0043Bill Wendling { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 }, 11802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 }, 119be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, 12002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, 12102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, 122be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, 12302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, 12402dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman 125be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 }, 12602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 }, 12702dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 }, 12802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent 12902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 }, 13002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 }, 13102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman }; 13202dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman 13302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman class Thumb2SizeReduce : public MachineFunctionPass { 13402dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman public: 13598a366d547772010e94609e4584489b3e5ce0043Bill Wendling static char ID; 13698a366d547772010e94609e4584489b3e5ce0043Bill Wendling Thumb2SizeReduce(); 13702dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman 138be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling const Thumb2InstrInfo *TII; 13998a366d547772010e94609e4584489b3e5ce0043Bill Wendling const ARMSubtarget *STI; 14047877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner 14102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman virtual bool runOnMachineFunction(MachineFunction &MF); 14247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner 14398a366d547772010e94609e4584489b3e5ce0043Bill Wendling virtual const char *getPassName() const { 144c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner return "Thumb2 instruction size reduction pass"; 145c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner } 1463b0da26e202cbbeb22508231f4278bda8e995391Daniel Dunbar 147c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner private: 14802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. 14993f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen DenseMap<unsigned, unsigned> ReduceOpcodeMap; 15002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman 1511532f3ddd77c362dd5f613af06b4de636e3c5b0eDale Johannesen bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use); 152b6d5b1439047609c050576f3dc52b722e76bd30bDale Johannesen 15302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 15447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner bool is2Addr, ARMCC::CondCodes Pred, 15547877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner bool LiveCPSR, bool &HasCC, bool &CCDead); 15604523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling 15798a366d547772010e94609e4584489b3e5ce0043Bill Wendling bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 158c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner const ReduceEntry &Entry); 159c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner 160e9e6bdf27fca46dc9eca2ebdf73e03747d1859abBill Wendling bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 1612b58ce5ab4e22e796303d68fb246d4031cb5d4caBill Wendling const ReduceEntry &Entry, bool LiveCPSR, 162c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner MachineInstr *CPSRDef); 163f4db3a51c7d806f7dcef5d9625e7cdf7f122dca9Daniel Dunbar 164f4db3a51c7d806f7dcef5d9625e7cdf7f122dca9Daniel Dunbar /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address 1653b0da26e202cbbeb22508231f4278bda8e995391Daniel Dunbar /// instruction. 166c8d288f8fa9e46199a29e1954550c980f184bd1cChris Lattner bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 16702dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman const ReduceEntry &Entry, 16802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman bool LiveCPSR, MachineInstr *CPSRDef); 169dc756858f92a397ed30362ba8251fec56479735fDan Gohman 170eb0d6abee36c274cf081948795f4675d8f33fc6fDan Gohman /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit 17198a366d547772010e94609e4584489b3e5ce0043Bill Wendling /// non-two-address instruction. 172dc756858f92a397ed30362ba8251fec56479735fDan Gohman bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 173dc756858f92a397ed30362ba8251fec56479735fDan Gohman const ReduceEntry &Entry, 17447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner bool LiveCPSR, MachineInstr *CPSRDef); 175be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling 17647877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner /// ReduceMBB - Reduce width of instructions in the specified basic block. 17704523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling bool ReduceMBB(MachineBasicBlock &MBB); 17847877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner }; 17947877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner char Thumb2SizeReduce::ID = 0; 18004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling} 1810f940c95d4506f8d04fa2aeda8a79cadb3105fe3Bill Wendling 18298a366d547772010e94609e4584489b3e5ce0043Bill WendlingThumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { 183cc8f603f531c906782e4966107ae29667eb6632cBill Wendling for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { 1843c42f1211874665e8ea6eea55a45024b557afa61Chris Lattner unsigned FromOpc = ReduceTable[i].WideOpc; 1858f0d99e463a2dcb5a40d14f0481a0e322bcf79e4Evan Cheng if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) 1860f940c95d4506f8d04fa2aeda8a79cadb3105fe3Bill Wendling assert(false && "Duplicated entries?"); 187b013f5094c3bdb36472da996d63d5c0f75b6d4d3Anton Korobeynikov } 188be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling} 189b013f5094c3bdb36472da996d63d5c0f75b6d4d3Anton Korobeynikov 190b013f5094c3bdb36472da996d63d5c0f75b6d4d3Anton Korobeynikovstatic bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { 1913f32d65912b4da23793dab618d981be2ce11c331Evan Cheng for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs) 19247877050e7ea02c3514497aba54eef1d4cee8452Chris Lattner if (*Regs == ARM::CPSR) 1933f32d65912b4da23793dab618d981be2ce11c331Evan Cheng return true; 1943f32d65912b4da23793dab618d981be2ce11c331Evan Cheng return false; 19598a366d547772010e94609e4584489b3e5ce0043Bill Wendling} 1962c1d7726f25b6b219a1518b411351b99d25c1a02Bill Wendling 1972c1d7726f25b6b219a1518b411351b99d25c1a02Bill Wendling/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations, 1983f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// the 's' 16-bit instruction partially update CPSR. Abort the 19902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// transformation to avoid adding false dependency on last CPSR setting 20004523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling/// instruction which hurts the ability for out-of-order execution engine 20102dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// to do register renaming magic. 2023f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// This function checks if there is a read-of-write dependency between the 203be8cc2a3dedeb7685f07e68cdc4b9502eb97eb2bBill Wendling/// last instruction that defines the CPSR and the current instruction. If there 2043f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// is, then there is no harm done since the instruction cannot be retired 2053f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// before the CPSR setting instruction anyway. 20602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// Note, we are not doing full dependency analysis here for the sake of compile 2073f32d65912b4da23793dab618d981be2ce11c331Evan Cheng/// time. We're not looking for cases like: 20802dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// r0 = muls ... 209ada779fb11eb411536aa8219a176ca0ce4d58fd1Christopher Lamb/// r1 = add.w r0, ... 21002dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman/// ... 211ada779fb11eb411536aa8219a176ca0ce4d58fd1Christopher Lamb/// = mul.w r1 212ada779fb11eb411536aa8219a176ca0ce4d58fd1Christopher Lamb/// In this case it would have been ok to narrow the mul.w to muls since there 21304523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling/// are indirect RAW dependency between the muls and the mul.w 21447877050e7ea02c3514497aba54eef1d4cee8452Chris Lattnerbool 21547877050e7ea02c3514497aba54eef1d4cee8452Chris LattnerThumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) { 21602dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman if (!Def || !STI->avoidCPSRPartialUpdate()) 2173f32d65912b4da23793dab618d981be2ce11c331Evan Cheng return false; 21804523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling 21902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman SmallSet<unsigned, 2> Defs; 220e7e7d0d7e39d0c7c659d26b97e8081fce0fcd749Dale Johannesen for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { 22198a366d547772010e94609e4584489b3e5ce0043Bill Wendling const MachineOperand &MO = Def->getOperand(i); 22272f159640382a16e036b63dcb9c0b427e6d5dc0aDale Johannesen if (!MO.isReg() || MO.isUndef() || MO.isUse()) 223e7e7d0d7e39d0c7c659d26b97e8081fce0fcd749Dale Johannesen continue; 2245ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman unsigned Reg = MO.getReg(); 2255ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman if (Reg == 0 || Reg == ARM::CPSR) 2265ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman continue; 2275ce0973f7f4d149f986d16d1a1f79b131fd70423Dan Gohman Defs.insert(Reg); 22823b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman } 22998a366d547772010e94609e4584489b3e5ce0043Bill Wendling 23023b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { 23123b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman const MachineOperand &MO = Use->getOperand(i); 23223b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman if (!MO.isReg() || MO.isUndef() || MO.isDef()) 23323b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman continue; 23423b0d490cdf27b2cba7b497aceeb5c79b0d0c9feDan Gohman unsigned Reg = MO.getReg(); 23593f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen if (Defs.count(Reg)) 2363f32d65912b4da23793dab618d981be2ce11c331Evan Cheng return false; 23793f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen } 23893f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen 23902dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohman // No read-after-write dependency. The narrowing will add false dependency. 24093f96d00bf10299246ea726956ce84dcb4b9a59eGordon Henriksen return true; 2415eca075b74d62c621b160aa216b4cd50829a2cc7Gordon Henriksen} 24204523eab6bbc5d55a6e3f3296ddd583c8ad5ebb1Bill Wendling 24302dae4ba06a05d28b24b3c1b39d54de751271c95Dan Gohmanbool 24447877050e7ea02c3514497aba54eef1d4cee8452Chris LattnerThumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 245 bool is2Addr, ARMCC::CondCodes Pred, 246 bool LiveCPSR, bool &HasCC, bool &CCDead) { 247 if ((is2Addr && Entry.PredCC2 == 0) || 248 (!is2Addr && Entry.PredCC1 == 0)) { 249 if (Pred == ARMCC::AL) { 250 // Not predicated, must set CPSR. 251 if (!HasCC) { 252 // Original instruction was not setting CPSR, but CPSR is not 253 // currently live anyway. It's ok to set it. The CPSR def is 254 // dead though. 255 if (!LiveCPSR) { 256 HasCC = true; 257 CCDead = true; 258 return true; 259 } 260 return false; 261 } 262 } else { 263 // Predicated, must not set CPSR. 264 if (HasCC) 265 return false; 266 } 267 } else if ((is2Addr && Entry.PredCC2 == 2) || 268 (!is2Addr && Entry.PredCC1 == 2)) { 269 /// Old opcode has an optional def of CPSR. 270 if (HasCC) 271 return true; 272 // If old opcode does not implicitly define CPSR, then it's not ok since 273 // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP. 274 if (!HasImplicitCPSRDef(MI->getDesc())) 275 return false; 276 HasCC = true; 277 } else { 278 // 16-bit instruction does not set CPSR. 279 if (HasCC) 280 return false; 281 } 282 283 return true; 284} 285 286static bool VerifyLowRegs(MachineInstr *MI) { 287 unsigned Opc = MI->getOpcode(); 288 bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA || 289 Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || 290 Opc == ARM::t2LDMDB_UPD); 291 bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); 292 bool isSPOk = isPCOk || isLROk; 293 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 294 const MachineOperand &MO = MI->getOperand(i); 295 if (!MO.isReg() || MO.isImplicit()) 296 continue; 297 unsigned Reg = MO.getReg(); 298 if (Reg == 0 || Reg == ARM::CPSR) 299 continue; 300 if (isPCOk && Reg == ARM::PC) 301 continue; 302 if (isLROk && Reg == ARM::LR) 303 continue; 304 if (Reg == ARM::SP) { 305 if (isSPOk) 306 continue; 307 if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) 308 // Special case for these ldr / str with sp as base register. 309 continue; 310 } 311 if (!isARMLowRegister(Reg)) 312 return false; 313 } 314 return true; 315} 316 317bool 318Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 319 const ReduceEntry &Entry) { 320 if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) 321 return false; 322 323 unsigned Scale = 1; 324 bool HasImmOffset = false; 325 bool HasShift = false; 326 bool HasOffReg = true; 327 bool isLdStMul = false; 328 unsigned Opc = Entry.NarrowOpc1; 329 unsigned OpNum = 3; // First 'rest' of operands. 330 uint8_t ImmLimit = Entry.Imm1Limit; 331 332 switch (Entry.WideOpc) { 333 default: 334 llvm_unreachable("Unexpected Thumb2 load / store opcode!"); 335 case ARM::t2LDRi12: 336 case ARM::t2STRi12: 337 if (MI->getOperand(1).getReg() == ARM::SP) { 338 Opc = Entry.NarrowOpc2; 339 ImmLimit = Entry.Imm2Limit; 340 HasOffReg = false; 341 } 342 343 Scale = 4; 344 HasImmOffset = true; 345 HasOffReg = false; 346 break; 347 case ARM::t2LDRBi12: 348 case ARM::t2STRBi12: 349 HasImmOffset = true; 350 HasOffReg = false; 351 break; 352 case ARM::t2LDRHi12: 353 case ARM::t2STRHi12: 354 Scale = 2; 355 HasImmOffset = true; 356 HasOffReg = false; 357 break; 358 case ARM::t2LDRs: 359 case ARM::t2LDRBs: 360 case ARM::t2LDRHs: 361 case ARM::t2LDRSBs: 362 case ARM::t2LDRSHs: 363 case ARM::t2STRs: 364 case ARM::t2STRBs: 365 case ARM::t2STRHs: 366 HasShift = true; 367 OpNum = 4; 368 break; 369 case ARM::t2LDMIA: 370 case ARM::t2LDMDB: { 371 unsigned BaseReg = MI->getOperand(0).getReg(); 372 if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA) 373 return false; 374 375 // For the non-writeback version (this one), the base register must be 376 // one of the registers being loaded. 377 bool isOK = false; 378 for (unsigned i = 4; i < MI->getNumOperands(); ++i) { 379 if (MI->getOperand(i).getReg() == BaseReg) { 380 isOK = true; 381 break; 382 } 383 } 384 385 if (!isOK) 386 return false; 387 388 OpNum = 0; 389 isLdStMul = true; 390 break; 391 } 392 case ARM::t2LDMIA_RET: { 393 unsigned BaseReg = MI->getOperand(1).getReg(); 394 if (BaseReg != ARM::SP) 395 return false; 396 Opc = Entry.NarrowOpc2; // tPOP_RET 397 OpNum = 2; 398 isLdStMul = true; 399 break; 400 } 401 case ARM::t2LDMIA_UPD: 402 case ARM::t2LDMDB_UPD: 403 case ARM::t2STMIA_UPD: 404 case ARM::t2STMDB_UPD: { 405 OpNum = 0; 406 407 unsigned BaseReg = MI->getOperand(1).getReg(); 408 if (BaseReg == ARM::SP && 409 (Entry.WideOpc == ARM::t2LDMIA_UPD || 410 Entry.WideOpc == ARM::t2STMDB_UPD)) { 411 Opc = Entry.NarrowOpc2; // tPOP or tPUSH 412 OpNum = 2; 413 } else if (!isARMLowRegister(BaseReg) || 414 (Entry.WideOpc != ARM::t2LDMIA_UPD && 415 Entry.WideOpc != ARM::t2STMIA_UPD)) { 416 return false; 417 } 418 419 isLdStMul = true; 420 break; 421 } 422 } 423 424 unsigned OffsetReg = 0; 425 bool OffsetKill = false; 426 if (HasShift) { 427 OffsetReg = MI->getOperand(2).getReg(); 428 OffsetKill = MI->getOperand(2).isKill(); 429 430 if (MI->getOperand(3).getImm()) 431 // Thumb1 addressing mode doesn't support shift. 432 return false; 433 } 434 435 unsigned OffsetImm = 0; 436 if (HasImmOffset) { 437 OffsetImm = MI->getOperand(2).getImm(); 438 unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; 439 440 if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) 441 // Make sure the immediate field fits. 442 return false; 443 } 444 445 // Add the 16-bit load / store instruction. 446 DebugLoc dl = MI->getDebugLoc(); 447 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); 448 if (!isLdStMul) { 449 MIB.addOperand(MI->getOperand(0)); 450 MIB.addOperand(MI->getOperand(1)); 451 452 if (HasImmOffset) 453 MIB.addImm(OffsetImm / Scale); 454 455 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); 456 457 if (HasOffReg) 458 MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); 459 } 460 461 // Transfer the rest of operands. 462 for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) 463 MIB.addOperand(MI->getOperand(OpNum)); 464 465 // Transfer memoperands. 466 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 467 468 // Transfer MI flags. 469 MIB.setMIFlags(MI->getFlags()); 470 471 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 472 473 MBB.erase(MI); 474 ++NumLdSts; 475 return true; 476} 477 478bool 479Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 480 const ReduceEntry &Entry, 481 bool LiveCPSR, MachineInstr *CPSRDef) { 482 unsigned Opc = MI->getOpcode(); 483 if (Opc == ARM::t2ADDri) { 484 // If the source register is SP, try to reduce to tADDrSPi, otherwise 485 // it's a normal reduce. 486 if (MI->getOperand(1).getReg() != ARM::SP) { 487 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) 488 return true; 489 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); 490 } 491 // Try to reduce to tADDrSPi. 492 unsigned Imm = MI->getOperand(2).getImm(); 493 // The immediate must be in range, the destination register must be a low 494 // reg, the predicate must be "always" and the condition flags must not 495 // be being set. 496 if (Imm & 3 || Imm > 1024) 497 return false; 498 if (!isARMLowRegister(MI->getOperand(0).getReg())) 499 return false; 500 if (MI->getOperand(3).getImm() != ARMCC::AL) 501 return false; 502 const MCInstrDesc &MCID = MI->getDesc(); 503 if (MCID.hasOptionalDef() && 504 MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) 505 return false; 506 507 MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), 508 TII->get(ARM::tADDrSPi)) 509 .addOperand(MI->getOperand(0)) 510 .addOperand(MI->getOperand(1)) 511 .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. 512 513 // Transfer MI flags. 514 MIB.setMIFlags(MI->getFlags()); 515 516 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); 517 518 MBB.erase(MI); 519 ++NumNarrows; 520 return true; 521 } 522 523 if (Entry.LowRegs1 && !VerifyLowRegs(MI)) 524 return false; 525 526 const MCInstrDesc &MCID = MI->getDesc(); 527 if (MCID.mayLoad() || MCID.mayStore()) 528 return ReduceLoadStore(MBB, MI, Entry); 529 530 switch (Opc) { 531 default: break; 532 case ARM::t2ADDSri: 533 case ARM::t2ADDSrr: { 534 unsigned PredReg = 0; 535 if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { 536 switch (Opc) { 537 default: break; 538 case ARM::t2ADDSri: { 539 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) 540 return true; 541 // fallthrough 542 } 543 case ARM::t2ADDSrr: 544 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); 545 } 546 } 547 break; 548 } 549 case ARM::t2RSBri: 550 case ARM::t2RSBSri: 551 if (MI->getOperand(2).getImm() == 0) 552 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); 553 break; 554 case ARM::t2MOVi16: 555 // Can convert only 'pure' immediate operands, not immediates obtained as 556 // globals' addresses. 557 if (MI->getOperand(1).isImm()) 558 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); 559 break; 560 case ARM::t2CMPrr: { 561 // Try to reduce to the lo-reg only version first. Why there are two 562 // versions of the instruction is a mystery. 563 // It would be nice to just have two entries in the master table that 564 // are prioritized, but the table assumes a unique entry for each 565 // source insn opcode. So for now, we hack a local entry record to use. 566 static const ReduceEntry NarrowEntry = 567 { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; 568 if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef)) 569 return true; 570 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); 571 } 572 } 573 return false; 574} 575 576bool 577Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 578 const ReduceEntry &Entry, 579 bool LiveCPSR, MachineInstr *CPSRDef) { 580 581 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) 582 return false; 583 584 unsigned Reg0 = MI->getOperand(0).getReg(); 585 unsigned Reg1 = MI->getOperand(1).getReg(); 586 if (Reg0 != Reg1) { 587 // Try to commute the operands to make it a 2-address instruction. 588 unsigned CommOpIdx1, CommOpIdx2; 589 if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || 590 CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) 591 return false; 592 MachineInstr *CommutedMI = TII->commuteInstruction(MI); 593 if (!CommutedMI) 594 return false; 595 } 596 if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) 597 return false; 598 if (Entry.Imm2Limit) { 599 unsigned Imm = MI->getOperand(2).getImm(); 600 unsigned Limit = (1 << Entry.Imm2Limit) - 1; 601 if (Imm > Limit) 602 return false; 603 } else { 604 unsigned Reg2 = MI->getOperand(2).getReg(); 605 if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) 606 return false; 607 } 608 609 // Check if it's possible / necessary to transfer the predicate. 610 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); 611 unsigned PredReg = 0; 612 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 613 bool SkipPred = false; 614 if (Pred != ARMCC::AL) { 615 if (!NewMCID.isPredicable()) 616 // Can't transfer predicate, fail. 617 return false; 618 } else { 619 SkipPred = !NewMCID.isPredicable(); 620 } 621 622 bool HasCC = false; 623 bool CCDead = false; 624 const MCInstrDesc &MCID = MI->getDesc(); 625 if (MCID.hasOptionalDef()) { 626 unsigned NumOps = MCID.getNumOperands(); 627 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 628 if (HasCC && MI->getOperand(NumOps-1).isDead()) 629 CCDead = true; 630 } 631 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) 632 return false; 633 634 // Avoid adding a false dependency on partial flag update by some 16-bit 635 // instructions which has the 's' bit set. 636 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && 637 canAddPseudoFlagDep(CPSRDef, MI)) 638 return false; 639 640 // Add the 16-bit instruction. 641 DebugLoc dl = MI->getDebugLoc(); 642 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); 643 MIB.addOperand(MI->getOperand(0)); 644 if (NewMCID.hasOptionalDef()) { 645 if (HasCC) 646 AddDefaultT1CC(MIB, CCDead); 647 else 648 AddNoT1CC(MIB); 649 } 650 651 // Transfer the rest of operands. 652 unsigned NumOps = MCID.getNumOperands(); 653 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 654 if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) 655 continue; 656 if (SkipPred && MCID.OpInfo[i].isPredicate()) 657 continue; 658 MIB.addOperand(MI->getOperand(i)); 659 } 660 661 // Transfer MI flags. 662 MIB.setMIFlags(MI->getFlags()); 663 664 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 665 666 MBB.erase(MI); 667 ++Num2Addrs; 668 return true; 669} 670 671bool 672Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 673 const ReduceEntry &Entry, 674 bool LiveCPSR, MachineInstr *CPSRDef) { 675 if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) 676 return false; 677 678 unsigned Limit = ~0U; 679 if (Entry.Imm1Limit) 680 Limit = (1 << Entry.Imm1Limit) - 1; 681 682 const MCInstrDesc &MCID = MI->getDesc(); 683 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { 684 if (MCID.OpInfo[i].isPredicate()) 685 continue; 686 const MachineOperand &MO = MI->getOperand(i); 687 if (MO.isReg()) { 688 unsigned Reg = MO.getReg(); 689 if (!Reg || Reg == ARM::CPSR) 690 continue; 691 if (Entry.LowRegs1 && !isARMLowRegister(Reg)) 692 return false; 693 } else if (MO.isImm() && 694 !MCID.OpInfo[i].isPredicate()) { 695 if (((unsigned)MO.getImm()) > Limit) 696 return false; 697 } 698 } 699 700 // Check if it's possible / necessary to transfer the predicate. 701 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); 702 unsigned PredReg = 0; 703 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 704 bool SkipPred = false; 705 if (Pred != ARMCC::AL) { 706 if (!NewMCID.isPredicable()) 707 // Can't transfer predicate, fail. 708 return false; 709 } else { 710 SkipPred = !NewMCID.isPredicable(); 711 } 712 713 bool HasCC = false; 714 bool CCDead = false; 715 if (MCID.hasOptionalDef()) { 716 unsigned NumOps = MCID.getNumOperands(); 717 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 718 if (HasCC && MI->getOperand(NumOps-1).isDead()) 719 CCDead = true; 720 } 721 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) 722 return false; 723 724 // Avoid adding a false dependency on partial flag update by some 16-bit 725 // instructions which has the 's' bit set. 726 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && 727 canAddPseudoFlagDep(CPSRDef, MI)) 728 return false; 729 730 // Add the 16-bit instruction. 731 DebugLoc dl = MI->getDebugLoc(); 732 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); 733 MIB.addOperand(MI->getOperand(0)); 734 if (NewMCID.hasOptionalDef()) { 735 if (HasCC) 736 AddDefaultT1CC(MIB, CCDead); 737 else 738 AddNoT1CC(MIB); 739 } 740 741 // Transfer the rest of operands. 742 unsigned NumOps = MCID.getNumOperands(); 743 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 744 if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) 745 continue; 746 if ((MCID.getOpcode() == ARM::t2RSBSri || 747 MCID.getOpcode() == ARM::t2RSBri) && i == 2) 748 // Skip the zero immediate operand, it's now implicit. 749 continue; 750 bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); 751 if (SkipPred && isPred) 752 continue; 753 const MachineOperand &MO = MI->getOperand(i); 754 if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) 755 // Skip implicit def of CPSR. Either it's modeled as an optional 756 // def now or it's already an implicit def on the new instruction. 757 continue; 758 MIB.addOperand(MO); 759 } 760 if (!MCID.isPredicable() && NewMCID.isPredicable()) 761 AddDefaultPred(MIB); 762 763 // Transfer MI flags. 764 MIB.setMIFlags(MI->getFlags()); 765 766 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 767 768 MBB.erase(MI); 769 ++NumNarrows; 770 return true; 771} 772 773static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) { 774 bool HasDef = false; 775 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 776 const MachineOperand &MO = MI.getOperand(i); 777 if (!MO.isReg() || MO.isUndef() || MO.isUse()) 778 continue; 779 if (MO.getReg() != ARM::CPSR) 780 continue; 781 782 DefCPSR = true; 783 if (!MO.isDead()) 784 HasDef = true; 785 } 786 787 return HasDef || LiveCPSR; 788} 789 790static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { 791 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 792 const MachineOperand &MO = MI.getOperand(i); 793 if (!MO.isReg() || MO.isUndef() || MO.isDef()) 794 continue; 795 if (MO.getReg() != ARM::CPSR) 796 continue; 797 assert(LiveCPSR && "CPSR liveness tracking is wrong!"); 798 if (MO.isKill()) { 799 LiveCPSR = false; 800 break; 801 } 802 } 803 804 return LiveCPSR; 805} 806 807bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { 808 bool Modified = false; 809 810 // Yes, CPSR could be livein. 811 bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); 812 MachineInstr *CPSRDef = 0; 813 814 MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); 815 MachineBasicBlock::iterator NextMII; 816 for (; MII != E; MII = NextMII) { 817 NextMII = llvm::next(MII); 818 819 MachineInstr *MI = &*MII; 820 LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); 821 822 unsigned Opcode = MI->getOpcode(); 823 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); 824 if (OPI != ReduceOpcodeMap.end()) { 825 const ReduceEntry &Entry = ReduceTable[OPI->second]; 826 // Ignore "special" cases for now. 827 if (Entry.Special) { 828 if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) { 829 Modified = true; 830 MachineBasicBlock::iterator I = prior(NextMII); 831 MI = &*I; 832 } 833 goto ProcessNext; 834 } 835 836 // Try to transform to a 16-bit two-address instruction. 837 if (Entry.NarrowOpc2 && 838 ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) { 839 Modified = true; 840 MachineBasicBlock::iterator I = prior(NextMII); 841 MI = &*I; 842 goto ProcessNext; 843 } 844 845 // Try to transform to a 16-bit non-two-address instruction. 846 if (Entry.NarrowOpc1 && 847 ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) { 848 Modified = true; 849 MachineBasicBlock::iterator I = prior(NextMII); 850 MI = &*I; 851 } 852 } 853 854 ProcessNext: 855 bool DefCPSR = false; 856 LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); 857 if (MI->getDesc().isCall()) 858 // Calls don't really set CPSR. 859 CPSRDef = 0; 860 else if (DefCPSR) 861 // This is the last CPSR defining instruction. 862 CPSRDef = MI; 863 } 864 865 return Modified; 866} 867 868bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { 869 const TargetMachine &TM = MF.getTarget(); 870 TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); 871 STI = &TM.getSubtarget<ARMSubtarget>(); 872 873 bool Modified = false; 874 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 875 Modified |= ReduceMBB(*I); 876 return Modified; 877} 878 879/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size 880/// reduction pass. 881FunctionPass *llvm::createThumb2SizeReductionPass() { 882 return new Thumb2SizeReduce(); 883} 884