Thumb2SizeReduction.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
2868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)//
3868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)//                     The LLVM Compiler Infrastructure
4868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)//
5868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)// This file is distributed under the University of Illinois Open Source
6868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)// License. See LICENSE.TXT for details.
73551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)//
83551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)//===----------------------------------------------------------------------===//
97d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
10868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#define DEBUG_TYPE "t2-reduce-size"
11868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "ARM.h"
12effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#include "ARMBaseInstrInfo.h"
13effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#include "ARMSubtarget.h"
14eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "MCTargetDesc/ARMAddressingModes.h"
15868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "Thumb2InstrInfo.h"
16868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/DenseMap.h"
1758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include "llvm/ADT/PostOrderIterator.h"
18868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/Statistic.h"
19868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/CodeGen/MachineFunctionPass.h"
2058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include "llvm/CodeGen/MachineInstr.h"
217dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#include "llvm/CodeGen/MachineInstrBuilder.h"
22eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "llvm/IR/Function.h"        // To access Function attributes
23a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)#include "llvm/Support/CommandLine.h"
241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "llvm/Support/Debug.h"
253551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#include "llvm/Target/TargetMachine.h"
267d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)using namespace llvm;
27868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
28868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
29868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
30868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");
3158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)
32effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochstatic cl::opt<int> ReduceLimit("t2-reduce-limit",
3358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                                cl::init(-1), cl::Hidden);
34effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochstatic cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
35effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                                     cl::init(-1), cl::Hidden);
3658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
3758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)                                     cl::init(-1), cl::Hidden);
38868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
39eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochnamespace {
40868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  /// ReduceTable - A static table with information on mapping from wide
41868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  /// opcodes to narrow
42868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  struct ReduceEntry {
43eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch    uint16_t WideOpc;      // Wide opcode
44868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    uint16_t NarrowOpc1;   // Narrow opcode to transform to
45868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
46868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
47868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
48868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned LowRegs1 : 1; // Only possible if low-registers are used
49eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
50868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
51eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch                           // 1 - No cc field.
52effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch                           // 2 - Always set CPSR.
53868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned PredCC2  : 2;
54868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
55868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned Special  : 1; // Needs to be dealt with specially
56868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
57868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  };
58868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
59868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  static const ReduceEntry ReduceTable[] = {
60868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  // Wide,        Narrow1,      Narrow2,     imm1,imm2, lo1, lo2, P/C,PF,S,AM
61868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,   0,   1,  0,0, 0,0,0 },
62868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  0,0, 0,1,0 },
63868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,   1,   0,  0,1, 0,0,0 },
64868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  2,2, 0,1,0 },
65868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,   1,   0,  2,0, 0,1,0 },
6658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)  { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,   0,   1,  0,0, 1,0,0 },
67868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
68868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
6958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)  { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,   0,   1,  0,0, 1,0,0 },
7058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)  //FIXME: Disable CMN, as CCodes are backwards from compare expectations
71effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
7258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)  { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
73868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,   1,   0,  2,0, 0,0,0 },
74868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,   0,   0,  2,0, 0,1,0 },
75868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,   0,   1,  0,0, 1,0,0 },
76868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  // FIXME: adr.n immediate offset must be multiple of 4.
77868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,   1,   0,  1,0, 0,0,0 },
78868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
79868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,   0,   1,  0,0, 1,0,1 },
803551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
813551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
823551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,0,0 },
833551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 1,1,0 },
843551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // FIXME: Do we need the 16-bit 'S' variant?
853551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,   0,   0,  1,0, 0,0,0 },
863551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,   0,   1,  0,0, 1,0,0 },
873551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,   1,   0,  0,0, 0,0,0 },
883551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,   0,   1,  0,0, 1,0,0 },
893551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2REV,   ARM::tREV,    0,             0,   0,   1,   0,  1,0, 0,0,0 },
903551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2REV16, ARM::tREV16,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
913551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
923551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,   0,   1,  0,0, 1,0,0 },
933551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,   1,   0,  0,0, 0,1,0 },
943551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,   1,   0,  2,0, 0,1,0 },
953551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,   0,   1,  0,0, 0,0,0 },
963551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  0,0, 0,0,0 },
973551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,   1,   0,  0,0, 0,0,0 },
983551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  2,2, 0,0,0 },
9903b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)  { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
1003551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
1013551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
1023551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,   1,   0,  2,0, 0,0,0 },
1033551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
1043551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
1053551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1063551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // FIXME: Clean this up after splitting each Thumb load / store opcode
1073551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // into multiple ones.
1083551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
1093551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
1103551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
1113551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
1123551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
1133551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
1143551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
1153551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
11603b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)  { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
1173551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
1183551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
1193551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
1203551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
1213551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
1223551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1233551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,   1,   1,  1,1, 0,1,0 },
1243551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,   1,   1,  1,1, 0,1,0 },
1253551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,   1,   1,  1,1, 0,1,0 },
1263551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
1273551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,   1,   1,  1,1, 0,1,0 },
1283551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,   1,   1,  1,1, 0,1,0 }
1293551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  };
1303551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1313551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  class Thumb2SizeReduce : public MachineFunctionPass {
1323551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  public:
1333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    static char ID;
1343551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    Thumb2SizeReduce();
1353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1363551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    const Thumb2InstrInfo *TII;
1373551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    const ARMSubtarget *STI;
1383551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1393551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool runOnMachineFunction(MachineFunction &MF) override;
1403551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1413551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    const char *getPassName() const override {
1423551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)      return "Thumb2 instruction size reduction pass";
1433551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    }
1443551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1453551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  private:
1463551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
1473551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    DenseMap<unsigned, unsigned> ReduceOpcodeMap;
1483551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1493551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
1503551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1513551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
1523551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                         bool is2Addr, ARMCC::CondCodes Pred,
1533551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                         bool LiveCPSR, bool &HasCC, bool &CCDead);
1543551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1553551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
1563551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                         const ReduceEntry &Entry);
1573551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1583551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
1593551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                       const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
1603551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1613551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
1623551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    /// instruction.
1633551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
1643551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                       const ReduceEntry &Entry, bool LiveCPSR,
1653551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                       bool IsSelfLoop);
1663551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1673551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
1683551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    /// non-two-address instruction.
1693551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
1703551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                        const ReduceEntry &Entry, bool LiveCPSR,
1713551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                        bool IsSelfLoop);
1723551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1733551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    /// ReduceMI - Attempt to reduce MI, return true on success.
1743551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
1753551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                  bool LiveCPSR, bool IsSelfLoop);
1763551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
177868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    /// ReduceMBB - Reduce width of instructions in the specified basic block.
178868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    bool ReduceMBB(MachineBasicBlock &MBB);
179868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
1804e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    bool OptimizeSize;
1813551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    bool MinimizeSize;
1824e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
183868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    // Last instruction to define CPSR in the current block.
184868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    MachineInstr *CPSRDef;
185868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    // Was CPSR last defined by a high latency instruction?
1864e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    // When CPSRDef is null, this refers to CPSR defs in predecessors.
1874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    bool HighLatencyCPSR;
1884e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
1898bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)    struct MBBInfo {
1908bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)      // The flags leaving this block have high latency.
1914e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      bool HighLatencyCPSR;
1923551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)      // Has this block been visited yet?
1933551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)      bool Visited;
1943551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
195868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
196868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    };
1973551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1983551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    SmallVector<MBBInfo, 8> BlockInfo;
1993551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  };
2003551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  char Thumb2SizeReduce::ID = 0;
201868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)}
2027dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
2034e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
204868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  OptimizeSize = MinimizeSize = false;
205868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
206868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned FromOpc = ReduceTable[i].WideOpc;
207868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
208868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      assert(false && "Duplicated entries?");
2097dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  }
2104e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)}
211868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
212868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
213868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
214868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    if (*Regs == ARM::CPSR)
215868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      return true;
216868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  return false;
217868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)}
218868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
219868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)// Check for a likely high-latency flag def.
2207dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdochstatic bool isHighLatencyCPSR(MachineInstr *Def) {
2214e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  switch(Def->getOpcode()) {
222868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  case ARM::FMSTAT:
223868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  case ARM::tMUL:
224868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    return true;
225868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  }
226868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  return false;
227868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)}
228868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
229868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
230868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// the 's' 16-bit instruction partially update CPSR. Abort the
231868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// transformation to avoid adding false dependency on last CPSR setting
232868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// instruction which hurts the ability for out-of-order execution engine
233868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// to do register renaming magic.
234868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// This function checks if there is a read-of-write dependency between the
235868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// last instruction that defines the CPSR and the current instruction. If there
236868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// is, then there is no harm done since the instruction cannot be retired
237868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// before the CPSR setting instruction anyway.
2387dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch/// Note, we are not doing full dependency analysis here for the sake of compile
2394e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)/// time. We're not looking for cases like:
240868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// r0 = muls ...
241868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// r1 = add.w r0, ...
242868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// ...
243868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)///    = mul.w r1
244868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// In this case it would have been ok to narrow the mul.w to muls since there
245868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)/// are indirect RAW dependency between the muls and the mul.w
246868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)bool
247868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
248868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  // Disable the check for -Oz (aka OptimizeForSizeHarder).
249868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
2507dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch    return false;
2514e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
252868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  if (!CPSRDef)
253868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    // If this BB loops back to itself, conservatively avoid narrowing the
254868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    // first instruction that does partial flag update.
255868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    return HighLatencyCPSR || FirstInSelfLoop;
256868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
257868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  SmallSet<unsigned, 2> Defs;
258868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  for (const MachineOperand &MO : CPSRDef->operands()) {
259868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    if (!MO.isReg() || MO.isUndef() || MO.isUse())
260868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      continue;
261868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned Reg = MO.getReg();
262868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    if (Reg == 0 || Reg == ARM::CPSR)
263868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      continue;
264868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    Defs.insert(Reg);
265868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  }
266868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
2677dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  for (const MachineOperand &MO : Use->operands()) {
2684e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    if (!MO.isReg() || MO.isUndef() || MO.isDef())
269eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch      continue;
270868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    unsigned Reg = MO.getReg();
271eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch    if (Defs.count(Reg))
272eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch      return false;
273eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  }
274eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
275eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // If the current CPSR has high latency, try to avoid the false dependency.
276eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  if (HighLatencyCPSR)
277868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    return true;
278868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
279eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
280eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // of them, so always shrink them when CPSR doesn't have high latency.
281eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  if (Use->getOpcode() == ARM::t2MOVi ||
2827dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch      Use->getOpcode() == ARM::t2MOVi16)
2834e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    return false;
2844e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
285eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  // No read-after-write dependency. The narrowing will add false dependency.
286eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  return true;
287eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch}
288eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
289effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochbool
290eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen MurdochThumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
291eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch                                  bool is2Addr, ARMCC::CondCodes Pred,
292eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
293effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  if ((is2Addr  && Entry.PredCC2 == 0) ||
29458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)      (!is2Addr && Entry.PredCC1 == 0)) {
295eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch    if (Pred == ARMCC::AL) {
296eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch      // Not predicated, must set CPSR.
297eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch      if (!HasCC) {
298eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch        // Original instruction was not setting CPSR, but CPSR is not
2994e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)        // currently live anyway. It's ok to set it. The CPSR def is
300eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch        // dead though.
301eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch        if (!LiveCPSR) {
302868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)          HasCC = true;
303          CCDead = true;
304          return true;
305        }
306        return false;
307      }
308    } else {
309      // Predicated, must not set CPSR.
310      if (HasCC)
311        return false;
312    }
313  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
314             (!is2Addr && Entry.PredCC1 == 2)) {
315    /// Old opcode has an optional def of CPSR.
316    if (HasCC)
317      return true;
318    // If old opcode does not implicitly define CPSR, then it's not ok since
319    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
320    if (!HasImplicitCPSRDef(MI->getDesc()))
321      return false;
322    HasCC = true;
323  } else {
324    // 16-bit instruction does not set CPSR.
325    if (HasCC)
326      return false;
327  }
328
329  return true;
330}
331
332static bool VerifyLowRegs(MachineInstr *MI) {
333  unsigned Opc = MI->getOpcode();
334  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA     ||
335                 Opc == ARM::t2LDMDB     || Opc == ARM::t2LDMIA_UPD ||
336                 Opc == ARM::t2LDMDB_UPD);
337  bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
338  bool isSPOk = isPCOk || isLROk;
339  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
340    const MachineOperand &MO = MI->getOperand(i);
341    if (!MO.isReg() || MO.isImplicit())
342      continue;
343    unsigned Reg = MO.getReg();
344    if (Reg == 0 || Reg == ARM::CPSR)
345      continue;
346    if (isPCOk && Reg == ARM::PC)
347      continue;
348    if (isLROk && Reg == ARM::LR)
349      continue;
350    if (Reg == ARM::SP) {
351      if (isSPOk)
352        continue;
353      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
354        // Special case for these ldr / str with sp as base register.
355        continue;
356    }
357    if (!isARMLowRegister(Reg))
358      return false;
359  }
360  return true;
361}
362
363bool
364Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
365                                  const ReduceEntry &Entry) {
366  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
367    return false;
368
369  unsigned Scale = 1;
370  bool HasImmOffset = false;
371  bool HasShift = false;
372  bool HasOffReg = true;
373  bool isLdStMul = false;
374  unsigned Opc = Entry.NarrowOpc1;
375  unsigned OpNum = 3; // First 'rest' of operands.
376  uint8_t  ImmLimit = Entry.Imm1Limit;
377
378  switch (Entry.WideOpc) {
379  default:
380    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
381  case ARM::t2LDRi12:
382  case ARM::t2STRi12:
383    if (MI->getOperand(1).getReg() == ARM::SP) {
384      Opc = Entry.NarrowOpc2;
385      ImmLimit = Entry.Imm2Limit;
386      HasOffReg = false;
387    }
388
389    Scale = 4;
390    HasImmOffset = true;
391    HasOffReg = false;
392    break;
393  case ARM::t2LDRBi12:
394  case ARM::t2STRBi12:
395    HasImmOffset = true;
396    HasOffReg = false;
397    break;
398  case ARM::t2LDRHi12:
399  case ARM::t2STRHi12:
400    Scale = 2;
401    HasImmOffset = true;
402    HasOffReg = false;
403    break;
404  case ARM::t2LDRs:
405  case ARM::t2LDRBs:
406  case ARM::t2LDRHs:
407  case ARM::t2LDRSBs:
408  case ARM::t2LDRSHs:
409  case ARM::t2STRs:
410  case ARM::t2STRBs:
411  case ARM::t2STRHs:
412    HasShift = true;
413    OpNum = 4;
414    break;
415  case ARM::t2LDMIA:
416  case ARM::t2LDMDB: {
417    unsigned BaseReg = MI->getOperand(0).getReg();
418    if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
419      return false;
420
421    // For the non-writeback version (this one), the base register must be
422    // one of the registers being loaded.
423    bool isOK = false;
424    for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
425      if (MI->getOperand(i).getReg() == BaseReg) {
426        isOK = true;
427        break;
428      }
429    }
430
431    if (!isOK)
432      return false;
433
434    OpNum = 0;
435    isLdStMul = true;
436    break;
437  }
438  case ARM::t2LDMIA_RET: {
439    unsigned BaseReg = MI->getOperand(1).getReg();
440    if (BaseReg != ARM::SP)
441      return false;
442    Opc = Entry.NarrowOpc2; // tPOP_RET
443    OpNum = 2;
444    isLdStMul = true;
445    break;
446  }
447  case ARM::t2LDMIA_UPD:
448  case ARM::t2LDMDB_UPD:
449  case ARM::t2STMIA_UPD:
450  case ARM::t2STMDB_UPD: {
451    OpNum = 0;
452
453    unsigned BaseReg = MI->getOperand(1).getReg();
454    if (BaseReg == ARM::SP &&
455        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
456         Entry.WideOpc == ARM::t2STMDB_UPD)) {
457      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
458      OpNum = 2;
459    } else if (!isARMLowRegister(BaseReg) ||
460               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
461                Entry.WideOpc != ARM::t2STMIA_UPD)) {
462      return false;
463    }
464
465    isLdStMul = true;
466    break;
467  }
468  }
469
470  unsigned OffsetReg = 0;
471  bool OffsetKill = false;
472  if (HasShift) {
473    OffsetReg  = MI->getOperand(2).getReg();
474    OffsetKill = MI->getOperand(2).isKill();
475
476    if (MI->getOperand(3).getImm())
477      // Thumb1 addressing mode doesn't support shift.
478      return false;
479  }
480
481  unsigned OffsetImm = 0;
482  if (HasImmOffset) {
483    OffsetImm = MI->getOperand(2).getImm();
484    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
485
486    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
487      // Make sure the immediate field fits.
488      return false;
489  }
490
491  // Add the 16-bit load / store instruction.
492  DebugLoc dl = MI->getDebugLoc();
493  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
494  if (!isLdStMul) {
495    MIB.addOperand(MI->getOperand(0));
496    MIB.addOperand(MI->getOperand(1));
497
498    if (HasImmOffset)
499      MIB.addImm(OffsetImm / Scale);
500
501    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
502
503    if (HasOffReg)
504      MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
505  }
506
507  // Transfer the rest of operands.
508  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
509    MIB.addOperand(MI->getOperand(OpNum));
510
511  // Transfer memoperands.
512  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
513
514  // Transfer MI flags.
515  MIB.setMIFlags(MI->getFlags());
516
517  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
518
519  MBB.erase_instr(MI);
520  ++NumLdSts;
521  return true;
522}
523
524bool
525Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
526                                const ReduceEntry &Entry,
527                                bool LiveCPSR, bool IsSelfLoop) {
528  unsigned Opc = MI->getOpcode();
529  if (Opc == ARM::t2ADDri) {
530    // If the source register is SP, try to reduce to tADDrSPi, otherwise
531    // it's a normal reduce.
532    if (MI->getOperand(1).getReg() != ARM::SP) {
533      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
534        return true;
535      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
536    }
537    // Try to reduce to tADDrSPi.
538    unsigned Imm = MI->getOperand(2).getImm();
539    // The immediate must be in range, the destination register must be a low
540    // reg, the predicate must be "always" and the condition flags must not
541    // be being set.
542    if (Imm & 3 || Imm > 1020)
543      return false;
544    if (!isARMLowRegister(MI->getOperand(0).getReg()))
545      return false;
546    if (MI->getOperand(3).getImm() != ARMCC::AL)
547      return false;
548    const MCInstrDesc &MCID = MI->getDesc();
549    if (MCID.hasOptionalDef() &&
550        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
551      return false;
552
553    MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
554                                      TII->get(ARM::tADDrSPi))
555      .addOperand(MI->getOperand(0))
556      .addOperand(MI->getOperand(1))
557      .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
558    AddDefaultPred(MIB);
559
560    // Transfer MI flags.
561    MIB.setMIFlags(MI->getFlags());
562
563    DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " <<*MIB);
564
565    MBB.erase_instr(MI);
566    ++NumNarrows;
567    return true;
568  }
569
570  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
571    return false;
572
573  if (MI->mayLoad() || MI->mayStore())
574    return ReduceLoadStore(MBB, MI, Entry);
575
576  switch (Opc) {
577  default: break;
578  case ARM::t2ADDSri:
579  case ARM::t2ADDSrr: {
580    unsigned PredReg = 0;
581    if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
582      switch (Opc) {
583      default: break;
584      case ARM::t2ADDSri: {
585        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
586          return true;
587        // fallthrough
588      }
589      case ARM::t2ADDSrr:
590        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
591      }
592    }
593    break;
594  }
595  case ARM::t2RSBri:
596  case ARM::t2RSBSri:
597  case ARM::t2SXTB:
598  case ARM::t2SXTH:
599  case ARM::t2UXTB:
600  case ARM::t2UXTH:
601    if (MI->getOperand(2).getImm() == 0)
602      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
603    break;
604  case ARM::t2MOVi16:
605    // Can convert only 'pure' immediate operands, not immediates obtained as
606    // globals' addresses.
607    if (MI->getOperand(1).isImm())
608      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
609    break;
610  case ARM::t2CMPrr: {
611    // Try to reduce to the lo-reg only version first. Why there are two
612    // versions of the instruction is a mystery.
613    // It would be nice to just have two entries in the master table that
614    // are prioritized, but the table assumes a unique entry for each
615    // source insn opcode. So for now, we hack a local entry record to use.
616    static const ReduceEntry NarrowEntry =
617      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
618    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
619      return true;
620    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
621  }
622  }
623  return false;
624}
625
626bool
627Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
628                                const ReduceEntry &Entry,
629                                bool LiveCPSR, bool IsSelfLoop) {
630
631  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
632    return false;
633
634  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
635      STI->avoidMOVsShifterOperand())
636    // Don't issue movs with shifter operand for some CPUs unless we
637    // are optimizing / minimizing for size.
638    return false;
639
640  unsigned Reg0 = MI->getOperand(0).getReg();
641  unsigned Reg1 = MI->getOperand(1).getReg();
642  // t2MUL is "special". The tied source operand is second, not first.
643  if (MI->getOpcode() == ARM::t2MUL) {
644    unsigned Reg2 = MI->getOperand(2).getReg();
645    // Early exit if the regs aren't all low regs.
646    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
647        || !isARMLowRegister(Reg2))
648      return false;
649    if (Reg0 != Reg2) {
650      // If the other operand also isn't the same as the destination, we
651      // can't reduce.
652      if (Reg1 != Reg0)
653        return false;
654      // Try to commute the operands to make it a 2-address instruction.
655      MachineInstr *CommutedMI = TII->commuteInstruction(MI);
656      if (!CommutedMI)
657        return false;
658    }
659  } else if (Reg0 != Reg1) {
660    // Try to commute the operands to make it a 2-address instruction.
661    unsigned CommOpIdx1, CommOpIdx2;
662    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
663        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
664      return false;
665    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
666    if (!CommutedMI)
667      return false;
668  }
669  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
670    return false;
671  if (Entry.Imm2Limit) {
672    unsigned Imm = MI->getOperand(2).getImm();
673    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
674    if (Imm > Limit)
675      return false;
676  } else {
677    unsigned Reg2 = MI->getOperand(2).getReg();
678    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
679      return false;
680  }
681
682  // Check if it's possible / necessary to transfer the predicate.
683  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
684  unsigned PredReg = 0;
685  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
686  bool SkipPred = false;
687  if (Pred != ARMCC::AL) {
688    if (!NewMCID.isPredicable())
689      // Can't transfer predicate, fail.
690      return false;
691  } else {
692    SkipPred = !NewMCID.isPredicable();
693  }
694
695  bool HasCC = false;
696  bool CCDead = false;
697  const MCInstrDesc &MCID = MI->getDesc();
698  if (MCID.hasOptionalDef()) {
699    unsigned NumOps = MCID.getNumOperands();
700    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
701    if (HasCC && MI->getOperand(NumOps-1).isDead())
702      CCDead = true;
703  }
704  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
705    return false;
706
707  // Avoid adding a false dependency on partial flag update by some 16-bit
708  // instructions which has the 's' bit set.
709  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
710      canAddPseudoFlagDep(MI, IsSelfLoop))
711    return false;
712
713  // Add the 16-bit instruction.
714  DebugLoc dl = MI->getDebugLoc();
715  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
716  MIB.addOperand(MI->getOperand(0));
717  if (NewMCID.hasOptionalDef()) {
718    if (HasCC)
719      AddDefaultT1CC(MIB, CCDead);
720    else
721      AddNoT1CC(MIB);
722  }
723
724  // Transfer the rest of operands.
725  unsigned NumOps = MCID.getNumOperands();
726  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
727    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
728      continue;
729    if (SkipPred && MCID.OpInfo[i].isPredicate())
730      continue;
731    MIB.addOperand(MI->getOperand(i));
732  }
733
734  // Transfer MI flags.
735  MIB.setMIFlags(MI->getFlags());
736
737  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
738
739  MBB.erase_instr(MI);
740  ++Num2Addrs;
741  return true;
742}
743
744bool
745Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
746                                 const ReduceEntry &Entry,
747                                 bool LiveCPSR, bool IsSelfLoop) {
748  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
749    return false;
750
751  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
752      STI->avoidMOVsShifterOperand())
753    // Don't issue movs with shifter operand for some CPUs unless we
754    // are optimizing / minimizing for size.
755    return false;
756
757  unsigned Limit = ~0U;
758  if (Entry.Imm1Limit)
759    Limit = (1 << Entry.Imm1Limit) - 1;
760
761  const MCInstrDesc &MCID = MI->getDesc();
762  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
763    if (MCID.OpInfo[i].isPredicate())
764      continue;
765    const MachineOperand &MO = MI->getOperand(i);
766    if (MO.isReg()) {
767      unsigned Reg = MO.getReg();
768      if (!Reg || Reg == ARM::CPSR)
769        continue;
770      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
771        return false;
772    } else if (MO.isImm() &&
773               !MCID.OpInfo[i].isPredicate()) {
774      if (((unsigned)MO.getImm()) > Limit)
775        return false;
776    }
777  }
778
779  // Check if it's possible / necessary to transfer the predicate.
780  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
781  unsigned PredReg = 0;
782  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
783  bool SkipPred = false;
784  if (Pred != ARMCC::AL) {
785    if (!NewMCID.isPredicable())
786      // Can't transfer predicate, fail.
787      return false;
788  } else {
789    SkipPred = !NewMCID.isPredicable();
790  }
791
792  bool HasCC = false;
793  bool CCDead = false;
794  if (MCID.hasOptionalDef()) {
795    unsigned NumOps = MCID.getNumOperands();
796    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
797    if (HasCC && MI->getOperand(NumOps-1).isDead())
798      CCDead = true;
799  }
800  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
801    return false;
802
803  // Avoid adding a false dependency on partial flag update by some 16-bit
804  // instructions which has the 's' bit set.
805  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
806      canAddPseudoFlagDep(MI, IsSelfLoop))
807    return false;
808
809  // Add the 16-bit instruction.
810  DebugLoc dl = MI->getDebugLoc();
811  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
812  MIB.addOperand(MI->getOperand(0));
813  if (NewMCID.hasOptionalDef()) {
814    if (HasCC)
815      AddDefaultT1CC(MIB, CCDead);
816    else
817      AddNoT1CC(MIB);
818  }
819
820  // Transfer the rest of operands.
821  unsigned NumOps = MCID.getNumOperands();
822  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
823    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
824      continue;
825    if ((MCID.getOpcode() == ARM::t2RSBSri ||
826         MCID.getOpcode() == ARM::t2RSBri ||
827         MCID.getOpcode() == ARM::t2SXTB ||
828         MCID.getOpcode() == ARM::t2SXTH ||
829         MCID.getOpcode() == ARM::t2UXTB ||
830         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
831      // Skip the zero immediate operand, it's now implicit.
832      continue;
833    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
834    if (SkipPred && isPred)
835        continue;
836    const MachineOperand &MO = MI->getOperand(i);
837    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
838      // Skip implicit def of CPSR. Either it's modeled as an optional
839      // def now or it's already an implicit def on the new instruction.
840      continue;
841    MIB.addOperand(MO);
842  }
843  if (!MCID.isPredicable() && NewMCID.isPredicable())
844    AddDefaultPred(MIB);
845
846  // Transfer MI flags.
847  MIB.setMIFlags(MI->getFlags());
848
849  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
850
851  MBB.erase_instr(MI);
852  ++NumNarrows;
853  return true;
854}
855
856static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
857  bool HasDef = false;
858  for (const MachineOperand &MO : MI.operands()) {
859    if (!MO.isReg() || MO.isUndef() || MO.isUse())
860      continue;
861    if (MO.getReg() != ARM::CPSR)
862      continue;
863
864    DefCPSR = true;
865    if (!MO.isDead())
866      HasDef = true;
867  }
868
869  return HasDef || LiveCPSR;
870}
871
872static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
873  for (const MachineOperand &MO : MI.operands()) {
874    if (!MO.isReg() || MO.isUndef() || MO.isDef())
875      continue;
876    if (MO.getReg() != ARM::CPSR)
877      continue;
878    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
879    if (MO.isKill()) {
880      LiveCPSR = false;
881      break;
882    }
883  }
884
885  return LiveCPSR;
886}
887
888bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
889                                bool LiveCPSR, bool IsSelfLoop) {
890  unsigned Opcode = MI->getOpcode();
891  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
892  if (OPI == ReduceOpcodeMap.end())
893    return false;
894  const ReduceEntry &Entry = ReduceTable[OPI->second];
895
896  // Don't attempt normal reductions on "special" cases for now.
897  if (Entry.Special)
898    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
899
900  // Try to transform to a 16-bit two-address instruction.
901  if (Entry.NarrowOpc2 &&
902      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
903    return true;
904
905  // Try to transform to a 16-bit non-two-address instruction.
906  if (Entry.NarrowOpc1 &&
907      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
908    return true;
909
910  return false;
911}
912
913bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
914  bool Modified = false;
915
916  // Yes, CPSR could be livein.
917  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
918  MachineInstr *BundleMI = 0;
919
920  CPSRDef = 0;
921  HighLatencyCPSR = false;
922
923  // Check predecessors for the latest CPSRDef.
924  for (MachineBasicBlock::pred_iterator
925       I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
926    const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
927    if (!PInfo.Visited) {
928      // Since blocks are visited in RPO, this must be a back-edge.
929      continue;
930    }
931    if (PInfo.HighLatencyCPSR) {
932      HighLatencyCPSR = true;
933      break;
934    }
935  }
936
937  // If this BB loops back to itself, conservatively avoid narrowing the
938  // first instruction that does partial flag update.
939  bool IsSelfLoop = MBB.isSuccessor(&MBB);
940  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
941  MachineBasicBlock::instr_iterator NextMII;
942  for (; MII != E; MII = NextMII) {
943    NextMII = std::next(MII);
944
945    MachineInstr *MI = &*MII;
946    if (MI->isBundle()) {
947      BundleMI = MI;
948      continue;
949    }
950    if (MI->isDebugValue())
951      continue;
952
953    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
954
955    // Does NextMII belong to the same bundle as MI?
956    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
957
958    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
959      Modified = true;
960      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
961      MI = &*I;
962      // Removing and reinserting the first instruction in a bundle will break
963      // up the bundle. Fix the bundling if it was broken.
964      if (NextInSameBundle && !NextMII->isBundledWithPred())
965        NextMII->bundleWithPred();
966    }
967
968    if (!NextInSameBundle && MI->isInsideBundle()) {
969      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
970      // marker is only on the BUNDLE instruction. Process the BUNDLE
971      // instruction as we finish with the bundled instruction to work around
972      // the inconsistency.
973      if (BundleMI->killsRegister(ARM::CPSR))
974        LiveCPSR = false;
975      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
976      if (MO && !MO->isDead())
977        LiveCPSR = true;
978      MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
979      if (MO && !MO->isKill())
980        LiveCPSR = true;
981    }
982
983    bool DefCPSR = false;
984    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
985    if (MI->isCall()) {
986      // Calls don't really set CPSR.
987      CPSRDef = 0;
988      HighLatencyCPSR = false;
989      IsSelfLoop = false;
990    } else if (DefCPSR) {
991      // This is the last CPSR defining instruction.
992      CPSRDef = MI;
993      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
994      IsSelfLoop = false;
995    }
996  }
997
998  MBBInfo &Info = BlockInfo[MBB.getNumber()];
999  Info.HighLatencyCPSR = HighLatencyCPSR;
1000  Info.Visited = true;
1001  return Modified;
1002}
1003
1004bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
1005  const TargetMachine &TM = MF.getTarget();
1006  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
1007  STI = &TM.getSubtarget<ARMSubtarget>();
1008
1009  // Optimizing / minimizing size?
1010  AttributeSet FnAttrs = MF.getFunction()->getAttributes();
1011  OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
1012                                      Attribute::OptimizeForSize);
1013  MinimizeSize = STI->isMinSize();
1014
1015  BlockInfo.clear();
1016  BlockInfo.resize(MF.getNumBlockIDs());
1017
1018  // Visit blocks in reverse post-order so LastCPSRDef is known for all
1019  // predecessors.
1020  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
1021  bool Modified = false;
1022  for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
1023       I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
1024    Modified |= ReduceMBB(**I);
1025  return Modified;
1026}
1027
1028/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
1029/// reduction pass.
1030FunctionPass *llvm::createThumb2SizeReductionPass() {
1031  return new Thumb2SizeReduce();
1032}
1033