Thumb2SizeReduction.cpp revision 65f2e7887a8b70b3ee63ef535a6bcfe8a170c074
1//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#define DEBUG_TYPE "t2-reduce-size" 11#include "ARM.h" 12#include "ARMBaseRegisterInfo.h" 13#include "ARMBaseInstrInfo.h" 14#include "Thumb2InstrInfo.h" 15#include "llvm/CodeGen/MachineInstr.h" 16#include "llvm/CodeGen/MachineInstrBuilder.h" 17#include "llvm/CodeGen/MachineFunctionPass.h" 18#include "llvm/Support/CommandLine.h" 19#include "llvm/Support/Compiler.h" 20#include "llvm/Support/Debug.h" 21#include "llvm/ADT/DenseMap.h" 22#include "llvm/ADT/Statistic.h" 23using namespace llvm; 24 25STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); 26STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); 27 28static cl::opt<int> ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden); 29 30namespace { 31 /// ReduceTable - A static table with information on mapping from wide 32 /// opcodes to narrow 33 struct ReduceEntry { 34 unsigned WideOpc; // Wide opcode 35 unsigned NarrowOpc1; // Narrow opcode to transform to 36 unsigned NarrowOpc2; // Narrow opcode when it's two-address 37 uint8_t Imm1Limit; // Limit of immediate field (bits) 38 uint8_t Imm2Limit; // Limit of immediate field when it's two-address 39 unsigned LowRegs1 : 1; // Only possible if low-registers are used 40 unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) 41 unsigned PredCC1 : 1; // 0 - If predicated, cc is on and vice versa. 42 // 1 - No cc field. 43 unsigned PredCC2 : 1; 44 unsigned Special : 1; // Needs to be dealt with specially 45 }; 46 47 static const ReduceEntry ReduceTable[] = { 48 // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S 49 { ARM::t2ADCrr, ARM::tADC, 0, 0, 0, 1, 0, 0,0, 0 }, 50 // FIXME: t2ADDS variants. 51 { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, 52 { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, 53 { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 }, 54 { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 }, 55 { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 }, 56 { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 }, 57 { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 1,0, 0 }, 58 { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 1,0, 0 }, 59 { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 1,0, 0 }, 60 { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 1,0, 0 }, 61 { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 1,0, 0 }, 62 { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, 63 { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, 64 { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 }, 65 { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, 66 { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, 67 { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, 68 // FIXME: Do we need the 16-bit 'S' variant? 69 // FIXME: t2MOVcc 70 { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, 71 { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, 72 { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, 73 { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, 74 { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 }, 75 { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 }, 76 { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 }, 77 { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 }, 78 // FIXME: T2RSBri immediate must be zero. Also need entry for T2RSBS 79 //{ ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0 }, 80 { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 }, 81 { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 }, 82 { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 }, 83 { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 }, 84 { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 1,0, 0 }, 85 { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 }, 86 { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 } 87 }; 88 89 class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass { 90 public: 91 static char ID; 92 Thumb2SizeReduce(); 93 94 const TargetInstrInfo *TII; 95 96 virtual bool runOnMachineFunction(MachineFunction &MF); 97 98 virtual const char *getPassName() const { 99 return "Thumb2 instruction size reduction pass"; 100 } 101 102 private: 103 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. 104 DenseMap<unsigned, unsigned> ReduceOpcodeMap; 105 106 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address 107 /// instruction. 108 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 109 const ReduceEntry &Entry, 110 bool LiveCPSR); 111 112 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit 113 /// non-two-address instruction. 114 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 115 const ReduceEntry &Entry, 116 bool LiveCPSR); 117 118 /// ReduceMBB - Reduce width of instructions in the specified basic block. 119 bool ReduceMBB(MachineBasicBlock &MBB); 120 }; 121 char Thumb2SizeReduce::ID = 0; 122} 123 124Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { 125 for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { 126 unsigned FromOpc = ReduceTable[i].WideOpc; 127 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) 128 assert(false && "Duplicated entries?"); 129 } 130} 131 132static bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 133 bool is2Addr, ARMCC::CondCodes Pred, 134 bool LiveCPSR, bool &HasCC, bool &CCDead) { 135 if ((is2Addr && Entry.PredCC2 == 0) || 136 (!is2Addr && Entry.PredCC1 == 0)) { 137 if (Pred == ARMCC::AL) { 138 // Not predicated, must set CPSR. 139 if (!HasCC) { 140 // Original instruction was not setting CPSR, but CPSR is not 141 // currently live anyway. It's ok to set it. The CPSR def is 142 // dead though. 143 if (!LiveCPSR) { 144 HasCC = true; 145 CCDead = true; 146 return true; 147 } 148 return false; 149 } 150 } else { 151 // Predicated, must not set CPSR. 152 if (HasCC) 153 return false; 154 } 155 } else { 156 // 16-bit instruction does not set CPSR. 157 if (HasCC) 158 return false; 159 } 160 161 return true; 162} 163 164bool 165Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 166 const ReduceEntry &Entry, 167 bool LiveCPSR) { 168 const TargetInstrDesc &TID = MI->getDesc(); 169 unsigned Reg0 = MI->getOperand(0).getReg(); 170 unsigned Reg1 = MI->getOperand(1).getReg(); 171 if (Reg0 != Reg1) 172 return false; 173 if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) 174 return false; 175 if (Entry.Imm2Limit) { 176 unsigned Imm = MI->getOperand(2).getImm(); 177 unsigned Limit = (1 << Entry.Imm2Limit) - 1; 178 if (Imm > Limit) 179 return false; 180 } else { 181 unsigned Reg2 = MI->getOperand(2).getReg(); 182 if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) 183 return false; 184 } 185 186 // Check if it's possible / necessary to transfer the predicate. 187 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2); 188 unsigned PredReg = 0; 189 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 190 bool SkipPred = false; 191 if (Pred != ARMCC::AL) { 192 if (!NewTID.isPredicable()) 193 // Can't transfer predicate, fail. 194 return false; 195 } else { 196 SkipPred = !NewTID.isPredicable(); 197 } 198 199 bool HasCC = false; 200 bool CCDead = false; 201 if (TID.hasOptionalDef()) { 202 unsigned NumOps = TID.getNumOperands(); 203 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 204 if (HasCC && MI->getOperand(NumOps-1).isDead()) 205 CCDead = true; 206 } 207 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) 208 return false; 209 210 // Add the 16-bit instruction. 211 DebugLoc dl = MI->getDebugLoc(); 212 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc2)); 213 MIB.addOperand(MI->getOperand(0)); 214 if (HasCC) 215 AddDefaultT1CC(MIB, CCDead); 216 217 // Transfer the rest of operands. 218 unsigned NumOps = TID.getNumOperands(); 219 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 220 if (i < NumOps && TID.OpInfo[i].isOptionalDef()) 221 continue; 222 if (SkipPred && TID.OpInfo[i].isPredicate()) 223 continue; 224 MIB.addOperand(MI->getOperand(i)); 225 } 226 227 DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB; 228 229 MBB.erase(MI); 230 ++Num2Addrs; 231 return true; 232} 233 234bool 235Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 236 const ReduceEntry &Entry, 237 bool LiveCPSR) { 238 unsigned Limit = ~0U; 239 if (Entry.Imm1Limit) 240 Limit = (1 << Entry.Imm1Limit) - 1; 241 242 const TargetInstrDesc &TID = MI->getDesc(); 243 for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { 244 if (TID.OpInfo[i].isPredicate()) 245 continue; 246 const MachineOperand &MO = MI->getOperand(i); 247 if (MO.isReg()) { 248 unsigned Reg = MO.getReg(); 249 if (!Reg || Reg == ARM::CPSR) 250 continue; 251 if (Entry.LowRegs1 && !isARMLowRegister(Reg)) 252 return false; 253 } else if (MO.isImm()) { 254 if (MO.getImm() > Limit) 255 return false; 256 } 257 } 258 259 // Check if it's possible / necessary to transfer the predicate. 260 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1); 261 unsigned PredReg = 0; 262 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 263 bool SkipPred = false; 264 if (Pred != ARMCC::AL) { 265 if (!NewTID.isPredicable()) 266 // Can't transfer predicate, fail. 267 return false; 268 } else { 269 SkipPred = !NewTID.isPredicable(); 270 } 271 272 bool HasCC = false; 273 bool CCDead = false; 274 if (TID.hasOptionalDef()) { 275 unsigned NumOps = TID.getNumOperands(); 276 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 277 if (HasCC && MI->getOperand(NumOps-1).isDead()) 278 CCDead = true; 279 } 280 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) 281 return false; 282 283 // Add the 16-bit instruction. 284 DebugLoc dl = MI->getDebugLoc(); 285 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1)); 286 MIB.addOperand(MI->getOperand(0)); 287 if (HasCC) 288 AddDefaultT1CC(MIB, CCDead); 289 290 // Transfer the rest of operands. 291 unsigned NumOps = TID.getNumOperands(); 292 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 293 if (i < NumOps && TID.OpInfo[i].isOptionalDef()) 294 continue; 295 if (SkipPred && TID.OpInfo[i].isPredicate()) 296 continue; 297 MIB.addOperand(MI->getOperand(i)); 298 } 299 300 301 DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB; 302 303 MBB.erase(MI); 304 ++NumNarrows; 305 return true; 306} 307 308static bool UpdateCPSRLiveness(MachineInstr &MI, bool LiveCPSR) { 309 bool HasDef = false; 310 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 311 const MachineOperand &MO = MI.getOperand(i); 312 if (!MO.isReg() || MO.isUndef()) 313 continue; 314 if (MO.getReg() != ARM::CPSR) 315 continue; 316 if (MO.isDef()) { 317 if (!MO.isDead()) 318 HasDef = true; 319 continue; 320 } 321 322 assert(LiveCPSR && "CPSR liveness tracking is wrong!"); 323 if (MO.isKill()) { 324 LiveCPSR = false; 325 break; 326 } 327 } 328 329 return HasDef || LiveCPSR; 330} 331 332bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { 333 bool Modified = false; 334 335 bool LiveCPSR = false; 336 // Yes, CPSR could be livein. 337 for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(), 338 E = MBB.livein_end(); I != E; ++I) { 339 if (*I == ARM::CPSR) { 340 LiveCPSR = true; 341 break; 342 } 343 } 344 345 MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); 346 MachineBasicBlock::iterator NextMII; 347 for (; MII != E; MII = NextMII) { 348 NextMII = next(MII); 349 350 MachineInstr *MI = &*MII; 351 unsigned Opcode = MI->getOpcode(); 352 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); 353 if (OPI != ReduceOpcodeMap.end()) { 354 const ReduceEntry &Entry = ReduceTable[OPI->second]; 355 // Ignore "special" cases for now. 356 if (Entry.Special) 357 goto ProcessNext; 358 359 // Try to transform to a 16-bit two-address instruction. 360 if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) { 361 Modified = true; 362 MachineBasicBlock::iterator I = prior(NextMII); 363 MI = &*I; 364 goto ProcessNext; 365 } 366 367 // Try to transform ro a 16-bit non-two-address instruction. 368 if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) 369 Modified = true; 370 } 371 372 ProcessNext: 373 LiveCPSR = UpdateCPSRLiveness(*MI, LiveCPSR); 374 375 if (ReduceLimit != -1 && ((int)(NumNarrows + Num2Addrs) > ReduceLimit)) 376 break; 377 } 378 379 return Modified; 380} 381 382bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { 383 const TargetMachine &TM = MF.getTarget(); 384 TII = TM.getInstrInfo(); 385 386 bool Modified = false; 387 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 388 Modified |= ReduceMBB(*I); 389 return Modified; 390} 391 392/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size 393/// reduction pass. 394FunctionPass *llvm::createThumb2SizeReductionPass() { 395 return new Thumb2SizeReduce(); 396} 397