// Thumb2SizeReduction.cpp revision c2d98bc0d682419f09659d94afefd6a6266dd6ee
1//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#define DEBUG_TYPE "t2-reduce-size" 11#include "ARM.h" 12#include "ARMAddressingModes.h" 13#include "ARMBaseRegisterInfo.h" 14#include "ARMBaseInstrInfo.h" 15#include "Thumb2InstrInfo.h" 16#include "llvm/CodeGen/MachineInstr.h" 17#include "llvm/CodeGen/MachineInstrBuilder.h" 18#include "llvm/CodeGen/MachineFunctionPass.h" 19#include "llvm/Support/CommandLine.h" 20#include "llvm/Support/Compiler.h" 21#include "llvm/Support/Debug.h" 22#include "llvm/Support/raw_ostream.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/Statistic.h" 25using namespace llvm; 26 27STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); 28STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); 29STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); 30 31static cl::opt<int> ReduceLimit("t2-reduce-limit", 32 cl::init(-1), cl::Hidden); 33static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", 34 cl::init(-1), cl::Hidden); 35static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", 36 cl::init(-1), cl::Hidden); 37 38namespace { 39 /// ReduceTable - A static table with information on mapping from wide 40 /// opcodes to narrow 41 struct ReduceEntry { 42 unsigned WideOpc; // Wide opcode 43 unsigned NarrowOpc1; // Narrow opcode to transform to 44 unsigned NarrowOpc2; // Narrow opcode when it's two-address 45 uint8_t Imm1Limit; // Limit of immediate field (bits) 46 uint8_t Imm2Limit; // Limit of immediate field when it's two-address 47 unsigned LowRegs1 : 1; // Only possible if low-registers are used 48 unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) 49 unsigned PredCC1 : 2; // 0 
- If predicated, cc is on and vice versa. 50 // 1 - No cc field. 51 // 2 - Always set CPSR. 52 unsigned PredCC2 : 2; 53 unsigned Special : 1; // Needs to be dealt with specially 54 }; 55 56 static const ReduceEntry ReduceTable[] = { 57 // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S 58 { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 }, 59 { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, 60 { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, 61 // Note: immediate scale is 4. 62 { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0 }, 63 { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 }, 64 { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 }, 65 { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 }, 66 { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 }, 67 { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 }, 68 { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 }, 69 { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 }, 70 { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 }, 71 { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0 }, 72 { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 }, 73 { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 }, 74 { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, 75 // FIXME: adr.n immediate offset must be multiple of 4. 76 //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 }, 77 { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, 78 { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 }, 79 { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, 80 { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, 81 { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, 82 // FIXME: Do we need the 16-bit 'S' variant? 
83 { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, 84 { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, 85 { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 0, 0,1, 0 }, 86 { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, 87 { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, 88 { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, 89 { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 }, 90 { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 }, 91 { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 }, 92 { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 }, 93 { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 }, 94 { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 }, 95 { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 }, 96 { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 }, 97 { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 }, 98 { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 }, 99 { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 }, 100 { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 }, 101 { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 }, 102 { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 }, 103 { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 }, 104 { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 }, 105 106 // FIXME: Clean this up after splitting each Thumb load / store opcode 107 // into multiple ones. 
108 { ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 }, 109 { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 }, 110 { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 }, 111 { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 }, 112 { ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 }, 113 { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 }, 114 { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, 115 { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, 116 { ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 }, 117 { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 }, 118 { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 }, 119 { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 }, 120 { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 }, 121 { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 }, 122 123 { ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, 124 { ARM::t2LDM, ARM::tLDM, ARM::tPOP, 0, 0, 1, 1, 1,1, 1 }, 125 { ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, 126 }; 127 128 class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass { 129 public: 130 static char ID; 131 Thumb2SizeReduce(); 132 133 const Thumb2InstrInfo *TII; 134 135 virtual bool runOnMachineFunction(MachineFunction &MF); 136 137 virtual const char *getPassName() const { 138 return "Thumb2 instruction size reduction pass"; 139 } 140 141 private: 142 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. 143 DenseMap<unsigned, unsigned> ReduceOpcodeMap; 144 145 bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 146 bool is2Addr, ARMCC::CondCodes Pred, 147 bool LiveCPSR, bool &HasCC, bool &CCDead); 148 149 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 150 const ReduceEntry &Entry); 151 152 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 153 const ReduceEntry &Entry, bool LiveCPSR); 154 155 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address 156 /// instruction. 
157 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 158 const ReduceEntry &Entry, 159 bool LiveCPSR); 160 161 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit 162 /// non-two-address instruction. 163 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 164 const ReduceEntry &Entry, 165 bool LiveCPSR); 166 167 /// ReduceMBB - Reduce width of instructions in the specified basic block. 168 bool ReduceMBB(MachineBasicBlock &MBB); 169 }; 170 char Thumb2SizeReduce::ID = 0; 171} 172 173Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { 174 for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { 175 unsigned FromOpc = ReduceTable[i].WideOpc; 176 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) 177 assert(false && "Duplicated entries?"); 178 } 179} 180 181static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) { 182 for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs) 183 if (*Regs == ARM::CPSR) 184 return true; 185 return false; 186} 187 188bool 189Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 190 bool is2Addr, ARMCC::CondCodes Pred, 191 bool LiveCPSR, bool &HasCC, bool &CCDead) { 192 if ((is2Addr && Entry.PredCC2 == 0) || 193 (!is2Addr && Entry.PredCC1 == 0)) { 194 if (Pred == ARMCC::AL) { 195 // Not predicated, must set CPSR. 196 if (!HasCC) { 197 // Original instruction was not setting CPSR, but CPSR is not 198 // currently live anyway. It's ok to set it. The CPSR def is 199 // dead though. 200 if (!LiveCPSR) { 201 HasCC = true; 202 CCDead = true; 203 return true; 204 } 205 return false; 206 } 207 } else { 208 // Predicated, must not set CPSR. 209 if (HasCC) 210 return false; 211 } 212 } else if ((is2Addr && Entry.PredCC2 == 2) || 213 (!is2Addr && Entry.PredCC1 == 2)) { 214 /// Old opcode has an optional def of CPSR. 
215 if (HasCC) 216 return true; 217 // If both old opcode does not implicit CPSR def, then it's not ok since 218 // these new opcodes CPSR def is not meant to be thrown away. e.g. CMP. 219 if (!HasImplicitCPSRDef(MI->getDesc())) 220 return false; 221 HasCC = true; 222 } else { 223 // 16-bit instruction does not set CPSR. 224 if (HasCC) 225 return false; 226 } 227 228 return true; 229} 230 231static bool VerifyLowRegs(MachineInstr *MI) { 232 unsigned Opc = MI->getOpcode(); 233 bool isPCOk = (Opc == ARM::t2LDM_RET) || (Opc == ARM::t2LDM); 234 bool isLROk = (Opc == ARM::t2STM); 235 bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi); 236 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 237 const MachineOperand &MO = MI->getOperand(i); 238 if (!MO.isReg() || MO.isImplicit()) 239 continue; 240 unsigned Reg = MO.getReg(); 241 if (Reg == 0 || Reg == ARM::CPSR) 242 continue; 243 if (isPCOk && Reg == ARM::PC) 244 continue; 245 if (isLROk && Reg == ARM::LR) 246 continue; 247 if (isSPOk && Reg == ARM::SP) 248 continue; 249 if (!isARMLowRegister(Reg)) 250 return false; 251 } 252 return true; 253} 254 255bool 256Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 257 const ReduceEntry &Entry) { 258 if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) 259 return false; 260 261 unsigned Scale = 1; 262 bool HasImmOffset = false; 263 bool HasShift = false; 264 bool isLdStMul = false; 265 unsigned Opc = Entry.NarrowOpc1; 266 unsigned OpNum = 3; // First 'rest' of operands. 
267 switch (Entry.WideOpc) { 268 default: 269 llvm_unreachable("Unexpected Thumb2 load / store opcode!"); 270 case ARM::t2LDRi12: 271 case ARM::t2STRi12: 272 Scale = 4; 273 HasImmOffset = true; 274 break; 275 case ARM::t2LDRBi12: 276 case ARM::t2STRBi12: 277 HasImmOffset = true; 278 break; 279 case ARM::t2LDRHi12: 280 case ARM::t2STRHi12: 281 Scale = 2; 282 HasImmOffset = true; 283 break; 284 case ARM::t2LDRs: 285 case ARM::t2LDRBs: 286 case ARM::t2LDRHs: 287 case ARM::t2LDRSBs: 288 case ARM::t2LDRSHs: 289 case ARM::t2STRs: 290 case ARM::t2STRBs: 291 case ARM::t2STRHs: 292 HasShift = true; 293 OpNum = 4; 294 break; 295 case ARM::t2LDM_RET: 296 case ARM::t2LDM: 297 case ARM::t2STM: { 298 OpNum = 0; 299 unsigned BaseReg = MI->getOperand(0).getReg(); 300 unsigned Mode = MI->getOperand(1).getImm(); 301 if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) { 302 Opc = Entry.NarrowOpc2; 303 OpNum = 2; 304 } else if (Entry.WideOpc == ARM::t2LDM_RET || 305 !isARMLowRegister(BaseReg) || 306 !ARM_AM::getAM4WBFlag(Mode) || 307 ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) { 308 return false; 309 } 310 isLdStMul = true; 311 break; 312 } 313 } 314 315 unsigned OffsetReg = 0; 316 bool OffsetKill = false; 317 if (HasShift) { 318 OffsetReg = MI->getOperand(2).getReg(); 319 OffsetKill = MI->getOperand(2).isKill(); 320 if (MI->getOperand(3).getImm()) 321 // Thumb1 addressing mode doesn't support shift. 322 return false; 323 } 324 325 unsigned OffsetImm = 0; 326 if (HasImmOffset) { 327 OffsetImm = MI->getOperand(2).getImm(); 328 unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale; 329 if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset) 330 // Make sure the immediate field fits. 331 return false; 332 } 333 334 // Add the 16-bit load / store instruction. 335 // FIXME: Thumb1 addressing mode encode both immediate and register offset. 
336 DebugLoc dl = MI->getDebugLoc(); 337 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); 338 if (!isLdStMul) { 339 MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1)); 340 if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) { 341 // tLDRSB and tLDRSH do not have an immediate offset field. On the other 342 // hand, it must have an offset register. 343 // FIXME: Remove this special case. 344 MIB.addImm(OffsetImm/Scale); 345 } 346 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); 347 348 MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); 349 } 350 351 // Transfer the rest of operands. 352 for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) 353 MIB.addOperand(MI->getOperand(OpNum)); 354 355 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 356 357 MBB.erase(MI); 358 ++NumLdSts; 359 return true; 360} 361 362bool 363Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 364 const ReduceEntry &Entry, 365 bool LiveCPSR) { 366 if (Entry.LowRegs1 && !VerifyLowRegs(MI)) 367 return false; 368 369 const TargetInstrDesc &TID = MI->getDesc(); 370 if (TID.mayLoad() || TID.mayStore()) 371 return ReduceLoadStore(MBB, MI, Entry); 372 373 unsigned Opc = MI->getOpcode(); 374 switch (Opc) { 375 default: break; 376 case ARM::t2ADDSri: 377 case ARM::t2ADDSrr: { 378 unsigned PredReg = 0; 379 if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { 380 switch (Opc) { 381 default: break; 382 case ARM::t2ADDSri: { 383 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) 384 return true; 385 // fallthrough 386 } 387 case ARM::t2ADDSrr: 388 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); 389 } 390 } 391 break; 392 } 393 case ARM::t2RSBri: 394 case ARM::t2RSBSri: 395 if (MI->getOperand(2).getImm() == 0) 396 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); 397 break; 398 } 399 return false; 400} 401 402bool 403Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 404 
const ReduceEntry &Entry, 405 bool LiveCPSR) { 406 407 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) 408 return false; 409 410 const TargetInstrDesc &TID = MI->getDesc(); 411 unsigned Reg0 = MI->getOperand(0).getReg(); 412 unsigned Reg1 = MI->getOperand(1).getReg(); 413 if (Reg0 != Reg1) 414 return false; 415 if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) 416 return false; 417 if (Entry.Imm2Limit) { 418 unsigned Imm = MI->getOperand(2).getImm(); 419 unsigned Limit = (1 << Entry.Imm2Limit) - 1; 420 if (Imm > Limit) 421 return false; 422 } else { 423 unsigned Reg2 = MI->getOperand(2).getReg(); 424 if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) 425 return false; 426 } 427 428 // Check if it's possible / necessary to transfer the predicate. 429 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2); 430 unsigned PredReg = 0; 431 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 432 bool SkipPred = false; 433 if (Pred != ARMCC::AL) { 434 if (!NewTID.isPredicable()) 435 // Can't transfer predicate, fail. 436 return false; 437 } else { 438 SkipPred = !NewTID.isPredicable(); 439 } 440 441 bool HasCC = false; 442 bool CCDead = false; 443 if (TID.hasOptionalDef()) { 444 unsigned NumOps = TID.getNumOperands(); 445 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 446 if (HasCC && MI->getOperand(NumOps-1).isDead()) 447 CCDead = true; 448 } 449 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) 450 return false; 451 452 // Add the 16-bit instruction. 453 DebugLoc dl = MI->getDebugLoc(); 454 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); 455 MIB.addOperand(MI->getOperand(0)); 456 if (NewTID.hasOptionalDef()) { 457 if (HasCC) 458 AddDefaultT1CC(MIB, CCDead); 459 else 460 AddNoT1CC(MIB); 461 } 462 463 // Transfer the rest of operands. 
464 unsigned NumOps = TID.getNumOperands(); 465 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 466 if (i < NumOps && TID.OpInfo[i].isOptionalDef()) 467 continue; 468 if (SkipPred && TID.OpInfo[i].isPredicate()) 469 continue; 470 MIB.addOperand(MI->getOperand(i)); 471 } 472 473 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 474 475 MBB.erase(MI); 476 ++Num2Addrs; 477 return true; 478} 479 480bool 481Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 482 const ReduceEntry &Entry, 483 bool LiveCPSR) { 484 if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) 485 return false; 486 487 unsigned Limit = ~0U; 488 unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1; 489 if (Entry.Imm1Limit) 490 Limit = ((1 << Entry.Imm1Limit) - 1) * Scale; 491 492 const TargetInstrDesc &TID = MI->getDesc(); 493 for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { 494 if (TID.OpInfo[i].isPredicate()) 495 continue; 496 const MachineOperand &MO = MI->getOperand(i); 497 if (MO.isReg()) { 498 unsigned Reg = MO.getReg(); 499 if (!Reg || Reg == ARM::CPSR) 500 continue; 501 if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP) 502 continue; 503 if (Entry.LowRegs1 && !isARMLowRegister(Reg)) 504 return false; 505 } else if (MO.isImm() && 506 !TID.OpInfo[i].isPredicate()) { 507 if (MO.getImm() > Limit || (MO.getImm() & (Scale-1)) != 0) 508 return false; 509 } 510 } 511 512 // Check if it's possible / necessary to transfer the predicate. 513 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1); 514 unsigned PredReg = 0; 515 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 516 bool SkipPred = false; 517 if (Pred != ARMCC::AL) { 518 if (!NewTID.isPredicable()) 519 // Can't transfer predicate, fail. 
520 return false; 521 } else { 522 SkipPred = !NewTID.isPredicable(); 523 } 524 525 bool HasCC = false; 526 bool CCDead = false; 527 if (TID.hasOptionalDef()) { 528 unsigned NumOps = TID.getNumOperands(); 529 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 530 if (HasCC && MI->getOperand(NumOps-1).isDead()) 531 CCDead = true; 532 } 533 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) 534 return false; 535 536 // Add the 16-bit instruction. 537 DebugLoc dl = MI->getDebugLoc(); 538 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); 539 MIB.addOperand(MI->getOperand(0)); 540 if (NewTID.hasOptionalDef()) { 541 if (HasCC) 542 AddDefaultT1CC(MIB, CCDead); 543 else 544 AddNoT1CC(MIB); 545 } 546 547 // Transfer the rest of operands. 548 unsigned NumOps = TID.getNumOperands(); 549 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 550 if (i < NumOps && TID.OpInfo[i].isOptionalDef()) 551 continue; 552 if ((TID.getOpcode() == ARM::t2RSBSri || 553 TID.getOpcode() == ARM::t2RSBri) && i == 2) 554 // Skip the zero immediate operand, it's now implicit. 555 continue; 556 bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate()); 557 if (SkipPred && isPred) 558 continue; 559 const MachineOperand &MO = MI->getOperand(i); 560 if (Scale > 1 && !isPred && MO.isImm()) 561 MIB.addImm(MO.getImm() / Scale); 562 else { 563 if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) 564 // Skip implicit def of CPSR. Either it's modeled as an optional 565 // def now or it's already an implicit def on the new instruction. 
566 continue; 567 MIB.addOperand(MO); 568 } 569 } 570 if (!TID.isPredicable() && NewTID.isPredicable()) 571 AddDefaultPred(MIB); 572 573 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 574 575 MBB.erase(MI); 576 ++NumNarrows; 577 return true; 578} 579 580static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) { 581 bool HasDef = false; 582 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 583 const MachineOperand &MO = MI.getOperand(i); 584 if (!MO.isReg() || MO.isUndef() || MO.isUse()) 585 continue; 586 if (MO.getReg() != ARM::CPSR) 587 continue; 588 if (!MO.isDead()) 589 HasDef = true; 590 } 591 592 return HasDef || LiveCPSR; 593} 594 595static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { 596 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 597 const MachineOperand &MO = MI.getOperand(i); 598 if (!MO.isReg() || MO.isUndef() || MO.isDef()) 599 continue; 600 if (MO.getReg() != ARM::CPSR) 601 continue; 602 assert(LiveCPSR && "CPSR liveness tracking is wrong!"); 603 if (MO.isKill()) { 604 LiveCPSR = false; 605 break; 606 } 607 } 608 609 return LiveCPSR; 610} 611 612bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { 613 bool Modified = false; 614 615 bool LiveCPSR = false; 616 // Yes, CPSR could be livein. 617 for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(), 618 E = MBB.livein_end(); I != E; ++I) { 619 if (*I == ARM::CPSR) { 620 LiveCPSR = true; 621 break; 622 } 623 } 624 625 MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); 626 MachineBasicBlock::iterator NextMII; 627 for (; MII != E; MII = NextMII) { 628 NextMII = next(MII); 629 630 MachineInstr *MI = &*MII; 631 LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); 632 633 unsigned Opcode = MI->getOpcode(); 634 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); 635 if (OPI != ReduceOpcodeMap.end()) { 636 const ReduceEntry &Entry = ReduceTable[OPI->second]; 637 // Ignore "special" cases for now. 
638 if (Entry.Special) { 639 if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) { 640 Modified = true; 641 MachineBasicBlock::iterator I = prior(NextMII); 642 MI = &*I; 643 } 644 goto ProcessNext; 645 } 646 647 // Try to transform to a 16-bit two-address instruction. 648 if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) { 649 Modified = true; 650 MachineBasicBlock::iterator I = prior(NextMII); 651 MI = &*I; 652 goto ProcessNext; 653 } 654 655 // Try to transform ro a 16-bit non-two-address instruction. 656 if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) { 657 Modified = true; 658 MachineBasicBlock::iterator I = prior(NextMII); 659 MI = &*I; 660 } 661 } 662 663 ProcessNext: 664 LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR); 665 } 666 667 return Modified; 668} 669 670bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { 671 const TargetMachine &TM = MF.getTarget(); 672 TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); 673 674 bool Modified = false; 675 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 676 Modified |= ReduceMBB(*I); 677 return Modified; 678} 679 680/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size 681/// reduction pass. 682FunctionPass *llvm::createThumb2SizeReductionPass() { 683 return new Thumb2SizeReduce(); 684} 685