// Thumb2SizeReduction.cpp revision 789fef987f9c3dd14743731e3a6733f2f90c9778
//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");

// Per-category reduction limits, used to bisect miscompiles by capping how
// many instructions each reduction path may transform. -1 means no limit.
static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);

namespace {
  /// ReduceTable - A static table with information on mapping from wide
  /// opcodes to narrow
  struct ReduceEntry {
    unsigned WideOpc;      // Wide opcode
    unsigned NarrowOpc1;   // Narrow opcode to transform to
    unsigned NarrowOpc2;   // Narrow opcode when it's two-address
    uint8_t  Imm1Limit;    // Limit of immediate field (bits)
    uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
    unsigned LowRegs1 : 1; // Only possible if low-registers are used
    unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
    unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
                           // 1 - No cc field.
                           // 2 - Always set CPSR.
    unsigned PredCC2  : 2; // Same encoding as PredCC1, for the 2addr form.
    unsigned Special  : 1; // Needs to be dealt with specially
  };

  static const ReduceEntry ReduceTable[] = {
    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, S
    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0 },
    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
    // Note: immediate scale is 4.
    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 0 },
    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 1 },
    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 1 },
    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 0 },
    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 0 },
    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 0 },
    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 0 },
    //FIXME: Disable CMN, as CCodes are backwards from compare expectations
    //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0 },
    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0 },
    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0 },
    { ARM::t2CMPzri,ARM::tCMPzi8, 0,             8,   0,    1,   0,  2,0, 0 },
    { ARM::t2CMPzrr,ARM::tCMPzhir,0,             0,   0,    0,   0,  2,0, 0 },
    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 0 },
    // FIXME: adr.n immediate offset must be multiple of 4.
    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0 },
    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 0 },
    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 0 },
    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1 },
    // FIXME: Do we need the 16-bit 'S' variant?
    { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
    { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0 },
    { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0 },
    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 0 },
    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0 },
    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 0 },
    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0 },
    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0 },
    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0 },
    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 0 },
    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 1 },
    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0 },
    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0 },
    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0 },
    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0 },
    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0 },
    { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0 },
    { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0 },
    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0 },
    { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0 },
    { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0 },

    // FIXME: Clean this up after splitting each Thumb load / store opcode
    // into multiple ones.
    { ARM::t2LDRi12,ARM::tLDR,    ARM::tLDRspi,  5,   8,    1,   0,  0,0, 1 },
    { ARM::t2LDRs,  ARM::tLDR,    0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2LDRBi12,ARM::tLDRB,  0,             5,   0,    1,   0,  0,0, 1 },
    { ARM::t2LDRBs, ARM::tLDRB,   0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2LDRHi12,ARM::tLDRH,  0,             5,   0,    1,   0,  0,0, 1 },
    { ARM::t2LDRHs, ARM::tLDRH,   0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2STRi12,ARM::tSTR,    ARM::tSTRspi,  5,   8,    1,   0,  0,0, 1 },
    { ARM::t2STRs,  ARM::tSTR,    0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2STRBi12,ARM::tSTRB,  0,             5,   0,    1,   0,  0,0, 1 },
    { ARM::t2STRBs, ARM::tSTRB,   0,             0,   0,    1,   0,  0,0, 1 },
    { ARM::t2STRHi12,ARM::tSTRH,  0,             5,   0,    1,   0,  0,0, 1 },
    { ARM::t2STRHs, ARM::tSTRH,   0,             0,   0,    1,   0,  0,0, 1 },

    { ARM::t2LDM,   ARM::tLDM,    0,             0,   0,    1,   1,  1,1, 1 },
    { ARM::t2LDM_RET,0,           ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 1 },
    { ARM::t2LDM_UPD,ARM::tLDM_UPD,ARM::tPOP,    0,   0,    1,   1,  1,1, 1 },
    // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
    { ARM::t2STM_UPD,ARM::tSTM_UPD,ARM::tPUSH,   0,   0,    1,   1,  1,1, 1 },
  };

  class Thumb2SizeReduce : public MachineFunctionPass {
  public:
    static char ID;
    Thumb2SizeReduce();

    const Thumb2InstrInfo *TII;

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const {
      return "Thumb2 instruction size reduction pass";
    }

  private:
    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
146 DenseMap<unsigned, unsigned> ReduceOpcodeMap; 147 148 bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 149 bool is2Addr, ARMCC::CondCodes Pred, 150 bool LiveCPSR, bool &HasCC, bool &CCDead); 151 152 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 153 const ReduceEntry &Entry); 154 155 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 156 const ReduceEntry &Entry, bool LiveCPSR); 157 158 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address 159 /// instruction. 160 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 161 const ReduceEntry &Entry, 162 bool LiveCPSR); 163 164 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit 165 /// non-two-address instruction. 166 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 167 const ReduceEntry &Entry, 168 bool LiveCPSR); 169 170 /// ReduceMBB - Reduce width of instructions in the specified basic block. 171 bool ReduceMBB(MachineBasicBlock &MBB); 172 }; 173 char Thumb2SizeReduce::ID = 0; 174} 175 176Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { 177 for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { 178 unsigned FromOpc = ReduceTable[i].WideOpc; 179 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) 180 assert(false && "Duplicated entries?"); 181 } 182} 183 184static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) { 185 for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs) 186 if (*Regs == ARM::CPSR) 187 return true; 188 return false; 189} 190 191bool 192Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 193 bool is2Addr, ARMCC::CondCodes Pred, 194 bool LiveCPSR, bool &HasCC, bool &CCDead) { 195 if ((is2Addr && Entry.PredCC2 == 0) || 196 (!is2Addr && Entry.PredCC1 == 0)) { 197 if (Pred == ARMCC::AL) { 198 // Not predicated, must set CPSR. 
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    /// Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If the old opcode does not have an implicit CPSR def either, then it's
    // not ok, since these new opcodes' CPSR def is not meant to be thrown
    // away. e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}

/// VerifyLowRegs - Return true if all explicit (non-implicit) register
/// operands of MI are low registers, allowing PC / LR / SP only for the
/// specific opcodes that can encode them in 16-bit form.
static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  // Load-multiples may pop into PC; STM_UPD (push) may store LR; SP is fine
  // for those plus the SP-relative add.
  bool isPCOk = (Opc == ARM::t2LDM_RET || Opc == ARM::t2LDM ||
                 Opc == ARM::t2LDM_UPD);
  bool isLROk = (Opc == ARM::t2STM_UPD);
  bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}

/// ReduceLoadStore - Try to convert a 32-bit Thumb2 load / store (including
/// load / store multiple) into its 16-bit counterpart per Entry. Returns true
/// and erases MI on success.
bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;            // Immediate offset scale for this width.
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3;            // First 'rest' of operands.
  uint8_t  ImmLimit = Entry.Imm1Limit;
  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12: {
    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP) {
      // SP-relative form uses the dedicated tLDRspi / tSTRspi encoding,
      // which has no offset register.
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
      HasOffReg = false;
    }
    Scale = 4;
    HasImmOffset = true;
    break;
  }
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDM: {
    unsigned BaseReg = MI->getOperand(0).getReg();
    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    // tLDM only supports low base registers and increment-after mode.
    if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
      return false;
    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDM_RET: {
    unsigned BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 3;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDM_UPD:
  case ARM::t2STM_UPD: {
    OpNum = 0;
    unsigned BaseReg = MI->getOperand(1).getReg();
    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(2).getImm());
    if (BaseReg == ARM::SP &&
        ((Entry.WideOpc == ARM::t2LDM_UPD && Mode == ARM_AM::ia) ||
         (Entry.WideOpc == ARM::t2STM_UPD && Mode == ARM_AM::db))) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 3;
    } else if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) {
      return false;
    }
    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  if (HasShift) {
    OffsetReg  = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
    if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits (aligned and within range).
      return false;
  }

  // Add the 16-bit load / store instruction.
  // FIXME: Thumb1 addressing mode encode both immediate and register offset.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
  if (!isLdStMul) {
    MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
    if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) {
      // tLDRSB and tLDRSH do not have an immediate offset field. On the other
      // hand, it must have an offset register.
      // FIXME: Remove this special case.
      MIB.addImm(OffsetImm/Scale);
    }
    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
  }

  // Transfer the rest of operands.
  for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase(MI);
  ++NumLdSts;
  return true;
}

/// ReduceSpecial - Handle table entries marked Special: loads / stores, and
/// opcodes that are only narrowable under extra operand constraints.
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR) {
  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  const TargetInstrDesc &TID = MI->getDesc();
  if (TID.mayLoad() || TID.mayStore())
    return ReduceLoadStore(MBB, MI, Entry);

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    unsigned PredReg = 0;
    if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri: {
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
          return true;
        // fallthrough - the 2addr form failed, try the narrow 3-operand form.
      }
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
    // tRSB is negate only; reducible only when the immediate is zero.
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
    break;
  }
  return false;
}

bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR) {

  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();
  if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1, CommOpIdx2;
    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    // Third operand is an immediate; check it fits the narrow field.
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    // Third operand is a register; it must also be low if required.
    unsigned Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewTID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewTID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const TargetInstrDesc &TID = MI->getDesc();
  if (TID.hasOptionalDef()) {
    // The optional CPSR def is the last operand described by the desc.
    unsigned NumOps = TID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
  MIB.addOperand(MI->getOperand(0));
  if (NewTID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of operands.
  unsigned NumOps = TID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    // The optional CPSR def was already emitted above; predicate operands
    // are dropped when the new opcode is not predicable.
    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
      continue;
    if (SkipPred && TID.OpInfo[i].isPredicate())
      continue;
    MIB.addOperand(MI->getOperand(i));
  }

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase(MI);
  ++Num2Addrs;
  return true;
}

bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  unsigned Limit = ~0U;
  // t2ADDrSPi's immediate is scaled by 4 (see table note).
  unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1;
  if (Entry.Imm1Limit)
    Limit = ((1 << Entry.Imm1Limit) - 1) * Scale;

  const TargetInstrDesc &TID = MI->getDesc();
  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
    if (TID.OpInfo[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      unsigned Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() &&
               !TID.OpInfo[i].isPredicate()) {
      // Immediate must be in range and aligned to the scale.
      if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewTID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewTID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (TID.hasOptionalDef()) {
    // The optional CPSR def is the last operand described by the desc.
    unsigned NumOps = TID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
  MIB.addOperand(MI->getOperand(0));
  if (NewTID.hasOptionalDef()) {
    if (HasCC)
      AddDefaultT1CC(MIB, CCDead);
    else
      AddNoT1CC(MIB);
  }

  // Transfer the rest of operands.
  unsigned NumOps = TID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
      continue;
    if ((TID.getOpcode() == ARM::t2RSBSri ||
         TID.getOpcode() == ARM::t2RSBri) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (Scale > 1 && !isPred && MO.isImm())
      // Re-scale the immediate for the narrow encoding.
      MIB.addImm(MO.getImm() / Scale);
    else {
      if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
        // Skip implicit def of CPSR. Either it's modeled as an optional
        // def now or it's already an implicit def on the new instruction.
        continue;
      MIB.addOperand(MO);
    }
  }
  // If the source opcode was unconditional but the new one is predicable,
  // give it a default (AL) predicate.
  if (!TID.isPredicable() && NewTID.isPredicable())
    AddDefaultPred(MIB);

  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);

  MBB.erase(MI);
  ++NumNarrows;
  return true;
}

/// UpdateCPSRDef - Update CPSR liveness after MI's defs: returns true if
/// MI has a live (non-dead) CPSR def, otherwise the incoming liveness.
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
  bool HasDef = false;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

/// UpdateCPSRUse - Update CPSR liveness across MI's uses: a killing use of
/// CPSR ends its live range.
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);

  // NextMII is captured before any reduction so the loop survives MI being
  // erased and replaced by a narrow instruction inserted before it.
  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
  MachineBasicBlock::iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = llvm::next(MII);

    MachineInstr *MI = &*MII;
    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
    if (OPI != ReduceOpcodeMap.end()) {
      const ReduceEntry &Entry = ReduceTable[OPI->second];
      // Ignore "special" cases for now.
      if (Entry.Special) {
        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
          Modified = true;
          // The narrow replacement was inserted just before NextMII.
          MachineBasicBlock::iterator I = prior(NextMII);
          MI = &*I;
        }
        goto ProcessNext;
      }

      // Try to transform to a 16-bit two-address instruction.
      if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
        Modified = true;
        MachineBasicBlock::iterator I = prior(NextMII);
        MI = &*I;
        goto ProcessNext;
      }

      // Try to transform to a 16-bit non-two-address instruction.
      if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
        Modified = true;
        MachineBasicBlock::iterator I = prior(NextMII);
        MI = &*I;
      }
    }

  ProcessNext:
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
  }

  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  const TargetMachine &TM = MF.getTarget();
  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());

  bool Modified = false;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
    Modified |= ReduceMBB(*I);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass() {
  return new Thumb2SizeReduce();
}