// Thumb2SizeReduction.cpp revision 7896c9f436a4eda5ec15e882a7505ba482a2fcd0
1//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#define DEBUG_TYPE "t2-reduce-size" 11#include "ARM.h" 12#include "ARMAddressingModes.h" 13#include "ARMBaseRegisterInfo.h" 14#include "ARMBaseInstrInfo.h" 15#include "Thumb2InstrInfo.h" 16#include "llvm/CodeGen/MachineInstr.h" 17#include "llvm/CodeGen/MachineInstrBuilder.h" 18#include "llvm/CodeGen/MachineFunctionPass.h" 19#include "llvm/Support/CommandLine.h" 20#include "llvm/Support/Debug.h" 21#include "llvm/Support/raw_ostream.h" 22#include "llvm/ADT/DenseMap.h" 23#include "llvm/ADT/Statistic.h" 24using namespace llvm; 25 26STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); 27STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); 28STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); 29 30static cl::opt<int> ReduceLimit("t2-reduce-limit", 31 cl::init(-1), cl::Hidden); 32static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2", 33 cl::init(-1), cl::Hidden); 34static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", 35 cl::init(-1), cl::Hidden); 36 37namespace { 38 /// ReduceTable - A static table with information on mapping from wide 39 /// opcodes to narrow 40 struct ReduceEntry { 41 unsigned WideOpc; // Wide opcode 42 unsigned NarrowOpc1; // Narrow opcode to transform to 43 unsigned NarrowOpc2; // Narrow opcode when it's two-address 44 uint8_t Imm1Limit; // Limit of immediate field (bits) 45 uint8_t Imm2Limit; // Limit of immediate field when it's two-address 46 unsigned LowRegs1 : 1; // Only possible if low-registers are used 47 unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) 48 unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice 
versa. 49 // 1 - No cc field. 50 // 2 - Always set CPSR. 51 unsigned PredCC2 : 2; 52 unsigned Special : 1; // Needs to be dealt with specially 53 }; 54 55 static const ReduceEntry ReduceTable[] = { 56 // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S 57 { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 }, 58 { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, 59 { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, 60 // Note: immediate scale is 4. 61 { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0 }, 62 { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 }, 63 { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 }, 64 { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 }, 65 { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 }, 66 { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 }, 67 { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 }, 68 { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 }, 69 { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 }, 70 { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0 }, 71 { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 }, 72 { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 }, 73 { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, 74 // FIXME: adr.n immediate offset must be multiple of 4. 75 //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 }, 76 { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, 77 { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 }, 78 { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, 79 { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, 80 { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, 81 { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 }, 82 // FIXME: Do we need the 16-bit 'S' variant? 
83 { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, 84 { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, 85 { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 0, 0,1, 0 }, 86 { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, 87 { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, 88 { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, 89 { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 }, 90 { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 }, 91 { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 }, 92 { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 }, 93 { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 }, 94 { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 }, 95 { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 }, 96 { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 }, 97 { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 }, 98 { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 }, 99 { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 }, 100 { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 }, 101 { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 }, 102 { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 }, 103 { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 }, 104 { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 }, 105 106 // FIXME: Clean this up after splitting each Thumb load / store opcode 107 // into multiple ones. 
108 { ARM::t2LDRi12,ARM::tLDR, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 }, 109 { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 }, 110 { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 }, 111 { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 }, 112 { ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 }, 113 { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 }, 114 { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, 115 { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, 116 { ARM::t2STRi12,ARM::tSTR, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 }, 117 { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 }, 118 { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 }, 119 { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 }, 120 { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 }, 121 { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 }, 122 123 { ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, 124 { ARM::t2LDM, ARM::tLDM, ARM::tPOP, 0, 0, 1, 1, 1,1, 1 }, 125 { ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, 126 }; 127 128 class Thumb2SizeReduce : public MachineFunctionPass { 129 public: 130 static char ID; 131 Thumb2SizeReduce(); 132 133 const Thumb2InstrInfo *TII; 134 135 virtual bool runOnMachineFunction(MachineFunction &MF); 136 137 virtual const char *getPassName() const { 138 return "Thumb2 instruction size reduction pass"; 139 } 140 141 private: 142 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. 143 DenseMap<unsigned, unsigned> ReduceOpcodeMap; 144 145 bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 146 bool is2Addr, ARMCC::CondCodes Pred, 147 bool LiveCPSR, bool &HasCC, bool &CCDead); 148 149 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 150 const ReduceEntry &Entry); 151 152 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 153 const ReduceEntry &Entry, bool LiveCPSR); 154 155 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address 156 /// instruction. 
157 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 158 const ReduceEntry &Entry, 159 bool LiveCPSR); 160 161 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit 162 /// non-two-address instruction. 163 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 164 const ReduceEntry &Entry, 165 bool LiveCPSR); 166 167 /// ReduceMBB - Reduce width of instructions in the specified basic block. 168 bool ReduceMBB(MachineBasicBlock &MBB); 169 }; 170 char Thumb2SizeReduce::ID = 0; 171} 172 173Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { 174 for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { 175 unsigned FromOpc = ReduceTable[i].WideOpc; 176 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) 177 assert(false && "Duplicated entries?"); 178 } 179} 180 181static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) { 182 for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs) 183 if (*Regs == ARM::CPSR) 184 return true; 185 return false; 186} 187 188bool 189Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, 190 bool is2Addr, ARMCC::CondCodes Pred, 191 bool LiveCPSR, bool &HasCC, bool &CCDead) { 192 if ((is2Addr && Entry.PredCC2 == 0) || 193 (!is2Addr && Entry.PredCC1 == 0)) { 194 if (Pred == ARMCC::AL) { 195 // Not predicated, must set CPSR. 196 if (!HasCC) { 197 // Original instruction was not setting CPSR, but CPSR is not 198 // currently live anyway. It's ok to set it. The CPSR def is 199 // dead though. 200 if (!LiveCPSR) { 201 HasCC = true; 202 CCDead = true; 203 return true; 204 } 205 return false; 206 } 207 } else { 208 // Predicated, must not set CPSR. 209 if (HasCC) 210 return false; 211 } 212 } else if ((is2Addr && Entry.PredCC2 == 2) || 213 (!is2Addr && Entry.PredCC1 == 2)) { 214 /// Old opcode has an optional def of CPSR. 
215 if (HasCC) 216 return true; 217 // If both old opcode does not implicit CPSR def, then it's not ok since 218 // these new opcodes CPSR def is not meant to be thrown away. e.g. CMP. 219 if (!HasImplicitCPSRDef(MI->getDesc())) 220 return false; 221 HasCC = true; 222 } else { 223 // 16-bit instruction does not set CPSR. 224 if (HasCC) 225 return false; 226 } 227 228 return true; 229} 230 231static bool VerifyLowRegs(MachineInstr *MI) { 232 unsigned Opc = MI->getOpcode(); 233 bool isPCOk = (Opc == ARM::t2LDM_RET) || (Opc == ARM::t2LDM); 234 bool isLROk = (Opc == ARM::t2STM); 235 bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi); 236 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 237 const MachineOperand &MO = MI->getOperand(i); 238 if (!MO.isReg() || MO.isImplicit()) 239 continue; 240 unsigned Reg = MO.getReg(); 241 if (Reg == 0 || Reg == ARM::CPSR) 242 continue; 243 if (isPCOk && Reg == ARM::PC) 244 continue; 245 if (isLROk && Reg == ARM::LR) 246 continue; 247 if (Reg == ARM::SP) { 248 if (isSPOk) 249 continue; 250 if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) 251 // Special case for these ldr / str with sp as base register. 252 continue; 253 } 254 if (!isARMLowRegister(Reg)) 255 return false; 256 } 257 return true; 258} 259 260bool 261Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, 262 const ReduceEntry &Entry) { 263 if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt)) 264 return false; 265 266 unsigned Scale = 1; 267 bool HasImmOffset = false; 268 bool HasShift = false; 269 bool HasOffReg = true; 270 bool isLdStMul = false; 271 unsigned Opc = Entry.NarrowOpc1; 272 unsigned OpNum = 3; // First 'rest' of operands. 
273 uint8_t ImmLimit = Entry.Imm1Limit; 274 switch (Entry.WideOpc) { 275 default: 276 llvm_unreachable("Unexpected Thumb2 load / store opcode!"); 277 case ARM::t2LDRi12: 278 case ARM::t2STRi12: { 279 unsigned BaseReg = MI->getOperand(1).getReg(); 280 if (BaseReg == ARM::SP) { 281 Opc = Entry.NarrowOpc2; 282 ImmLimit = Entry.Imm2Limit; 283 HasOffReg = false; 284 } 285 Scale = 4; 286 HasImmOffset = true; 287 break; 288 } 289 case ARM::t2LDRBi12: 290 case ARM::t2STRBi12: 291 HasImmOffset = true; 292 break; 293 case ARM::t2LDRHi12: 294 case ARM::t2STRHi12: 295 Scale = 2; 296 HasImmOffset = true; 297 break; 298 case ARM::t2LDRs: 299 case ARM::t2LDRBs: 300 case ARM::t2LDRHs: 301 case ARM::t2LDRSBs: 302 case ARM::t2LDRSHs: 303 case ARM::t2STRs: 304 case ARM::t2STRBs: 305 case ARM::t2STRHs: 306 HasShift = true; 307 OpNum = 4; 308 break; 309 case ARM::t2LDM_RET: 310 case ARM::t2LDM: 311 case ARM::t2STM: { 312 OpNum = 0; 313 unsigned BaseReg = MI->getOperand(0).getReg(); 314 unsigned Mode = MI->getOperand(1).getImm(); 315 if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) { 316 Opc = Entry.NarrowOpc2; 317 OpNum = 2; 318 } else if (Entry.WideOpc == ARM::t2LDM_RET || 319 !isARMLowRegister(BaseReg) || 320 !ARM_AM::getAM4WBFlag(Mode) || 321 ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) { 322 return false; 323 } 324 isLdStMul = true; 325 break; 326 } 327 } 328 329 unsigned OffsetReg = 0; 330 bool OffsetKill = false; 331 if (HasShift) { 332 OffsetReg = MI->getOperand(2).getReg(); 333 OffsetKill = MI->getOperand(2).isKill(); 334 if (MI->getOperand(3).getImm()) 335 // Thumb1 addressing mode doesn't support shift. 336 return false; 337 } 338 339 unsigned OffsetImm = 0; 340 if (HasImmOffset) { 341 OffsetImm = MI->getOperand(2).getImm(); 342 unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; 343 if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset) 344 // Make sure the immediate field fits. 345 return false; 346 } 347 348 // Add the 16-bit load / store instruction. 
349 // FIXME: Thumb1 addressing mode encode both immediate and register offset. 350 DebugLoc dl = MI->getDebugLoc(); 351 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); 352 if (!isLdStMul) { 353 MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1)); 354 if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) { 355 // tLDRSB and tLDRSH do not have an immediate offset field. On the other 356 // hand, it must have an offset register. 357 // FIXME: Remove this special case. 358 MIB.addImm(OffsetImm/Scale); 359 } 360 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); 361 362 if (HasOffReg) 363 MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); 364 } 365 366 // Transfer the rest of operands. 367 for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) 368 MIB.addOperand(MI->getOperand(OpNum)); 369 370 // Transfer memoperands. 371 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 372 373 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 374 375 MBB.erase(MI); 376 ++NumLdSts; 377 return true; 378} 379 380bool 381Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, 382 const ReduceEntry &Entry, 383 bool LiveCPSR) { 384 if (Entry.LowRegs1 && !VerifyLowRegs(MI)) 385 return false; 386 387 const TargetInstrDesc &TID = MI->getDesc(); 388 if (TID.mayLoad() || TID.mayStore()) 389 return ReduceLoadStore(MBB, MI, Entry); 390 391 unsigned Opc = MI->getOpcode(); 392 switch (Opc) { 393 default: break; 394 case ARM::t2ADDSri: 395 case ARM::t2ADDSrr: { 396 unsigned PredReg = 0; 397 if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { 398 switch (Opc) { 399 default: break; 400 case ARM::t2ADDSri: { 401 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) 402 return true; 403 // fallthrough 404 } 405 case ARM::t2ADDSrr: 406 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); 407 } 408 } 409 break; 410 } 411 case ARM::t2RSBri: 412 case ARM::t2RSBSri: 413 if (MI->getOperand(2).getImm() == 0) 414 
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); 415 break; 416 case ARM::t2MOVi16: 417 // Can convert only 'pure' immediate operands, not immediates obtained as 418 // globals' addresses. 419 if (MI->getOperand(1).isImm()) 420 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); 421 break; 422 } 423 return false; 424} 425 426bool 427Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, 428 const ReduceEntry &Entry, 429 bool LiveCPSR) { 430 431 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) 432 return false; 433 434 const TargetInstrDesc &TID = MI->getDesc(); 435 unsigned Reg0 = MI->getOperand(0).getReg(); 436 unsigned Reg1 = MI->getOperand(1).getReg(); 437 if (Reg0 != Reg1) 438 return false; 439 if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) 440 return false; 441 if (Entry.Imm2Limit) { 442 unsigned Imm = MI->getOperand(2).getImm(); 443 unsigned Limit = (1 << Entry.Imm2Limit) - 1; 444 if (Imm > Limit) 445 return false; 446 } else { 447 unsigned Reg2 = MI->getOperand(2).getReg(); 448 if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) 449 return false; 450 } 451 452 // Check if it's possible / necessary to transfer the predicate. 453 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2); 454 unsigned PredReg = 0; 455 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 456 bool SkipPred = false; 457 if (Pred != ARMCC::AL) { 458 if (!NewTID.isPredicable()) 459 // Can't transfer predicate, fail. 460 return false; 461 } else { 462 SkipPred = !NewTID.isPredicable(); 463 } 464 465 bool HasCC = false; 466 bool CCDead = false; 467 if (TID.hasOptionalDef()) { 468 unsigned NumOps = TID.getNumOperands(); 469 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 470 if (HasCC && MI->getOperand(NumOps-1).isDead()) 471 CCDead = true; 472 } 473 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) 474 return false; 475 476 // Add the 16-bit instruction. 
477 DebugLoc dl = MI->getDebugLoc(); 478 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); 479 MIB.addOperand(MI->getOperand(0)); 480 if (NewTID.hasOptionalDef()) { 481 if (HasCC) 482 AddDefaultT1CC(MIB, CCDead); 483 else 484 AddNoT1CC(MIB); 485 } 486 487 // Transfer the rest of operands. 488 unsigned NumOps = TID.getNumOperands(); 489 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 490 if (i < NumOps && TID.OpInfo[i].isOptionalDef()) 491 continue; 492 if (SkipPred && TID.OpInfo[i].isPredicate()) 493 continue; 494 MIB.addOperand(MI->getOperand(i)); 495 } 496 497 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 498 499 MBB.erase(MI); 500 ++Num2Addrs; 501 return true; 502} 503 504bool 505Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, 506 const ReduceEntry &Entry, 507 bool LiveCPSR) { 508 if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) 509 return false; 510 511 unsigned Limit = ~0U; 512 unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1; 513 if (Entry.Imm1Limit) 514 Limit = ((1 << Entry.Imm1Limit) - 1) * Scale; 515 516 const TargetInstrDesc &TID = MI->getDesc(); 517 for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { 518 if (TID.OpInfo[i].isPredicate()) 519 continue; 520 const MachineOperand &MO = MI->getOperand(i); 521 if (MO.isReg()) { 522 unsigned Reg = MO.getReg(); 523 if (!Reg || Reg == ARM::CPSR) 524 continue; 525 if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP) 526 continue; 527 if (Entry.LowRegs1 && !isARMLowRegister(Reg)) 528 return false; 529 } else if (MO.isImm() && 530 !TID.OpInfo[i].isPredicate()) { 531 if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0) 532 return false; 533 } 534 } 535 536 // Check if it's possible / necessary to transfer the predicate. 
537 const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1); 538 unsigned PredReg = 0; 539 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 540 bool SkipPred = false; 541 if (Pred != ARMCC::AL) { 542 if (!NewTID.isPredicable()) 543 // Can't transfer predicate, fail. 544 return false; 545 } else { 546 SkipPred = !NewTID.isPredicable(); 547 } 548 549 bool HasCC = false; 550 bool CCDead = false; 551 if (TID.hasOptionalDef()) { 552 unsigned NumOps = TID.getNumOperands(); 553 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); 554 if (HasCC && MI->getOperand(NumOps-1).isDead()) 555 CCDead = true; 556 } 557 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) 558 return false; 559 560 // Add the 16-bit instruction. 561 DebugLoc dl = MI->getDebugLoc(); 562 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); 563 MIB.addOperand(MI->getOperand(0)); 564 if (NewTID.hasOptionalDef()) { 565 if (HasCC) 566 AddDefaultT1CC(MIB, CCDead); 567 else 568 AddNoT1CC(MIB); 569 } 570 571 // Transfer the rest of operands. 572 unsigned NumOps = TID.getNumOperands(); 573 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 574 if (i < NumOps && TID.OpInfo[i].isOptionalDef()) 575 continue; 576 if ((TID.getOpcode() == ARM::t2RSBSri || 577 TID.getOpcode() == ARM::t2RSBri) && i == 2) 578 // Skip the zero immediate operand, it's now implicit. 579 continue; 580 bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate()); 581 if (SkipPred && isPred) 582 continue; 583 const MachineOperand &MO = MI->getOperand(i); 584 if (Scale > 1 && !isPred && MO.isImm()) 585 MIB.addImm(MO.getImm() / Scale); 586 else { 587 if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) 588 // Skip implicit def of CPSR. Either it's modeled as an optional 589 // def now or it's already an implicit def on the new instruction. 
590 continue; 591 MIB.addOperand(MO); 592 } 593 } 594 if (!TID.isPredicable() && NewTID.isPredicable()) 595 AddDefaultPred(MIB); 596 597 DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); 598 599 MBB.erase(MI); 600 ++NumNarrows; 601 return true; 602} 603 604static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) { 605 bool HasDef = false; 606 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 607 const MachineOperand &MO = MI.getOperand(i); 608 if (!MO.isReg() || MO.isUndef() || MO.isUse()) 609 continue; 610 if (MO.getReg() != ARM::CPSR) 611 continue; 612 if (!MO.isDead()) 613 HasDef = true; 614 } 615 616 return HasDef || LiveCPSR; 617} 618 619static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { 620 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 621 const MachineOperand &MO = MI.getOperand(i); 622 if (!MO.isReg() || MO.isUndef() || MO.isDef()) 623 continue; 624 if (MO.getReg() != ARM::CPSR) 625 continue; 626 assert(LiveCPSR && "CPSR liveness tracking is wrong!"); 627 if (MO.isKill()) { 628 LiveCPSR = false; 629 break; 630 } 631 } 632 633 return LiveCPSR; 634} 635 636bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { 637 bool Modified = false; 638 639 bool LiveCPSR = false; 640 // Yes, CPSR could be livein. 641 for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(), 642 E = MBB.livein_end(); I != E; ++I) { 643 if (*I == ARM::CPSR) { 644 LiveCPSR = true; 645 break; 646 } 647 } 648 649 MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); 650 MachineBasicBlock::iterator NextMII; 651 for (; MII != E; MII = NextMII) { 652 NextMII = llvm::next(MII); 653 654 MachineInstr *MI = &*MII; 655 LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); 656 657 unsigned Opcode = MI->getOpcode(); 658 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); 659 if (OPI != ReduceOpcodeMap.end()) { 660 const ReduceEntry &Entry = ReduceTable[OPI->second]; 661 // Ignore "special" cases for now. 
662 if (Entry.Special) { 663 if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) { 664 Modified = true; 665 MachineBasicBlock::iterator I = prior(NextMII); 666 MI = &*I; 667 } 668 goto ProcessNext; 669 } 670 671 // Try to transform to a 16-bit two-address instruction. 672 if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) { 673 Modified = true; 674 MachineBasicBlock::iterator I = prior(NextMII); 675 MI = &*I; 676 goto ProcessNext; 677 } 678 679 // Try to transform ro a 16-bit non-two-address instruction. 680 if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) { 681 Modified = true; 682 MachineBasicBlock::iterator I = prior(NextMII); 683 MI = &*I; 684 } 685 } 686 687 ProcessNext: 688 LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR); 689 } 690 691 return Modified; 692} 693 694bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { 695 const TargetMachine &TM = MF.getTarget(); 696 TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); 697 698 bool Modified = false; 699 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 700 Modified |= ReduceMBB(*I); 701 return Modified; 702} 703 704/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size 705/// reduction pass. 706FunctionPass *llvm::createThumb2SizeReductionPass() { 707 return new Thumb2SizeReduce(); 708} 709