//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// \brief Machine-function pass that folds the source operand of simple
/// moves/copies (immediates or virtual registers) directly into the
/// instructions that use the move's result.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

/// The operand-folding pass itself. All of the work happens in
/// runOnMachineFunction (defined below); this class is only the
/// MachineFunctionPass boilerplate.
class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // NOTE(review): MachineDominatorTree is declared required here but is
    // never queried in this file — confirm whether the dependency is stale.
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

/// One pending fold: "replace operand UseOpNo of UseMI with this value".
/// Exactly one of the two payloads is active: for a register fold,
/// OpToFold points at the source MachineOperand; for an immediate fold,
/// OpToFold is null and the immediate value is copied into ImmToFold
/// (copying means the candidate stays valid even if it was built from a
/// temporary MachineOperand).
struct FoldCandidate {
  MachineInstr *UseMI;      // Instruction whose operand will be rewritten.
  unsigned UseOpNo;         // Index of the operand to rewrite in UseMI.
  MachineOperand *OpToFold; // Register source, or null when folding an imm.
  uint64_t ImmToFold;       // Immediate payload; only valid when isImm().

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
    UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  // A null OpToFold is the discriminator for the immediate case.
  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.
// Pass registration: depends on MachineDominatorTree (see getAnalysisUsage),
// modifies nothing the registry needs to know about (not CFG-only, not
// analysis-only).
INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

/// Factory used by the target pass pipeline.
FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

/// Returns true for the opcodes this pass will try to fold away: plain
/// scalar/vector moves and generic COPYs. Only instructions whose operand 1
/// is the single source are listed, since the walk below reads operand 1
/// unconditionally.
static bool isSafeToFold(unsigned Opcode) {
  switch(Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

/// Applies one queued FoldCandidate: rewrites operand UseOpNo of Fold.UseMI
/// in place, either to an immediate or to the source virtual register
/// (carrying over the subregister index). Returns true if the operand was
/// actually changed.
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  // Only virtual-to-virtual replacement is implemented; anything involving a
  // physical register is left untouched (returns false, fold abandoned).
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

/// Queues a fold of OpToFold into operand OpNo of MI if the target considers
/// it legal there. If it is not legal as-is, tries commuting MI (remapping
/// OpNo to the commuted position first) and re-checks legality. Returns true
/// iff a candidate was added to FoldList.
///
/// NOTE(review): on the failure path after a successful commuteInstruction,
/// MI is left in its commuted form — confirm that is intended (commuting is
/// normally behavior-neutral, but it does change operand order for later
/// candidates in the same walk).
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      // Track where our operand lands after the commute.
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

/// Main driver. For every foldable move/copy in the function, walks the uses
/// of its destination register, collects legal folds into FoldList, and then
/// applies them in a second phase. The two-phase split keeps the use-list
/// walk separate from the operand rewrites that would otherwise mutate the
/// list being iterated. Always returns false even though instructions are
/// modified — NOTE(review): confirm this matches the pass-manager contract
/// expected here; other passes typically report true after changing the IR.
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                 BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    // Next is captured before processing so the walk survives changes to the
    // instruction stream made while handling I.
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      // For every opcode accepted by isSafeToFold, operand 0 is the def and
      // operand 1 is the single source being propagated.
      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      // Phase 1: collect candidates; no operands are rewritten yet.
      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32-bits for folding.
          if (UseOp.getSubReg()) {
            // Only 8-byte (64-bit) register classes can be split this way.
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            // getMovOpcode returning COPY means no suitable MOV exists for
            // this register class; skip the use rather than fold.
            if (MovOp == AMDGPU::COPY)
              continue;

            // Mutates UseMI in place before legality is checked below, so the
            // check sees the MOV's operand constraints, not COPY's.
            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes. Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          // ImmOp is a stack temporary; this is safe because FoldCandidate
          // copies immediates by value rather than keeping the pointer.
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunites. The shrink operands pass
        // already does this.
      }

      // Phase 2: apply the collected folds now that the use walk is done.
      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags: the folded register now has an extra use at
          // Fold.UseMI, so a prior kill marker on the source is stale.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}