// R600EmitClauseMarkers.cpp revision cedcfee405a22b245e869abe8609f094df34085a
1//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold 12/// 128 Alu instructions ; these instructions can access up to 4 prefetched 13/// 4 lines of 16 registers from constant buffers. Such ALU clauses are 14/// initiated by CF_ALU instructions. 15//===----------------------------------------------------------------------===// 16 17#include "AMDGPU.h" 18#include "R600Defines.h" 19#include "R600InstrInfo.h" 20#include "R600MachineFunctionInfo.h" 21#include "R600RegisterInfo.h" 22#include "llvm/CodeGen/MachineFunctionPass.h" 23#include "llvm/CodeGen/MachineInstrBuilder.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25 26using namespace llvm; 27 28namespace { 29 30class R600EmitClauseMarkersPass : public MachineFunctionPass { 31 32private: 33 static char ID; 34 const R600InstrInfo *TII; 35 36 unsigned OccupiedDwords(MachineInstr *MI) const { 37 switch (MI->getOpcode()) { 38 case AMDGPU::INTERP_PAIR_XY: 39 case AMDGPU::INTERP_PAIR_ZW: 40 case AMDGPU::INTERP_VEC_LOAD: 41 case AMDGPU::DOT_4: 42 return 4; 43 case AMDGPU::KILL: 44 return 0; 45 default: 46 break; 47 } 48 49 if(TII->isVector(*MI) || 50 TII->isCubeOp(MI->getOpcode()) || 51 TII->isReductionOp(MI->getOpcode())) 52 return 4; 53 54 unsigned NumLiteral = 0; 55 for (MachineInstr::mop_iterator It = MI->operands_begin(), 56 E = MI->operands_end(); It != E; ++It) { 57 MachineOperand &MO = *It; 58 if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) 59 ++NumLiteral; 60 } 61 return 1 + NumLiteral; 62 } 63 64 bool isALU(const MachineInstr *MI) const { 65 if (TII->isALUInstr(MI->getOpcode())) 66 return true; 67 if (TII->isVector(*MI) || 
TII->isCubeOp(MI->getOpcode())) 68 return true; 69 switch (MI->getOpcode()) { 70 case AMDGPU::PRED_X: 71 case AMDGPU::INTERP_PAIR_XY: 72 case AMDGPU::INTERP_PAIR_ZW: 73 case AMDGPU::INTERP_VEC_LOAD: 74 case AMDGPU::COPY: 75 case AMDGPU::DOT_4: 76 return true; 77 default: 78 return false; 79 } 80 } 81 82 bool IsTrivialInst(MachineInstr *MI) const { 83 switch (MI->getOpcode()) { 84 case AMDGPU::KILL: 85 case AMDGPU::RETURN: 86 return true; 87 default: 88 return false; 89 } 90 } 91 92 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { 93 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 94 // (See also R600ISelLowering.cpp) 95 // ConstIndex value is in [0, 4095]; 96 return std::pair<unsigned, unsigned>( 97 ((Sel >> 2) - 512) >> 12, // KC_BANK 98 // Line Number of ConstIndex 99 // A line contains 16 constant registers however KCX bank can lock 100 // two line at the same time ; thus we want to get an even line number. 101 // Line number can be retrieved with (>>4), using (>>5) <<1 generates 102 // an even number. 
103 ((((Sel >> 2) - 512) & 4095) >> 5) << 1); 104 } 105 106 bool SubstituteKCacheBank(MachineInstr *MI, 107 std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const { 108 std::vector<std::pair<unsigned, unsigned> > UsedKCache; 109 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts = 110 TII->getSrcs(MI); 111 assert((TII->isALUInstr(MI->getOpcode()) || 112 MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const"); 113 for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 114 if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) 115 continue; 116 unsigned Sel = Consts[i].second; 117 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; 118 unsigned KCacheIndex = Index * 4 + Chan; 119 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); 120 if (CachedConsts.empty()) { 121 CachedConsts.push_back(BankLine); 122 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 123 continue; 124 } 125 if (CachedConsts[0] == BankLine) { 126 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 127 continue; 128 } 129 if (CachedConsts.size() == 1) { 130 CachedConsts.push_back(BankLine); 131 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 132 continue; 133 } 134 if (CachedConsts[1] == BankLine) { 135 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 136 continue; 137 } 138 return false; 139 } 140 141 for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { 142 if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) 143 continue; 144 switch(UsedKCache[j].first) { 145 case 0: 146 Consts[i].first->setReg( 147 AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second)); 148 break; 149 case 1: 150 Consts[i].first->setReg( 151 AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second)); 152 break; 153 default: 154 llvm_unreachable("Wrong Cache Line"); 155 } 156 j++; 157 } 158 return true; 159 } 160 161 MachineBasicBlock::iterator 162 MakeALUClause(MachineBasicBlock &MBB, 
MachineBasicBlock::iterator I) const { 163 MachineBasicBlock::iterator ClauseHead = I; 164 std::vector<std::pair<unsigned, unsigned> > KCacheBanks; 165 bool PushBeforeModifier = false; 166 unsigned AluInstCount = 0; 167 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 168 if (IsTrivialInst(I)) 169 continue; 170 if (!isALU(I)) 171 break; 172 if (AluInstCount > TII->getMaxAlusPerClause()) 173 break; 174 if (I->getOpcode() == AMDGPU::PRED_X) { 175 if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH) 176 PushBeforeModifier = true; 177 AluInstCount ++; 178 continue; 179 } 180 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as: 181 // 182 // * KILL or INTERP instructions 183 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits 184 // * Uses waterfalling (i.e. INDEX_MODE = AR.X) 185 // 186 // XXX: These checks have not been implemented yet. 187 if (TII->mustBeLastInClause(I->getOpcode())) { 188 I++; 189 break; 190 } 191 if (TII->isALUInstr(I->getOpcode()) && 192 !SubstituteKCacheBank(I, KCacheBanks)) 193 break; 194 if (I->getOpcode() == AMDGPU::DOT_4 && 195 !SubstituteKCacheBank(I, KCacheBanks)) 196 break; 197 AluInstCount += OccupiedDwords(I); 198 } 199 unsigned Opcode = PushBeforeModifier ? 
200 AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; 201 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) 202 .addImm(0) // ADDR 203 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 204 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 205 .addImm(KCacheBanks.empty()?0:2) // KM0 206 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 207 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 208 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 209 .addImm(AluInstCount); // COUNT 210 return I; 211 } 212 213public: 214 R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), 215 TII(0) { } 216 217 virtual bool runOnMachineFunction(MachineFunction &MF) { 218 TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); 219 220 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 221 BB != BB_E; ++BB) { 222 MachineBasicBlock &MBB = *BB; 223 MachineBasicBlock::iterator I = MBB.begin(); 224 if (I->getOpcode() == AMDGPU::CF_ALU) 225 continue; // BB was already parsed 226 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { 227 if (isALU(I)) 228 I = MakeALUClause(MBB, I); 229 else 230 ++I; 231 } 232 } 233 return false; 234 } 235 236 const char *getPassName() const { 237 return "R600 Emit Clause Markers Pass"; 238 } 239}; 240 241char R600EmitClauseMarkersPass::ID = 0; 242 243} // end anonymous namespace 244 245 246llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) { 247 return new R600EmitClauseMarkersPass(TM); 248} 249 250