//===--------------------- R600MergeVectorRegisters.cpp -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregisters, relying on the
/// swizzle abilities of those instructions.
///
/// For instance, let's consider the following pseudo code:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
///   (swizzlable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = INSERT_SUBREG vreg4, sub3
///   (swizzlable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce MOV count.
28//===----------------------------------------------------------------------===// 29 30#include "AMDGPU.h" 31#include "AMDGPUSubtarget.h" 32#include "R600Defines.h" 33#include "R600InstrInfo.h" 34#include "llvm/CodeGen/DFAPacketizer.h" 35#include "llvm/CodeGen/MachineDominators.h" 36#include "llvm/CodeGen/MachineFunctionPass.h" 37#include "llvm/CodeGen/MachineInstrBuilder.h" 38#include "llvm/CodeGen/MachineLoopInfo.h" 39#include "llvm/CodeGen/MachineRegisterInfo.h" 40#include "llvm/CodeGen/Passes.h" 41#include "llvm/Support/Debug.h" 42#include "llvm/Support/raw_ostream.h" 43 44using namespace llvm; 45 46#define DEBUG_TYPE "vec-merger" 47 48namespace { 49 50static bool 51isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 52 for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg), 53 E = MRI.def_instr_end(); It != E; ++It) { 54 return (*It).isImplicitDef(); 55 } 56 if (MRI.isReserved(Reg)) { 57 return false; 58 } 59 llvm_unreachable("Reg without a def"); 60 return false; 61} 62 63class RegSeqInfo { 64public: 65 MachineInstr *Instr; 66 DenseMap<unsigned, unsigned> RegToChan; 67 std::vector<unsigned> UndefReg; 68 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 69 assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); 70 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 71 MachineOperand &MO = Instr->getOperand(i); 72 unsigned Chan = Instr->getOperand(i + 1).getImm(); 73 if (isImplicitlyDef(MRI, MO.getReg())) 74 UndefReg.push_back(Chan); 75 else 76 RegToChan[MO.getReg()] = Chan; 77 } 78 } 79 RegSeqInfo() {} 80 81 bool operator==(const RegSeqInfo &RSI) const { 82 return RSI.Instr == Instr; 83 } 84}; 85 86class R600VectorRegMerger : public MachineFunctionPass { 87private: 88 MachineRegisterInfo *MRI; 89 const R600InstrInfo *TII; 90 bool canSwizzle(const MachineInstr &) const; 91 bool areAllUsesSwizzeable(unsigned Reg) const; 92 void SwizzleInput(MachineInstr &, 93 const std::vector<std::pair<unsigned, unsigned> > &) 
const; 94 bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *, 95 std::vector<std::pair<unsigned, unsigned> > &Remap) const; 96 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 97 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 98 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 99 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 100 MachineInstr *RebuildVector(RegSeqInfo *MI, 101 const RegSeqInfo *BaseVec, 102 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 103 void RemoveMI(MachineInstr *); 104 void trackRSI(const RegSeqInfo &RSI); 105 106 typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 107 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 108 InstructionSetMap PreviousRegSeqByReg; 109 InstructionSetMap PreviousRegSeqByUndefCount; 110public: 111 static char ID; 112 R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 113 TII(nullptr) { } 114 115 void getAnalysisUsage(AnalysisUsage &AU) const override { 116 AU.setPreservesCFG(); 117 AU.addRequired<MachineDominatorTree>(); 118 AU.addPreserved<MachineDominatorTree>(); 119 AU.addRequired<MachineLoopInfo>(); 120 AU.addPreserved<MachineLoopInfo>(); 121 MachineFunctionPass::getAnalysisUsage(AU); 122 } 123 124 const char *getPassName() const override { 125 return "R600 Vector Registers Merge Pass"; 126 } 127 128 bool runOnMachineFunction(MachineFunction &Fn) override; 129}; 130 131char R600VectorRegMerger::ID = 0; 132 133bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 134 const { 135 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 136 return true; 137 switch (MI.getOpcode()) { 138 case AMDGPU::R600_ExportSwz: 139 case AMDGPU::EG_ExportSwz: 140 return true; 141 default: 142 return false; 143 } 144} 145 146bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 147 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 148 const { 149 
unsigned CurrentUndexIdx = 0; 150 for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 151 E = ToMerge->RegToChan.end(); It != E; ++It) { 152 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 153 Untouched->RegToChan.find((*It).first); 154 if (PosInUntouched != Untouched->RegToChan.end()) { 155 Remap.push_back(std::pair<unsigned, unsigned> 156 ((*It).second, (*PosInUntouched).second)); 157 continue; 158 } 159 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 160 return false; 161 Remap.push_back(std::pair<unsigned, unsigned> 162 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 163 } 164 165 return true; 166} 167 168static 169unsigned getReassignedChan( 170 const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 171 unsigned Chan) { 172 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 173 if (RemapChan[j].first == Chan) 174 return RemapChan[j].second; 175 } 176 llvm_unreachable("Chan wasn't reassigned"); 177} 178 179MachineInstr *R600VectorRegMerger::RebuildVector( 180 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 181 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 182 unsigned Reg = RSI->Instr->getOperand(0).getReg(); 183 MachineBasicBlock::iterator Pos = RSI->Instr; 184 MachineBasicBlock &MBB = *Pos->getParent(); 185 DebugLoc DL = Pos->getDebugLoc(); 186 187 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 188 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 189 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 190 for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 191 E = RSI->RegToChan.end(); It != E; ++It) { 192 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 193 unsigned SubReg = (*It).first; 194 unsigned Swizzle = (*It).second; 195 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 196 197 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 198 DstReg) 199 
.addReg(SrcVec) 200 .addReg(SubReg) 201 .addImm(Chan); 202 UpdatedRegToChan[SubReg] = Chan; 203 std::vector<unsigned>::iterator ChanPos = 204 std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan); 205 if (ChanPos != UpdatedUndef.end()) 206 UpdatedUndef.erase(ChanPos); 207 assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) == 208 UpdatedUndef.end() && 209 "UpdatedUndef shouldn't contain Chan more than once!"); 210 DEBUG(dbgs() << " ->"; Tmp->dump();); 211 (void)Tmp; 212 SrcVec = DstReg; 213 } 214 MachineInstr *NewMI = 215 BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec); 216 DEBUG(dbgs() << " ->"; NewMI->dump();); 217 218 DEBUG(dbgs() << " Updating Swizzle:\n"); 219 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 220 E = MRI->use_instr_end(); It != E; ++It) { 221 DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 222 SwizzleInput(*It, RemapChan); 223 DEBUG((*It).dump()); 224 } 225 RSI->Instr->eraseFromParent(); 226 227 // Update RSI 228 RSI->Instr = NewMI; 229 RSI->RegToChan = UpdatedRegToChan; 230 RSI->UndefReg = UpdatedUndef; 231 232 return NewMI; 233} 234 235void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 236 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 237 E = PreviousRegSeqByReg.end(); It != E; ++It) { 238 std::vector<MachineInstr *> &MIs = (*It).second; 239 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 240 } 241 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 242 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 243 std::vector<MachineInstr *> &MIs = (*It).second; 244 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 245 } 246} 247 248void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 249 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 250 unsigned Offset; 251 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 252 Offset = 2; 253 else 254 Offset = 3; 255 for (unsigned i = 
0; i < 4; i++) { 256 unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 257 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 258 if (RemapChan[j].first == Swizzle) { 259 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 260 break; 261 } 262 } 263 } 264} 265 266bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 267 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 268 E = MRI->use_instr_end(); It != E; ++It) { 269 if (!canSwizzle(*It)) 270 return false; 271 } 272 return true; 273} 274 275bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 276 RegSeqInfo &CompatibleRSI, 277 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 278 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 279 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 280 if (!MOp->isReg()) 281 continue; 282 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 283 continue; 284 for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) { 285 CompatibleRSI = PreviousRegSeq[MI]; 286 if (RSI == CompatibleRSI) 287 continue; 288 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 289 return true; 290 } 291 } 292 return false; 293} 294 295bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 296 RegSeqInfo &CompatibleRSI, 297 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 298 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 299 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 300 return false; 301 std::vector<MachineInstr *> &MIs = 302 PreviousRegSeqByUndefCount[NeededUndefs]; 303 CompatibleRSI = PreviousRegSeq[MIs.back()]; 304 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 305 return true; 306} 307 308void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 309 for (DenseMap<unsigned, unsigned>::const_iterator 310 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 311 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 312 } 313 
PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 314 PreviousRegSeq[RSI.Instr] = RSI; 315} 316 317bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 318 if (skipFunction(*Fn.getFunction())) 319 return false; 320 321 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 322 TII = ST.getInstrInfo(); 323 MRI = &Fn.getRegInfo(); 324 325 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 326 MBB != MBBe; ++MBB) { 327 MachineBasicBlock *MB = &*MBB; 328 PreviousRegSeq.clear(); 329 PreviousRegSeqByReg.clear(); 330 PreviousRegSeqByUndefCount.clear(); 331 332 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 333 MII != MIIE; ++MII) { 334 MachineInstr &MI = *MII; 335 if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) { 336 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 337 unsigned Reg = MI.getOperand(1).getReg(); 338 for (MachineRegisterInfo::def_instr_iterator 339 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 340 It != E; ++It) { 341 RemoveMI(&(*It)); 342 } 343 } 344 continue; 345 } 346 347 RegSeqInfo RSI(*MRI, &MI); 348 349 // All uses of MI are swizzeable ? 
350 unsigned Reg = MI.getOperand(0).getReg(); 351 if (!areAllUsesSwizzeable(Reg)) 352 continue; 353 354 DEBUG({ 355 dbgs() << "Trying to optimize "; 356 MI.dump(); 357 }); 358 359 RegSeqInfo CandidateRSI; 360 std::vector<std::pair<unsigned, unsigned> > RemapChan; 361 DEBUG(dbgs() << "Using common slots...\n";); 362 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 363 // Remove CandidateRSI mapping 364 RemoveMI(CandidateRSI.Instr); 365 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 366 trackRSI(RSI); 367 continue; 368 } 369 DEBUG(dbgs() << "Using free slots...\n";); 370 RemapChan.clear(); 371 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 372 RemoveMI(CandidateRSI.Instr); 373 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 374 trackRSI(RSI); 375 continue; 376 } 377 //Failed to merge 378 trackRSI(RSI); 379 } 380 } 381 return false; 382} 383 384} 385 386llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 387 return new R600VectorRegMerger(tm); 388} 389