//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
/// The coalescer will begin at BB0 and eliminate its copy, so the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// This code now contains an illegal copy from a VGPR to an SGPR.
///
/// To avoid this problem, this pass searches for PHI instructions that define
/// a <vsrc> register and constrains their definition class to <vgpr> if a
/// user of the PHI's result is a vector instruction. If the PHI's definition
/// class is constrained to <vgpr>, the coalescer is unable to perform the
/// COPY removal from the example above, which would ultimately have led to
/// the creation of an illegal COPY.
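///
/// Illustrative sketch only (register numbers and any copies inserted by the
/// target hooks may differ in practice): once the PHI is constrained to
/// <vgpr>, the example keeps the shape of the original, pre-coalescing code:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vgpr> = COPY %vreg0 <sgpr>
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vgpr> = PHI %vreg1 <vgpr>, <BB#0>, %vreg3 <vgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vgpr>
///
/// The only cross-bank copy that remains is the SGPR -> VGPR copy in BB0,
/// which is legal.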
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "sgpr-copies"

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fix SGPR copies";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
                "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg() ||
        !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
      continue;

    if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
      return true;
  }
  return false;
}

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
    TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    MRI.getRegClass(SrcReg) :
    TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
    TargetRegisterInfo::isVirtualRegister(DstReg) ?
    MRI.getRegClass(DstReg) :
    TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}

static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}

// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
//  SGPRx = ...
//  SGPRy = REG_SEQUENCE SGPRx, sub0 ...
//  VGPRz = COPY SGPRy
//
// ==>
//
//  VGPRx = COPY SGPRx
//  VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy
  //
  // =>
  //
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    // Copy each SGPR input into a fresh VGPR and feed that to the
    // now-VGPR REG_SEQUENCE.
    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
        .addOperand(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());

  SmallVector<MachineInstr *, 16> Worklist;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY: {
        // If the destination register is a physical register there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
          TII->moveToVALU(MI);
        }

        break;
      }
      case AMDGPU::PHI: {
        DEBUG(dbgs() << "Fixing PHI: " << MI);
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // Will become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // FIXME: This is OK if the branching decision is made based on an
        // SGPR value.
        bool SGPRBranch = false;

        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction. In this case, we know the program will never enter
        // the second block (the loop) without entering the first block
        // (where the condition is computed), so there is no chance for
        // values to be overwritten.

        bool HasBreakDef = false;
        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
          unsigned Reg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
            TII->moveToVALU(MI);
            break;
          }
          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
          assert(DefInstr);
          switch (DefInstr->getOpcode()) {

          case AMDGPU::SI_BREAK:
          case AMDGPU::SI_IF_BREAK:
          case AMDGPU::SI_ELSE_BREAK:
          // If we see a PHI instruction that defines an SGPR, then that PHI
          // instruction has already been considered and should have
          // a *_BREAK as an operand.
          case AMDGPU::PHI:
            HasBreakDef = true;
            break;
          }
        }

        if (!SGPRBranch && !HasBreakDef)
          TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  return true;
}
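
// Usage sketch (illustrative only; the authoritative scheduling lives in the
// AMDGPU target machine setup, not in this file): the pass is meant to be
// added to the GCN codegen pipeline via its ID shortly after instruction
// selection, along the lines of:
//
//   bool GCNPassConfig::addInstSelector() {    // hook name assumed here
//     AMDGPUPassConfig::addInstSelector();
//     addPass(&SIFixSGPRCopiesID);             // run before later SSA MI passes
//     return false;
//   }
//
// Out-of-tree users can equivalently call createSIFixSGPRCopiesPass() when
// assembling a custom pass pipeline.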