SIFixSGPRCopies.cpp revision e59daaa2b83ddb7b6c563e69ef9ae5d67d3a8e07
//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
///  BB0:
///    %vreg0 <sgpr> = SCALAR_INST
///    %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///    ...
///    BRANCH %cond BB1, BB2
///  BB1:
///    %vreg2 <vgpr> = VECTOR_INST
///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
///  BB2:
///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
///  BB0:
///    %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
///  BB1:
///    %vreg2 <vgpr> = VECTOR_INST
///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
///  BB2:
///    %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is now forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
///
///  BB0:
///    %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
///  BB1:
///    %vreg2 <vgpr> = VECTOR_INST
///    %vreg3 <sgpr> = COPY %vreg2 <vgpr>
///  BB2:
///    %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// which define a <vsrc> register and constrains its definition class to
/// <vgpr> if the user of the PHI's definition register is a vector instruction.
/// If the PHI's definition class is constrained to <vgpr> then the coalescer
/// will be unable to perform the COPY removal from the above example which
/// ultimately led to the creation of an illegal COPY.
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEBUG_TYPE "sgpr-copies" 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "AMDGPU.h" 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "SIInstrInfo.h" 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/MachineFunctionPass.h" 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/MachineInstrBuilder.h" 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/MachineRegisterInfo.h" 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/Debug.h" 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Target/TargetMachine.h" 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace llvm; 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace { 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class SIFixSGPRCopies : public MachineFunctionPass { 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)private: 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static char ID; 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MachineRegisterInfo &MRI, 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned Reg, 
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned SubReg) const; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MachineRegisterInfo &MRI, 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned Reg, 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned SubReg) const; 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MachineRegisterInfo &MRI) const; 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)public: 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual bool runOnMachineFunction(MachineFunction &MF); 100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 1012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const char *getPassName() const { 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return "SI Fix SGPR copies"; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // End anonymous namespace 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)char SIFixSGPRCopies::ID = 0; 
1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { 1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return new SIFixSGPRCopies(tm); 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!MI.getOperand(i).isReg() || 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// This functions walks the use list of Reg until it finds an Instruction 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// that isn't a COPY returns the register class of that instruction. 
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// \return The register defined by the first non-COPY instruction. 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SIRegisterInfo *TRI, 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MachineRegisterInfo &MRI, 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned Reg, 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned SubReg) const { 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The Reg parameter to the function must always be defined by either a PHI 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // or a COPY, therefore it cannot be a physical register. 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert(TargetRegisterInfo::isVirtualRegister(Reg) && 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Reg cannot be a physical register"); 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *RC = MRI.getRegClass(Reg); 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RC = TRI->getSubRegClass(RC, SubReg); 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) E = MRI.use_end(); I != E; ++I) { 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (I->getOpcode()) { 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::COPY: 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
I->getOperand(0).getReg(), 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) I->getOperand(0).getSubReg())); 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RC; 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SIRegisterInfo *TRI, 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MachineRegisterInfo &MRI, 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned Reg, 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned SubReg) const { 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!TargetRegisterInfo::isVirtualRegister(Reg)) { 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return TRI->getSubRegClass(RC, SubReg); 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MachineInstr *Def = MRI.getVRegDef(Reg); 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Def->getOpcode() != AMDGPU::COPY) { 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Def->getOperand(1).getSubReg()); 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SIRegisterInfo *TRI, 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const MachineRegisterInfo &MRI) const { 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned DstReg = Copy.getOperand(0).getReg(); 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned SrcReg = Copy.getOperand(1).getReg(); 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DstRC == &AMDGPU::M0RegRegClass) 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *SrcRC = TRI->getSubRegClass( 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MRI.getRegClass(SrcReg), SrcSubReg); 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 
return TRI->isSGPRClass(DstRC) && 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !TRI->getCommonSubClass(DstRC, SrcRC); 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MachineRegisterInfo &MRI = MF.getRegInfo(); 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MF.getTarget().getRegisterInfo()); 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MF.getTarget().getInstrInfo()); 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BI != BE; ++BI) { 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MachineBasicBlock &MBB = *BI; 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) I != E; ++I) { 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MachineInstr &MI = *I; 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) { 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n"); 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(MI.print(dbgs())); 
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TII->moveToVALU(MI); 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (MI.getOpcode()) { 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: continue; 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::PHI: { 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << " Fixing PHI:\n"); 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(MI.print(dbgs())); 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned Reg = MI.getOperand(i).getReg(); 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg, 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MI.getOperand(0).getSubReg()); 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MRI.constrainRegClass(Reg, RC); 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned Reg = MI.getOperand(0).getReg(); 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MI.getOperand(0).getSubReg()); 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) { 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); 
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If a PHI node defines an SGPR and any of its operands are VGPRs, 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // then we need to move it to the VALU. 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned Reg = MI.getOperand(i).getReg(); 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TII->moveToVALU(MI); 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::REG_SEQUENCE: { 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !hasVGPROperands(MI, TRI)) 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << "Fixing REG_SEQUENCE: \n"); 
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(MI.print(dbgs())); 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TII->moveToVALU(MI); 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TII->legalizeOperands(&MI); 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)