//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass maps the pseudo interpolation registers to the correct physical
// registers. Prior to executing a fragment shader, the GPU loads interpolation
// parameters into physical registers. The specific physical register that each
// interpolation parameter ends up in depends on the type of the interpolation
// parameter as well as how many interpolation parameters are used by the
// shader.
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDGPU.h" 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDIL.h" 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "SIMachineFunctionInfo.h" 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunctionPass.h" 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineInstrBuilder.h" 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineRegisterInfo.h" 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace llvm; 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace { 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass SIAssignInterpRegsPass : public MachineFunctionPass { 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org static char ID; 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org TargetMachine &TM; 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void AddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned physReg, unsigned virtReg); 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic: 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SIAssignInterpRegsPass(TargetMachine &tm) : 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineFunctionPass(ID), TM(tm) { } 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool runOnMachineFunction(MachineFunction &MF); 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const char *getPassName() const { return "SI Assign intrpolation registers"; } 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // End anonymous namespace 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgchar SIAssignInterpRegsPass::ID = 0; 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define INTERP_VALUES 16 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct interp_info { 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool enabled; 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned regs[3]; 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned reg_count; 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) { 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return new SIAssignInterpRegsPass(tm); 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct interp_info InterpUse[INTERP_VALUES] = { 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2}, 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2}, 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2}, 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3}, 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2}, 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2}, 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2}, 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1}, 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::POS_X_FLOAT}, 1}, 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::POS_Y_FLOAT}, 1}, 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::POS_Z_FLOAT}, 1}, 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::POS_W_FLOAT}, 1}, 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::FRONT_FACE}, 1}, 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::ANCILLARY}, 1}, 
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::SAMPLE_COVERAGE}, 1}, 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {false, {AMDGPU::POS_FIXED_PT}, 1} 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineRegisterInfo &MRI = MF.getRegInfo(); 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* First pass, mark the interpolation values that are used. */ 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) { 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count; 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg_idx++) { 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org InterpUse[interp_idx].enabled = 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !MRI.use_empty(InterpUse[interp_idx].regs[reg_idx]); 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned used_vgprs = 0; 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Second pass, replace with VGPRs. 
*/ 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) { 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!InterpUse[interp_idx].enabled) { 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MFI->spi_ps_input_addr |= (1 << interp_idx); 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count; 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reg_idx++, used_vgprs++) { 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned new_reg = AMDGPU::VReg_32RegClass.getRegister(used_vgprs); 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned virt_reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg); 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AddLiveIn(&MF, MRI, new_reg, virt_reg); 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid SIAssignInterpRegsPass::AddLiveIn(MachineFunction * MF, 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineRegisterInfo & MRI, 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned physReg, unsigned virtReg) 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const TargetInstrInfo * TII = TM.getInstrInfo(); 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!MRI.isLiveIn(physReg)) { 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MRI.addLiveIn(physReg, virtReg); 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MF->front().addLiveIn(physReg); 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(MF->front(), MF->front().begin(), DebugLoc(), 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org TII->get(TargetOpcode::COPY), virtReg) 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(physReg); 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 137