//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass maps the pseudo interpolation registers to the correct physical
// registers.  Prior to executing a fragment shader, the GPU loads interpolation
// parameters into physical registers.  The specific physical register that each
// interpolation parameter ends up in depends on the type of the interpolation
// parameter as well as how many interpolation parameters are used by the
// shader.
//
//===----------------------------------------------------------------------===//
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDGPU.h"
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDIL.h"
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "SIMachineFunctionInfo.h"
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunctionPass.h"
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineInstrBuilder.h"
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineRegisterInfo.h"
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace llvm;
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace {
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass SIAssignInterpRegsPass : public MachineFunctionPass {
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  static char ID;
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  TargetMachine &TM;
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  void AddLiveIn(MachineFunction * MF,  MachineRegisterInfo & MRI,
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                 unsigned physReg, unsigned virtReg);
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  SIAssignInterpRegsPass(TargetMachine &tm) :
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    MachineFunctionPass(ID), TM(tm) { }
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  virtual bool runOnMachineFunction(MachineFunction &MF);
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  const char *getPassName() const { return "SI Assign intrpolation registers"; }
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // End anonymous namespace
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgchar SIAssignInterpRegsPass::ID = 0;
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
// Number of rows in the interpolation table built by runOnMachineFunction().
#define INTERP_VALUES 16

namespace {

// One row of the interpolation table.  Kept in an anonymous namespace because
// the type is only meaningful inside this file.
struct interp_info {
  bool enabled;        // Set once a register of this row is found to be used.
  unsigned regs[3];    // Pseudo registers of this row; unused slots are zero
                       // when the row is aggregate-initialized.
  unsigned reg_count;  // Number of valid entries in regs.
};

} // End anonymous namespace
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return new SIAssignInterpRegsPass(tm);
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  struct interp_info InterpUse[INTERP_VALUES] = {
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::POS_X_FLOAT}, 1},
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::POS_Y_FLOAT}, 1},
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::POS_Z_FLOAT}, 1},
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::POS_W_FLOAT}, 1},
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::FRONT_FACE}, 1},
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::ANCILLARY}, 1},
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    {false, {AMDGPU::POS_FIXED_PT}, 1}
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  };
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  MachineRegisterInfo &MRI = MF.getRegInfo();
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  /* First pass, mark the interpolation values that are used. */
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                                                               reg_idx++) {
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      InterpUse[interp_idx].enabled =
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                            !MRI.use_empty(InterpUse[interp_idx].regs[reg_idx]);
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  unsigned used_vgprs = 0;
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  /* Second pass, replace with VGPRs. */
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  for (unsigned interp_idx = 0; interp_idx < INTERP_VALUES; interp_idx++) {
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!InterpUse[interp_idx].enabled) {
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      continue;
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    MFI->spi_ps_input_addr |= (1 << interp_idx);
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    for (unsigned reg_idx = 0; reg_idx < InterpUse[interp_idx].reg_count;
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                                                  reg_idx++, used_vgprs++) {
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned new_reg = AMDGPU::VReg_32RegClass.getRegister(used_vgprs);
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned virt_reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      MRI.replaceRegWith(InterpUse[interp_idx].regs[reg_idx], virt_reg);
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      AddLiveIn(&MF, MRI, new_reg, virt_reg);
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid SIAssignInterpRegsPass::AddLiveIn(MachineFunction * MF,
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                           MachineRegisterInfo & MRI,
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                           unsigned physReg, unsigned virtReg)
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    const TargetInstrInfo * TII = TM.getInstrInfo();
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    if (!MRI.isLiveIn(physReg)) {
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      MRI.addLiveIn(physReg, virtReg);
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      MF->front().addLiveIn(physReg);
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org              TII->get(TargetOpcode::COPY), virtReg)
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                .addReg(physReg);
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    } else {
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
137