1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//                     The LLVM Compiler Infrastructure
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// This file is distributed under the University of Illinois Open Source
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// License. See LICENSE.TXT for details.
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===//
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Vector, Reduction, and Cube instructions need to fill the entire instruction
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// group to work correctly.  This pass expands these individual instructions
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// into several instructions that will completely fill the instruction group.
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===//
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDGPU.h"
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "R600Defines.h"
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "R600InstrInfo.h"
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "R600RegisterInfo.h"
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunctionPass.h"
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineInstrBuilder.h"
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineRegisterInfo.h"
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace llvm;
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace {
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass R600ExpandSpecialInstrsPass : public MachineFunctionPass {
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate:
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  static char ID;
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  const R600InstrInfo *TII;
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  virtual bool runOnMachineFunction(MachineFunction &MF);
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  const char *getPassName() const {
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    return "R600 Expand special instructions pass";
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // End anonymous namespace
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgchar R600ExpandSpecialInstrsPass::ID = 0;
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return new R600ExpandSpecialInstrsPass(TM);
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  const R600RegisterInfo &TRI = TII->getRegisterInfo();
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                                                  BB != BB_E; ++BB) {
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    MachineBasicBlock &MBB = *BB;
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    MachineBasicBlock::iterator I = MBB.begin();
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    while (I != MBB.end()) {
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      MachineInstr &MI = *I;
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      I = llvm::next(I);
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bool IsReduction = TII->isReductionOp(MI.getOpcode());
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      bool IsVector = TII->isVector(MI);
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    bool IsCube = TII->isCubeOp(MI.getOpcode());
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!IsReduction && !IsVector && !IsCube) {
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        continue;
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // Expand the instruction
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      //
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // Reduction instructions:
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_X = DP4 T1_XYZW, T2_XYZW
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // becomes:
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // TO_X = DP4 T1_X, T2_X
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // TO_Y (write masked) = DP4 T1_Y, T2_Y
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // TO_Z (write masked) = DP4 T1_Z, T2_Z
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // TO_W (write masked) = DP4 T1_W, T2_W
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      //
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // Vector instructions:
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_X = MULLO_INT T1_X, T2_X
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // becomes:
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_X = MULLO_INT T1_X, T2_X
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_Y (write masked) = MULLO_INT T1_X, T2_X
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_Z (write masked) = MULLO_INT T1_X, T2_X
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_W (write masked) = MULLO_INT T1_X, T2_X
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      //
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // Cube instructions:
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_XYZW = CUBE T1_XYZW
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // becomes:
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // TO_X = CUBE T1_Z, T1_Y
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_Y = CUBE T1_Z, T1_X
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_Z = CUBE T1_X, T1_Z
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      // T0_W = CUBE T1_Y, T1_Z
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned Chan = 0; Chan < 4; Chan++) {
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        unsigned DstReg = MI.getOperand(0).getReg();
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        unsigned Src0 = MI.getOperand(1).getReg();
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        unsigned Src1 = 0;
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        // Determine the correct source registers
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        if (!IsCube) {
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          Src1 = MI.getOperand(2).getReg();
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        }
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        if (IsReduction) {
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          Src0 = TRI.getSubReg(Src0, SubRegIndex);
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          Src1 = TRI.getSubReg(Src1, SubRegIndex);
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        } else if (IsCube) {
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          static const int CubeSrcSwz[] = {2, 2, 0, 1};
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          Src1 = TRI.getSubReg(Src0, SubRegIndex1);
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          Src0 = TRI.getSubReg(Src0, SubRegIndex0);
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        }
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        // Determine the correct destination registers;
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        unsigned Flags = 0;
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        if (IsCube) {
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          DstReg = TRI.getSubReg(DstReg, SubRegIndex);
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        } else {
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          // Mask the write if the original instruction does not write to
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          // the current Channel.
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          unsigned DstBase = TRI.getHWRegIndex(DstReg);
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        }
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        // Set the IsLast bit
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        // Add the new instruction
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        unsigned Opcode;
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        if (IsCube) {
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          switch (MI.getOpcode()) {
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          case AMDGPU::CUBE_r600_pseudo:
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            Opcode = AMDGPU::CUBE_r600_real;
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          case AMDGPU::CUBE_eg_pseudo:
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            Opcode = AMDGPU::CUBE_eg_real;
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          default:
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            assert(!"Unknown CUBE instruction");
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            Opcode = 0;
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org            break;
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          }
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        } else {
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          Opcode = MI.getOpcode();
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        }
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        MachineInstr *NewMI =
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  .addReg(Src0)
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  .addReg(Src1)
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org                  .addImm(0); // Flag
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        NewMI->setIsInsideBundle(Chan != 0);
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org        TII->addFlag(NewMI, 0, Flags);
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      MI.eraseFromParent();
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    }
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  }
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org  return false;
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
164