R600ExpandSpecialInstrs.cpp revision 1cb07bd3b8abd5e52e9dbd80bb1666058545387e
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Vector, Reduction, and Cube instructions need to fill the entire instruction
// group to work correctly.  This pass expands these individual instructions
// into several instructions that will completely fill the instruction group.
//===----------------------------------------------------------------------===//
13571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
14571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "AMDGPU.h"
15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "R600InstrInfo.h"
16571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "R600RegisterInfo.h"
1778b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com#include "llvm/CodeGen/MachineFunctionPass.h"
18571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "llvm/CodeGen/MachineInstrBuilder.h"
19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "llvm/CodeGen/MachineRegisterInfo.h"
20571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
21571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comusing namespace llvm;
22571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
23571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comnamespace {
24571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
25571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass R600ExpandSpecialInstrsPass : public MachineFunctionPass {
26571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
27571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate:
28571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  static char ID;
29571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  const R600InstrInfo *TII;
30571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
31571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic:
32571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
33571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
34571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
35571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  virtual bool runOnMachineFunction(MachineFunction &MF);
36571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
37571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  const char *getPassName() const {
38571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    return "R600 Expand special instructions pass";
39571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  }
40571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com};
41571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
42571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com} // End anonymous namespace
43571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
44571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comchar R600ExpandSpecialInstrsPass::ID = 0;
45571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
46571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
47571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  return new R600ExpandSpecialInstrsPass(TM);
48571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}
49571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
50571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.combool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
51571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
52571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  const R600RegisterInfo &TRI = TII->getRegisterInfo();
53571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
54571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
55571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com                                                  BB != BB_E; ++BB) {
56571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    MachineBasicBlock &MBB = *BB;
57571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    MachineBasicBlock::iterator I = MBB.begin();
58571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com    while (I != MBB.end()) {
59571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      MachineInstr &MI = *I;
60571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      I = llvm::next(I);
61571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
62571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      bool IsReduction = TII->isReductionOp(MI.getOpcode());
63571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      bool IsVector = TII->isVector(MI);
64571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com	    bool IsCube = TII->isCubeOp(MI.getOpcode());
65571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      if (!IsReduction && !IsVector && !IsCube) {
66571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        continue;
67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      }
68571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
694ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com      // Expand the instruction
70571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      //
714ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com      // Reduction instructions:
724ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com      // T0_X = DP4 T1_XYZW, T2_XYZW
73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // becomes:
74571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // TO_X = DP4 T1_X, T2_X
75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // TO_Y (write masked) = DP4 T1_Y, T2_Y
76571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // TO_Z (write masked) = DP4 T1_Z, T2_Z
77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // TO_W (write masked) = DP4 T1_W, T2_W
78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      //
79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // Vector instructions:
80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // T0_X = MULLO_INT T1_X, T2_X
81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // becomes:
823aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com      // T0_X = MULLO_INT T1_X, T2_X
83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // T0_Y (write masked) = MULLO_INT T1_X, T2_X
84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // T0_Z (write masked) = MULLO_INT T1_X, T2_X
85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // T0_W (write masked) = MULLO_INT T1_X, T2_X
863aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com      //
87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // Cube instructions:
883aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com      // T0_XYZW = CUBE T1_XYZW
89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // becomes:
90571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // TO_X = CUBE T1_Z, T1_Y
913aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com      // T0_Y = CUBE T1_Z, T1_X
92a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com      // T0_Z = CUBE T1_X, T1_Z
93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      // T0_W = CUBE T1_Y, T1_Z
943aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com      for (unsigned Chan = 0; Chan < 4; Chan++) {
95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        unsigned DstReg = MI.getOperand(0).getReg();
96a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        unsigned Src0 = MI.getOperand(1).getReg();
97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        unsigned Src1 = 0;
98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        // Determine the correct source registers
100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        if (!IsCube) {
101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          Src1 = MI.getOperand(2).getReg();
102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        }
1033aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com        if (IsReduction) {
104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          Src0 = TRI.getSubReg(Src0, SubRegIndex);
106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          Src1 = TRI.getSubReg(Src1, SubRegIndex);
107571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        } else if (IsCube) {
108571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          static const int CubeSrcSwz[] = {2, 2, 0, 1};
109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com          unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          Src1 = TRI.getSubReg(Src0, SubRegIndex1);
112a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com          Src0 = TRI.getSubReg(Src0, SubRegIndex0);
1137b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com        }
114a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com
115a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com        // Determine the correct destination registers;
116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        unsigned Flags = 0;
117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        if (IsCube) {
1183aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
1193aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com          DstReg = TRI.getSubReg(DstReg, SubRegIndex);
120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        } else {
121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          // Mask the write if the original instruction does not write to
122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          // the current Channel.
123571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
124571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          unsigned DstBase = TRI.getHWRegIndex(DstReg);
125571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
126571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        }
127571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        // Set the IsLast bit
1293aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com        Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com
131571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        // Add the new instruction
132571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        unsigned Opcode;
133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        if (IsCube) {
134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          switch (MI.getOpcode()) {
135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          case AMDGPU::CUBE_r600_pseudo:
136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com            Opcode = AMDGPU::CUBE_r600_real;
13733f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com            break;
13833f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com          case AMDGPU::CUBE_eg_pseudo:
139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com            Opcode = AMDGPU::CUBE_eg_real;
1403aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com            break;
141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          default:
142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com            assert(!"Unknown CUBE instruction");
143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com            Opcode = 0;
144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com            break;
14578b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com          }
146571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        } else {
147571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com          Opcode = MI.getOpcode();
1483aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com        }
149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        MachineOperand NewDstOp = MachineOperand::CreateReg(DstReg, true);
150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com        NewDstOp.addTargetFlag(Flags);
1512ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com
1522ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode))
1532ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com                .addOperand(NewDstOp)
154571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com                .addReg(Src0)
155571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com                .addReg(Src1)
156571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com                ->setIsInsideBundle(Chan != 0);
157571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com      }
1583aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com      MI.eraseFromParent();
1593aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com    }
160cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com  }
161  return false;
162}
163