R600ExpandSpecialInstrs.cpp revision 3a7a56e7aa56bc6cb847c241ef6bd749713ae6e1
1//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Vector, Reduction, and Cube instructions need to fill the entire instruction
10// group to work correctly.  This pass expands these individual instructions
11// into several instructions that will completely fill the instruction group.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "R600InstrInfo.h"
16#include "R600RegisterInfo.h"
17#include "llvm/CodeGen/MachineFunctionPass.h"
18#include "llvm/CodeGen/MachineInstrBuilder.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20
21using namespace llvm;
22
23namespace {
24
25class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
26
27private:
28  static char ID;
29  const R600InstrInfo *TII;
30
31public:
32  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
33    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
34
35  virtual bool runOnMachineFunction(MachineFunction &MF);
36
37  const char *getPassName() const {
38    return "R600 Expand special instructions pass";
39  }
40};
41
42} // End anonymous namespace
43
// Out-of-class definition of the pass's static ID member, which the
// constructor forwards to MachineFunctionPass.
// NOTE(review): LLVM passes conventionally use the address of this object,
// not its value, as the identifier -- presumably why 0 is fine here; confirm.
char R600ExpandSpecialInstrsPass::ID = 0;
45
46FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
47  return new R600ExpandSpecialInstrsPass(TM);
48}
49
50bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
51
52  const R600RegisterInfo &TRI = TII->getRegisterInfo();
53
54  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
55                                                  BB != BB_E; ++BB) {
56    MachineBasicBlock &MBB = *BB;
57    MachineBasicBlock::iterator I = MBB.begin();
58    while (I != MBB.end()) {
59      MachineInstr &MI = *I;
60      I = llvm::next(I);
61
62      bool IsReduction = TII->isReductionOp(MI.getOpcode());
63      bool IsVector = TII->isVector(MI);
64	    bool IsCube = TII->isCubeOp(MI.getOpcode());
65      if (!IsReduction && !IsVector && !IsCube) {
66        continue;
67      }
68
69      // Expand the instruction
70      //
71      // Reduction instructions:
72      // T0_X = DP4 T1_XYZW, T2_XYZW
73      // becomes:
74      // TO_X = DP4 T1_X, T2_X
75      // TO_Y (write masked) = DP4 T1_Y, T2_Y
76      // TO_Z (write masked) = DP4 T1_Z, T2_Z
77      // TO_W (write masked) = DP4 T1_W, T2_W
78      //
79      // Vector instructions:
80      // T0_X = MULLO_INT T1_X, T2_X
81      // becomes:
82      // T0_X = MULLO_INT T1_X, T2_X
83      // T0_Y (write masked) = MULLO_INT T1_X, T2_X
84      // T0_Z (write masked) = MULLO_INT T1_X, T2_X
85      // T0_W (write masked) = MULLO_INT T1_X, T2_X
86      //
87      // Cube instructions:
88      // T0_XYZW = CUBE T1_XYZW
89      // becomes:
90      // TO_X = CUBE T1_Z, T1_Y
91      // T0_Y = CUBE T1_Z, T1_X
92      // T0_Z = CUBE T1_X, T1_Z
93      // T0_W = CUBE T1_Y, T1_Z
94      for (unsigned Chan = 0; Chan < 4; Chan++) {
95        unsigned DstReg = MI.getOperand(0).getReg();
96        unsigned Src0 = MI.getOperand(1).getReg();
97        unsigned Src1 = 0;
98
99        // Determine the correct source registers
100        if (!IsCube) {
101          Src1 = MI.getOperand(2).getReg();
102        }
103        if (IsReduction) {
104          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
105          Src0 = TRI.getSubReg(Src0, SubRegIndex);
106          Src1 = TRI.getSubReg(Src1, SubRegIndex);
107        } else if (IsCube) {
108          static const int CubeSrcSwz[] = {2, 2, 0, 1};
109          unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
110          unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
111          Src1 = TRI.getSubReg(Src0, SubRegIndex1);
112          Src0 = TRI.getSubReg(Src0, SubRegIndex0);
113        }
114
115        // Determine the correct destination registers;
116        unsigned Flags = 0;
117        if (IsCube) {
118          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
119          DstReg = TRI.getSubReg(DstReg, SubRegIndex);
120        } else {
121          // Mask the write if the original instruction does not write to
122          // the current Channel.
123          Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
124          unsigned DstBase = TRI.getHWRegIndex(DstReg);
125          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
126        }
127
128        // Set the IsLast bit
129        Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
130
131        // Add the new instruction
132        unsigned Opcode;
133        if (IsCube) {
134          switch (MI.getOpcode()) {
135          case AMDGPU::CUBE_r600_pseudo:
136            Opcode = AMDGPU::CUBE_r600_real;
137            break;
138          case AMDGPU::CUBE_eg_pseudo:
139            Opcode = AMDGPU::CUBE_eg_real;
140            break;
141          default:
142            assert(!"Unknown CUBE instruction");
143            Opcode = 0;
144            break;
145          }
146        } else {
147          Opcode = MI.getOpcode();
148        }
149        MachineInstr *NewMI =
150          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
151                  .addReg(Src0)
152                  .addReg(Src1);
153
154        NewMI->setIsInsideBundle(Chan != 0);
155        TII->AddFlag(NewMI, 0, Flags);
156      }
157      MI.eraseFromParent();
158    }
159  }
160  return false;
161}
162