1//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Vector, Reduction, and Cube instructions need to fill the entire instruction
10// group to work correctly.  This pass expands these individual instructions
11// into several instructions that will completely fill the instruction group.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "R600Defines.h"
16#include "R600InstrInfo.h"
17#include "R600RegisterInfo.h"
18#include "llvm/CodeGen/MachineFunctionPass.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21
22using namespace llvm;
23
24namespace {
25
26class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
27
28private:
29  static char ID;
30  const R600InstrInfo *TII;
31
32public:
33  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
34    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
35
36  virtual bool runOnMachineFunction(MachineFunction &MF);
37
38  const char *getPassName() const {
39    return "R600 Expand special instructions pass";
40  }
41};
42
43} // End anonymous namespace
44
45char R600ExpandSpecialInstrsPass::ID = 0;
46
47FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
48  return new R600ExpandSpecialInstrsPass(TM);
49}
50
51bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
52
53  const R600RegisterInfo &TRI = TII->getRegisterInfo();
54
55  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
56                                                  BB != BB_E; ++BB) {
57    MachineBasicBlock &MBB = *BB;
58    MachineBasicBlock::iterator I = MBB.begin();
59    while (I != MBB.end()) {
60      MachineInstr &MI = *I;
61      I = llvm::next(I);
62
63      bool IsReduction = TII->isReductionOp(MI.getOpcode());
64      bool IsVector = TII->isVector(MI);
65	    bool IsCube = TII->isCubeOp(MI.getOpcode());
66      if (!IsReduction && !IsVector && !IsCube) {
67        continue;
68      }
69
70      // Expand the instruction
71      //
72      // Reduction instructions:
73      // T0_X = DP4 T1_XYZW, T2_XYZW
74      // becomes:
75      // TO_X = DP4 T1_X, T2_X
76      // TO_Y (write masked) = DP4 T1_Y, T2_Y
77      // TO_Z (write masked) = DP4 T1_Z, T2_Z
78      // TO_W (write masked) = DP4 T1_W, T2_W
79      //
80      // Vector instructions:
81      // T0_X = MULLO_INT T1_X, T2_X
82      // becomes:
83      // T0_X = MULLO_INT T1_X, T2_X
84      // T0_Y (write masked) = MULLO_INT T1_X, T2_X
85      // T0_Z (write masked) = MULLO_INT T1_X, T2_X
86      // T0_W (write masked) = MULLO_INT T1_X, T2_X
87      //
88      // Cube instructions:
89      // T0_XYZW = CUBE T1_XYZW
90      // becomes:
91      // TO_X = CUBE T1_Z, T1_Y
92      // T0_Y = CUBE T1_Z, T1_X
93      // T0_Z = CUBE T1_X, T1_Z
94      // T0_W = CUBE T1_Y, T1_Z
95      for (unsigned Chan = 0; Chan < 4; Chan++) {
96        unsigned DstReg = MI.getOperand(0).getReg();
97        unsigned Src0 = MI.getOperand(1).getReg();
98        unsigned Src1 = 0;
99
100        // Determine the correct source registers
101        if (!IsCube) {
102          Src1 = MI.getOperand(2).getReg();
103        }
104        if (IsReduction) {
105          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
106          Src0 = TRI.getSubReg(Src0, SubRegIndex);
107          Src1 = TRI.getSubReg(Src1, SubRegIndex);
108        } else if (IsCube) {
109          static const int CubeSrcSwz[] = {2, 2, 0, 1};
110          unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
111          unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
112          Src1 = TRI.getSubReg(Src0, SubRegIndex1);
113          Src0 = TRI.getSubReg(Src0, SubRegIndex0);
114        }
115
116        // Determine the correct destination registers;
117        unsigned Flags = 0;
118        if (IsCube) {
119          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
120          DstReg = TRI.getSubReg(DstReg, SubRegIndex);
121        } else {
122          // Mask the write if the original instruction does not write to
123          // the current Channel.
124          Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
125          unsigned DstBase = TRI.getHWRegIndex(DstReg);
126          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
127        }
128
129        // Set the IsLast bit
130        Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
131
132        // Add the new instruction
133        unsigned Opcode;
134        if (IsCube) {
135          switch (MI.getOpcode()) {
136          case AMDGPU::CUBE_r600_pseudo:
137            Opcode = AMDGPU::CUBE_r600_real;
138            break;
139          case AMDGPU::CUBE_eg_pseudo:
140            Opcode = AMDGPU::CUBE_eg_real;
141            break;
142          default:
143            assert(!"Unknown CUBE instruction");
144            Opcode = 0;
145            break;
146          }
147        } else {
148          Opcode = MI.getOpcode();
149        }
150        MachineInstr *NewMI =
151          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
152                  .addReg(Src0)
153                  .addReg(Src1)
154                  .addImm(0); // Flag
155
156        NewMI->setIsInsideBundle(Chan != 0);
157        TII->addFlag(NewMI, 0, Flags);
158      }
159      MI.eraseFromParent();
160    }
161  }
162  return false;
163}
164