R600ExpandSpecialInstrs.cpp revision 6c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6
1//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Vector, Reduction, and Cube instructions need to fill the entire instruction
10// group to work correctly.  This pass expands these individual instructions
11// into several instructions that will completely fill the instruction group.
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "R600InstrInfo.h"
16#include "R600RegisterInfo.h"
17#include "llvm/CodeGen/MachineFunctionPass.h"
18#include "llvm/CodeGen/MachineInstrBuilder.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20
21using namespace llvm;
22
23namespace {
24
25class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
26
27private:
28  static char ID;
29  const R600InstrInfo *TII;
30
31public:
32  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
33    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
34
35  virtual bool runOnMachineFunction(MachineFunction &MF);
36
37  const char *getPassName() const {
38    return "R600 Expand special instructions pass";
39  }
40};
41
42} // End anonymous namespace
43
44char R600ExpandSpecialInstrsPass::ID = 0;
45
46FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
47  return new R600ExpandSpecialInstrsPass(TM);
48}
49
50bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
51
52  const R600RegisterInfo &TRI = TII->getRegisterInfo();
53
54  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
55                                                  BB != BB_E; ++BB) {
56    MachineBasicBlock &MBB = *BB;
57    MachineBasicBlock::iterator I = MBB.begin();
58    while (I != MBB.end()) {
59      MachineInstr &MI = *I;
60      I = llvm::next(I);
61
62      bool IsReduction = TII->isReductionOp(MI.getOpcode());
63      bool IsVector = TII->isVector(MI);
64      if (!IsReduction && !IsVector) {
65        continue;
66      }
67
68      // Expand the instruction
69      //
70      // Reduction instructions:
71      // T0_X = DP4 T1_XYZW, T2_XYZW
72      // becomes:
73      // TO_X = DP4 T1_X, T2_X
74      // TO_Y (write masked) = DP4 T1_Y, T2_Y
75      // TO_Z (write masked) = DP4 T1_Z, T2_Z
76      // TO_W (write masked) = DP4 T1_W, T2_W
77      //
78      // Vector instructions:
79      // T0_X = MULLO_INT T1_X, T2_X
80      // becomes:
81      // T0_X = MULLO_INT T1_X, T2_X
82      // T0_Y (write masked) = MULLO_INT T1_X, T2_X
83      // T0_Z (write masked) = MULLO_INT T1_X, T2_X
84      // T0_W (write masked) = MULLO_INT T1_X, T2_X
85      for (unsigned Chan = 0; Chan < 4; Chan++) {
86        unsigned DstReg = MI.getOperand(0).getReg();
87        unsigned Src0 = MI.getOperand(1).getReg();
88        unsigned Src1 = MI.getOperand(2).getReg();
89        if (IsReduction) {
90          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
91          Src0 = TRI.getSubReg(Src0, SubRegIndex);
92          Src1 = TRI.getSubReg(Src1, SubRegIndex);
93        }
94        unsigned DstBase = TRI.getHWRegIndex(DstReg);
95        unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
96        unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
97        Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
98        MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true);
99        NewDstOp.addTargetFlag(Flags);
100
101        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode()))
102                .addOperand(NewDstOp)
103                .addReg(Src0)
104                .addReg(Src1)
105                ->setIsInsideBundle(Chan != 0);
106      }
107      MI.eraseFromParent();
108    }
109  }
110  return false;
111}
112