// R600ExpandSpecialInstrs.cpp revision 6c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6
1//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// Vector, Reduction, and Cube instructions need to fill the entire instruction 10// group to work correctly. This pass expands these individual instructions 11// into several instructions that will completely fill the instruction group. 12//===----------------------------------------------------------------------===// 13 14#include "AMDGPU.h" 15#include "R600InstrInfo.h" 16#include "R600RegisterInfo.h" 17#include "llvm/CodeGen/MachineFunctionPass.h" 18#include "llvm/CodeGen/MachineInstrBuilder.h" 19#include "llvm/CodeGen/MachineRegisterInfo.h" 20 21using namespace llvm; 22 23namespace { 24 25class R600ExpandSpecialInstrsPass : public MachineFunctionPass { 26 27private: 28 static char ID; 29 const R600InstrInfo *TII; 30 31public: 32 R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 33 TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } 34 35 virtual bool runOnMachineFunction(MachineFunction &MF); 36 37 const char *getPassName() const { 38 return "R600 Expand special instructions pass"; 39 } 40}; 41 42} // End anonymous namespace 43 44char R600ExpandSpecialInstrsPass::ID = 0; 45 46FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 47 return new R600ExpandSpecialInstrsPass(TM); 48} 49 50bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 51 52 const R600RegisterInfo &TRI = TII->getRegisterInfo(); 53 54 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 55 BB != BB_E; ++BB) { 56 MachineBasicBlock &MBB = *BB; 57 MachineBasicBlock::iterator I = MBB.begin(); 58 while (I != MBB.end()) { 59 MachineInstr &MI = *I; 60 I = llvm::next(I); 61 62 bool 
IsReduction = TII->isReductionOp(MI.getOpcode()); 63 bool IsVector = TII->isVector(MI); 64 if (!IsReduction && !IsVector) { 65 continue; 66 } 67 68 // Expand the instruction 69 // 70 // Reduction instructions: 71 // T0_X = DP4 T1_XYZW, T2_XYZW 72 // becomes: 73 // TO_X = DP4 T1_X, T2_X 74 // TO_Y (write masked) = DP4 T1_Y, T2_Y 75 // TO_Z (write masked) = DP4 T1_Z, T2_Z 76 // TO_W (write masked) = DP4 T1_W, T2_W 77 // 78 // Vector instructions: 79 // T0_X = MULLO_INT T1_X, T2_X 80 // becomes: 81 // T0_X = MULLO_INT T1_X, T2_X 82 // T0_Y (write masked) = MULLO_INT T1_X, T2_X 83 // T0_Z (write masked) = MULLO_INT T1_X, T2_X 84 // T0_W (write masked) = MULLO_INT T1_X, T2_X 85 for (unsigned Chan = 0; Chan < 4; Chan++) { 86 unsigned DstReg = MI.getOperand(0).getReg(); 87 unsigned Src0 = MI.getOperand(1).getReg(); 88 unsigned Src1 = MI.getOperand(2).getReg(); 89 if (IsReduction) { 90 unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 91 Src0 = TRI.getSubReg(Src0, SubRegIndex); 92 Src1 = TRI.getSubReg(Src1, SubRegIndex); 93 } 94 unsigned DstBase = TRI.getHWRegIndex(DstReg); 95 unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 96 unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); 97 Flags |= (Chan == 3 ? MO_FLAG_LAST : 0); 98 MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true); 99 NewDstOp.addTargetFlag(Flags); 100 101 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode())) 102 .addOperand(NewDstOp) 103 .addReg(Src0) 104 .addReg(Src1) 105 ->setIsInsideBundle(Chan != 0); 106 } 107 MI.eraseFromParent(); 108 } 109 } 110 return false; 111} 112