// R600ExpandSpecialInstrs.cpp revision 1cb07bd3b8abd5e52e9dbd80bb1666058545387e
1cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// 2cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com// 3cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com// The LLVM Compiler Infrastructure 4cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com// 5cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com// This file is distributed under the University of Illinois Open Source 6cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com// License. See LICENSE.TXT for details. 7cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com// 8cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com//===----------------------------------------------------------------------===// 9cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com// Vector, Reduction, and Cube instructions need to fill the entire instruction 103aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com// group to work correctly. This pass expands these individual instructions 11571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com// into several instructions that will completely fill the instruction group. 
12571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com//===----------------------------------------------------------------------===// 13571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 14571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "AMDGPU.h" 15571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "R600InstrInfo.h" 16571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "R600RegisterInfo.h" 1778b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com#include "llvm/CodeGen/MachineFunctionPass.h" 18571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "llvm/CodeGen/MachineInstrBuilder.h" 19571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com#include "llvm/CodeGen/MachineRegisterInfo.h" 20571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 21571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comusing namespace llvm; 22571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 23571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comnamespace { 24571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 25571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 26571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 27571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comprivate: 28571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com static char ID; 29571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const R600InstrInfo *TII; 30571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 31571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.compublic: 32571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 33571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } 34571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 
35571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com virtual bool runOnMachineFunction(MachineFunction &MF); 36571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 37571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const char *getPassName() const { 38571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com return "R600 Expand special instructions pass"; 39571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 40571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com}; 41571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 42571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com} // End anonymous namespace 43571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 44571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comchar R600ExpandSpecialInstrsPass::ID = 0; 45571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 46571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.comFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 47571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com return new R600ExpandSpecialInstrsPass(TM); 48571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com} 49571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 50571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.combool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 51571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 52571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com const R600RegisterInfo &TRI = TII->getRegisterInfo(); 53571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 54571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 55571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com BB != BB_E; ++BB) { 56571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com MachineBasicBlock &MBB = *BB; 57571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 
MachineBasicBlock::iterator I = MBB.begin(); 58571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com while (I != MBB.end()) { 59571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com MachineInstr &MI = *I; 60571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com I = llvm::next(I); 61571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 62571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool IsReduction = TII->isReductionOp(MI.getOpcode()); 63571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool IsVector = TII->isVector(MI); 64571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com bool IsCube = TII->isCubeOp(MI.getOpcode()); 65571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com if (!IsReduction && !IsVector && !IsCube) { 66571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com continue; 67571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 68571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 694ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com // Expand the instruction 70571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // 714ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com // Reduction instructions: 724ef4bed00efd247a0ea005b95b7239a9d4c14c68edisonn@google.com // T0_X = DP4 T1_XYZW, T2_XYZW 73571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // becomes: 74571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TO_X = DP4 T1_X, T2_X 75571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TO_Y (write masked) = DP4 T1_Y, T2_Y 76571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TO_Z (write masked) = DP4 T1_Z, T2_Z 77571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TO_W (write masked) = DP4 T1_W, T2_W 78571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // 79571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // Vector instructions: 80571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // T0_X = MULLO_INT 
T1_X, T2_X 81571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // becomes: 823aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com // T0_X = MULLO_INT T1_X, T2_X 83571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // T0_Y (write masked) = MULLO_INT T1_X, T2_X 84571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // T0_Z (write masked) = MULLO_INT T1_X, T2_X 85571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // T0_W (write masked) = MULLO_INT T1_X, T2_X 863aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com // 87571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // Cube instructions: 883aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com // T0_XYZW = CUBE T1_XYZW 89571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // becomes: 90571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // TO_X = CUBE T1_Z, T1_Y 913aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com // T0_Y = CUBE T1_Z, T1_X 92a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com // T0_Z = CUBE T1_X, T1_Z 93571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // T0_W = CUBE T1_Y, T1_Z 943aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com for (unsigned Chan = 0; Chan < 4; Chan++) { 95571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned DstReg = MI.getOperand(0).getReg(); 96a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com unsigned Src0 = MI.getOperand(1).getReg(); 97571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned Src1 = 0; 98571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 99571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // Determine the correct source registers 100571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com if (!IsCube) { 101571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Src1 = MI.getOperand(2).getReg(); 102571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 
1033aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com if (IsReduction) { 104571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 105571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Src0 = TRI.getSubReg(Src0, SubRegIndex); 106571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Src1 = TRI.getSubReg(Src1, SubRegIndex); 107571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } else if (IsCube) { 108571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com static const int CubeSrcSwz[] = {2, 2, 0, 1}; 109a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 110571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 111571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Src1 = TRI.getSubReg(Src0, SubRegIndex1); 112a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com Src0 = TRI.getSubReg(Src0, SubRegIndex0); 1137b328fddf94eea5f05ffa36de02d7d9922f504daedisonn@google.com } 114a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com 115a5aaa7998fc18489701660f781d7daa33ffc6f6eedisonn@google.com // Determine the correct destination registers; 116571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned Flags = 0; 117571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com if (IsCube) { 1183aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 1193aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com DstReg = TRI.getSubReg(DstReg, SubRegIndex); 120571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } else { 121571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // Mask the write if the original instruction does not write to 122571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // the current Channel. 
123571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); 124571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned DstBase = TRI.getHWRegIndex(DstReg); 125571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 126571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 127571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 128571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // Set the IsLast bit 1293aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com Flags |= (Chan == 3 ? MO_FLAG_LAST : 0); 130571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com 131571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com // Add the new instruction 132571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com unsigned Opcode; 133571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com if (IsCube) { 134571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com switch (MI.getOpcode()) { 135571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com case AMDGPU::CUBE_r600_pseudo: 136571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Opcode = AMDGPU::CUBE_r600_real; 13733f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com break; 13833f11b6fcdb7dfce27f953803be40fbacedc7450edisonn@google.com case AMDGPU::CUBE_eg_pseudo: 139571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Opcode = AMDGPU::CUBE_eg_real; 1403aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com break; 141571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com default: 142571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com assert(!"Unknown CUBE instruction"); 143571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Opcode = 0; 144571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com break; 14578b38b130deb8bcfa41611039875ce0162542ac1edisonn@google.com } 
146571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } else { 147571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com Opcode = MI.getOpcode(); 1483aa355527a3b91d3e12b8bee49e5637d00a736caedisonn@google.com } 149571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com MachineOperand NewDstOp = MachineOperand::CreateReg(DstReg, true); 150571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com NewDstOp.addTargetFlag(Flags); 1512ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com 1522ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode)) 1532ccc3afa474f9485c39c2e863252ddaa3f35724bedisonn@google.com .addOperand(NewDstOp) 154571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com .addReg(Src0) 155571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com .addReg(Src1) 156571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com ->setIsInsideBundle(Chan != 0); 157571c70b95f56e22b5a7d6f4f288aa6c9a925a64fedisonn@google.com } 1583aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com MI.eraseFromParent(); 1593aac1f9f308192f3787265830fe86ce8874e7382edisonn@google.com } 160cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com } 161 return false; 162} 163