1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// The LLVM Compiler Infrastructure 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// This file is distributed under the University of Illinois Open Source 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// License. See LICENSE.TXT for details. 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Vector, Reduction, and Cube instructions need to fill the entire instruction 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// group to work correctly. This pass expands these individual instructions 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// into several instructions that will completely fill the instruction group. 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDGPU.h" 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "R600Defines.h" 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "R600InstrInfo.h" 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "R600RegisterInfo.h" 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineFunctionPass.h" 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineInstrBuilder.h" 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineRegisterInfo.h" 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace llvm; 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace { 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org static char ID; 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const R600InstrInfo *TII; 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic: 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool runOnMachineFunction(MachineFunction &MF); 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const char *getPassName() const { 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return "R600 Expand special instructions pass"; 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} // End anonymous namespace 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgchar R600ExpandSpecialInstrsPass::ID = 0; 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return new R600ExpandSpecialInstrsPass(TM); 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const R600RegisterInfo &TRI = TII->getRegisterInfo(); 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BB != BB_E; ++BB) { 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock &MBB = *BB; 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock::iterator I = MBB.begin(); 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while (I != MBB.end()) { 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineInstr &MI = *I; 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org I = llvm::next(I); 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool IsReduction = TII->isReductionOp(MI.getOpcode()); 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool IsVector = TII->isVector(MI); 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool IsCube = TII->isCubeOp(MI.getOpcode()); 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!IsReduction && !IsVector && !IsCube) { 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Expand the instruction 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Reduction instructions: 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_X = DP4 T1_XYZW, T2_XYZW 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // becomes: 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TO_X = DP4 T1_X, T2_X 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TO_Y (write masked) = DP4 T1_Y, T2_Y 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TO_Z (write masked) = DP4 T1_Z, T2_Z 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TO_W (write masked) = DP4 T1_W, T2_W 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Vector instructions: 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_X = MULLO_INT T1_X, T2_X 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // becomes: 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_X = MULLO_INT T1_X, T2_X 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_Y (write masked) = MULLO_INT T1_X, T2_X 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_Z (write masked) = MULLO_INT T1_X, T2_X 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_W (write masked) = MULLO_INT T1_X, T2_X 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Cube instructions: 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_XYZW = CUBE T1_XYZW 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // becomes: 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TO_X = CUBE T1_Z, T1_Y 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_Y = CUBE T1_Z, T1_X 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_Z = CUBE T1_X, T1_Z 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // T0_W = CUBE T1_Y, T1_Z 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned Chan = 0; Chan < 4; Chan++) { 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned DstReg = MI.getOperand(0).getReg(); 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned Src0 = MI.getOperand(1).getReg(); 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned Src1 = 0; 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Determine the correct source registers 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!IsCube) { 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Src1 = MI.getOperand(2).getReg(); 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (IsReduction) { 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Src0 = TRI.getSubReg(Src0, SubRegIndex); 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Src1 = TRI.getSubReg(Src1, SubRegIndex); 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (IsCube) { 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org static const int CubeSrcSwz[] = {2, 2, 0, 1}; 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Src1 = TRI.getSubReg(Src0, SubRegIndex1); 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Src0 = TRI.getSubReg(Src0, SubRegIndex0); 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Determine the correct destination registers; 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned Flags = 0; 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (IsCube) { 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DstReg = TRI.getSubReg(DstReg, SubRegIndex); 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Mask the write if the original instruction does not write to 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // the current Channel. 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned DstBase = TRI.getHWRegIndex(DstReg); 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Set the IsLast bit 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0); 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Add the new instruction 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned Opcode; 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (IsCube) { 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (MI.getOpcode()) { 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::CUBE_r600_pseudo: 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Opcode = AMDGPU::CUBE_r600_real; 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::CUBE_eg_pseudo: 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Opcode = AMDGPU::CUBE_eg_real; 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"Unknown CUBE instruction"); 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Opcode = 0; 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Opcode = MI.getOpcode(); 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineInstr *NewMI = 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg) 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(Src0) 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(Src1) 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0); // Flag 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org NewMI->setIsInsideBundle(Chan != 0); 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org TII->addFlag(NewMI, 0, Flags); 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI.eraseFromParent(); 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 164