//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Vector, Reduction, and Cube instructions need to fill the entire instruction
/// group to work correctly.  This pass expands these individual instructions
/// into several instructions that will completely fill the instruction group.
14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPU.h" 18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600Defines.h" 19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600InstrInfo.h" 20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600MachineFunctionInfo.h" 2158a2cbef4aac9ee7d530dfb690c78d6fc11a2371Chandler Carruth#include "R600RegisterInfo.h" 22f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h" 23f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 24f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 25f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 26f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm; 27f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardnamespace { 29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 31f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 32f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardprivate: 33f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static char ID; 34f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600InstrInfo *TII; 35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 36f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool ExpandInputPerspective(MachineInstr& MI); 37f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool ExpandInputConstant(MachineInstr& MI); 38f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 
39f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardpublic: 40f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 41b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling TII(0) { } 42f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 43f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard virtual bool runOnMachineFunction(MachineFunction &MF); 44f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 45f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const char *getPassName() const { 46f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return "R600 Expand special instructions pass"; 47f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 48f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}; 49f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 50f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} // End anonymous namespace 51f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 52f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0; 53f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 54f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 55f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return new R600ExpandSpecialInstrsPass(TM); 56f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 57f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 58f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 59b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); 60f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 61f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 62f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 63f98f2ce29e6e2996fa58f38979143eceaa818335Tom 
Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 64f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BB != BB_E; ++BB) { 65f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *BB; 66f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock::iterator I = MBB.begin(); 67f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard while (I != MBB.end()) { 68f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr &MI = *I; 69f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard I = llvm::next(I); 70f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 71f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (MI.getOpcode()) { 72f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: break; 73f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand PRED_X to one of the PRED_SET instructions. 74f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::PRED_X: { 75f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard uint64_t Flags = MI.getOperand(3).getImm(); 76f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // The native opcode used by PRED_X is stored as an immediate in the 77f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // third operand. 
78f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, 79f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(2).getImm(), // opcode 80f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(0).getReg(), // dst 81f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(1).getReg(), // src0 82f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO); // src1 83f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(PredSet, 0, MO_FLAG_MASK); 84f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Flags & MO_FLAG_PUSH) { 855e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); 86f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 875e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1); 88f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 89f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 90f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 91f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 9229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 9329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_PAIR_XY: { 9429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MachineInstr *BMI; 9529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 9629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(2).getImm()); 9729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 9829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for (unsigned Chan = 0; Chan < 4; ++Chan) { 9929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg; 10029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 10129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 
10229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = MI.getOperand(Chan).getReg(); 10329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard else 10429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W; 10529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 10629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY, 10729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); 10829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 10929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 11029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 11129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 11229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan >= 2) 11329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_MASK); 11429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 11529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 11629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 11729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 11829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 11929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 12029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 121f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 12229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_PAIR_ZW: { 12329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MachineInstr *BMI; 12429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 12529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(2).getImm()); 12629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 12729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for 
(unsigned Chan = 0; Chan < 4; ++Chan) { 12829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg; 12929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 13029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 13129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y; 13229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard else 13329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = MI.getOperand(Chan-2).getReg(); 13429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 13529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW, 13629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); 13729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 13829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 13929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 14029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 14129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 14229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_MASK); 14329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 14429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 14529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 14629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 14729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 14829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 14929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 15029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 15129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_VEC_LOAD: { 15229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 15329b15a378045762ce09642ab9dd741ece41f59a3Tom 
Stellard MachineInstr *BMI; 15429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 15529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(1).getImm()); 15629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg = MI.getOperand(0).getReg(); 15729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 15829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for (unsigned Chan = 0; Chan < 4; ++Chan) { 15929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0, 16029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg); 16129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 16229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 16329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 16429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 16529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 16629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 16729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 16829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 16929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 17029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 1714ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune case AMDGPU::DOT_4: { 1724ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 1734ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune const R600RegisterInfo &TRI = TII->getRegisterInfo(); 1744ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 1754ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned DstReg = MI.getOperand(0).getReg(); 1764ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 
1774ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 1784ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune for (unsigned Chan = 0; Chan < 4; ++Chan) { 1794ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune bool Mask = (Chan != TRI.getHWRegChan(DstReg)); 1804ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned SubDstReg = 1814ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 1824ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune MachineInstr *BMI = 1834ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg); 1844ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Chan > 0) { 1854ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune BMI->bundleWithPred(); 1864ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 1874ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Mask) { 1884ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->addFlag(BMI, 0, MO_FLAG_MASK); 1894ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 1904ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Chan != 3) 1914ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 1924ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Opcode = BMI->getOpcode(); 1934ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune // While not strictly necessary from hw point of view, we force 1944ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune // all src operands of a dot4 inst to belong to the same slot. 
1954ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Src0 = BMI->getOperand( 1965e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) 1974ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune .getReg(); 1984ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Src1 = BMI->getOperand( 1995e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) 2004ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune .getReg(); 201de2b854310f0a359224bfcb0fc7e4ed6339d60ecRafael Espindola (void) Src0; 202de2b854310f0a359224bfcb0fc7e4ed6339d60ecRafael Espindola (void) Src1; 203e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune if ((TRI.getEncodingValue(Src0) & 0xff) < 127 && 204e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune (TRI.getEncodingValue(Src1) & 0xff) < 127) 205e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1)); 2064ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 2074ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune MI.eraseFromParent(); 2084ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune continue; 2094ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 21029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 211f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 212f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsReduction = TII->isReductionOp(MI.getOpcode()); 213f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsVector = TII->isVector(MI); 214f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsCube = TII->isCubeOp(MI.getOpcode()); 215f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsReduction && !IsVector && !IsCube) { 216f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 217f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 218f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 
219f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand the instruction 220f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 221f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Reduction instructions: 222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = DP4 T1_XYZW, T2_XYZW 223f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 224f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = DP4 T1_X, T2_X 225f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Y (write masked) = DP4 T1_Y, T2_Y 226f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Z (write masked) = DP4 T1_Z, T2_Z 227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_W (write masked) = DP4 T1_W, T2_W 228f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 229f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Vector instructions: 230f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 231f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 232f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 233f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y (write masked) = MULLO_INT T1_X, T2_X 234f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z (write masked) = MULLO_INT T1_X, T2_X 235f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W (write masked) = MULLO_INT T1_X, T2_X 236f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 237f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Cube instructions: 238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_XYZW = CUBE T1_XYZW 239f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 240f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = CUBE T1_Z, T1_Y 241f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y = CUBE T1_Z, T1_X 242f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z = CUBE T1_X, T1_Z 
243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W = CUBE T1_Y, T1_Z 244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned Chan = 0; Chan < 4; Chan++) { 245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstReg = MI.getOperand( 2465e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); 247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src0 = MI.getOperand( 2485e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); 249f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src1 = 0; 250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 251f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct source registers 252f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsCube) { 2535e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); 254f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Src1Idx != -1) { 255f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = MI.getOperand(Src1Idx).getReg(); 256f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 257f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 258f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsReduction) { 259f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex); 261f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src1, SubRegIndex); 262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (IsCube) { 263f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static const int CubeSrcSwz[] = {2, 2, 0, 1}; 264f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 
265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src0, SubRegIndex1); 267f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex0); 268f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 269f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct destination registers; 271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool Mask = false; 272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool NotLast = true; 273f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsCube) { 274f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 275f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = TRI.getSubReg(DstReg, SubRegIndex); 276f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 277f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Mask the write if the original instruction does not write to 278f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // the current Channel. 
279f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Mask = (Chan != TRI.getHWRegChan(DstReg)); 280f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 281f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 282f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 283f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 284f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Set the IsLast bit 285f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard NotLast = (Chan != 3 ); 286f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 287f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Add the new instruction 288f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Opcode = MI.getOpcode(); 289f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (Opcode) { 290f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_r600_pseudo: 291f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_r600_real; 292f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 293f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_eg_pseudo: 294f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_eg_real; 295f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 296f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: 297f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 298f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 299f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 300f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *NewMI = 301f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); 302f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 3034397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen if (Chan != 0) 
3044397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen NewMI->bundleWithPred(); 305f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Mask) { 306f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_MASK); 307f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 308f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (NotLast) { 309f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); 310f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 311f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 312f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 313f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 314f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 315f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 316f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 317