R600ExpandSpecialInstrs.cpp revision 5e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0
1f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// 2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// The LLVM Compiler Infrastructure 4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source 6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details. 7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file 11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// Vector, Reduction, and Cube instructions need to fill the entire instruction 12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// group to work correctly. This pass expands these individual instructions 13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// into several instructions that will completely fill the instruction group. 14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPU.h" 18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600Defines.h" 19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600InstrInfo.h" 20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600MachineFunctionInfo.h" 2158a2cbef4aac9ee7d530dfb690c78d6fc11a2371Chandler Carruth#include "R600RegisterInfo.h" 22f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h" 23f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 24f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 25f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 26f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm; 27f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardnamespace { 29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 31f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 32f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardprivate: 33f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static char ID; 34f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600InstrInfo *TII; 35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 36f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool ExpandInputPerspective(MachineInstr& MI); 37f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool ExpandInputConstant(MachineInstr& MI); 38f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 39f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardpublic: 40f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 41b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling TII(0) { } 42f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 43f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard virtual bool runOnMachineFunction(MachineFunction &MF); 44f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 45f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const char *getPassName() const { 46f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return "R600 Expand special instructions pass"; 47f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 48f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}; 49f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 50f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} // End anonymous namespace 51f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 52f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0; 53f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 54f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 55f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return new R600ExpandSpecialInstrsPass(TM); 56f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 57f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 58f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 59b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); 60f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 61f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 62f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 63f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 64f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BB != BB_E; ++BB) { 65f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *BB; 66f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock::iterator I = MBB.begin(); 67f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard while (I != MBB.end()) { 68f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr &MI = *I; 69f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard I = llvm::next(I); 70f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 71f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (MI.getOpcode()) { 72f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: break; 73f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand PRED_X to one of the PRED_SET instructions. 74f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::PRED_X: { 75f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard uint64_t Flags = MI.getOperand(3).getImm(); 76f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // The native opcode used by PRED_X is stored as an immediate in the 77f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // third operand. 78f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, 79f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(2).getImm(), // opcode 80f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(0).getReg(), // dst 81f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(1).getReg(), // src0 82f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO); // src1 83f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(PredSet, 0, MO_FLAG_MASK); 84f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Flags & MO_FLAG_PUSH) { 855e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); 86f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 875e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1); 88f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 89f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 90f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 91f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 9229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::BREAK: { 93f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, 94f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::PRED_SETE_INT, 95f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::PREDICATE_BIT, 96f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO, 97f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO); 98f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(PredSet, 0, MO_FLAG_MASK); 995e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); 100f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 101f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BuildMI(MBB, I, MBB.findDebugLoc(I), 102f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->get(AMDGPU::PREDICATED_BREAK)) 103f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard .addReg(AMDGPU::PREDICATE_BIT); 104f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 105f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 10629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 10729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 10829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_PAIR_XY: { 10929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MachineInstr *BMI; 11029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 11129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(2).getImm()); 11229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 11329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for (unsigned Chan = 0; Chan < 4; ++Chan) { 11429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg; 11529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 11629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 11729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = MI.getOperand(Chan).getReg(); 11829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard else 11929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W; 12029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 12129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY, 12229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); 12329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 12429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 12529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 12629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 12729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan >= 2) 12829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_MASK); 12929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 13029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 13129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 13229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 13329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 13429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 13529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 136f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 13729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_PAIR_ZW: { 13829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MachineInstr *BMI; 13929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 14029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(2).getImm()); 14129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 14229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for (unsigned Chan = 0; Chan < 4; ++Chan) { 14329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg; 14429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 14529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 14629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y; 14729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard else 14829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = MI.getOperand(Chan-2).getReg(); 14929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 15029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW, 15129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); 15229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 15329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 15429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 15529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 15629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 15729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_MASK); 15829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 15929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 16029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 16129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 16229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 16329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 16429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 16529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 16629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_VEC_LOAD: { 16729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 16829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MachineInstr *BMI; 16929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 17029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(1).getImm()); 17129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg = MI.getOperand(0).getReg(); 17229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 17329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for (unsigned Chan = 0; Chan < 4; ++Chan) { 17429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0, 17529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg); 17629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 17729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 17829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 17929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 18029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 18129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 18229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 18329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 18429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 18529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 1864ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune case AMDGPU::DOT_4: { 1874ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 1884ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune const R600RegisterInfo &TRI = TII->getRegisterInfo(); 1894ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 1904ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned DstReg = MI.getOperand(0).getReg(); 1914ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 1924ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 1934ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune for (unsigned Chan = 0; Chan < 4; ++Chan) { 1944ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune bool Mask = (Chan != TRI.getHWRegChan(DstReg)); 1954ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned SubDstReg = 1964ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 1974ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune MachineInstr *BMI = 1984ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg); 1994ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Chan > 0) { 2004ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune BMI->bundleWithPred(); 2014ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 2024ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Mask) { 2034ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->addFlag(BMI, 0, MO_FLAG_MASK); 2044ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 2054ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Chan != 3) 2064ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 2074ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Opcode = BMI->getOpcode(); 2084ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune // While not strictly necessary from hw point of view, we force 2094ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune // all src operands of a dot4 inst to belong to the same slot. 2104ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Src0 = BMI->getOperand( 2115e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) 2124ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune .getReg(); 2134ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Src1 = BMI->getOperand( 2145e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) 2154ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune .getReg(); 216de2b854310f0a359224bfcb0fc7e4ed6339d60ecRafael Espindola (void) Src0; 217de2b854310f0a359224bfcb0fc7e4ed6339d60ecRafael Espindola (void) Src1; 218e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune if ((TRI.getEncodingValue(Src0) & 0xff) < 127 && 219e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune (TRI.getEncodingValue(Src1) & 0xff) < 127) 220e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1)); 2214ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 2224ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune MI.eraseFromParent(); 2234ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune continue; 2244ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 22529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 226f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsReduction = TII->isReductionOp(MI.getOpcode()); 228f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsVector = TII->isVector(MI); 229f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsCube = TII->isCubeOp(MI.getOpcode()); 230f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsReduction && !IsVector && !IsCube) { 231f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 232f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 233f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 234f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand the instruction 235f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 236f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Reduction instructions: 237f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = DP4 T1_XYZW, T2_XYZW 238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 239f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = DP4 T1_X, T2_X 240f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Y (write masked) = DP4 T1_Y, T2_Y 241f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Z (write masked) = DP4 T1_Z, T2_Z 242f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_W (write masked) = DP4 T1_W, T2_W 243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Vector instructions: 245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 246f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 248f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y (write masked) = MULLO_INT T1_X, T2_X 249f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z (write masked) = MULLO_INT T1_X, T2_X 250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W (write masked) = MULLO_INT T1_X, T2_X 251f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 252f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Cube instructions: 253f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_XYZW = CUBE T1_XYZW 254f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 255f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = CUBE T1_Z, T1_Y 256f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y = CUBE T1_Z, T1_X 257f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z = CUBE T1_X, T1_Z 258f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W = CUBE T1_Y, T1_Z 259f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned Chan = 0; Chan < 4; Chan++) { 260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstReg = MI.getOperand( 2615e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); 262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src0 = MI.getOperand( 2635e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); 264f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src1 = 0; 265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct source registers 267f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsCube) { 2685e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); 269f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Src1Idx != -1) { 270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = MI.getOperand(Src1Idx).getReg(); 271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 273f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsReduction) { 274f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 275f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex); 276f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src1, SubRegIndex); 277f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (IsCube) { 278f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static const int CubeSrcSwz[] = {2, 2, 0, 1}; 279f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 280f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 281f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src0, SubRegIndex1); 282f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex0); 283f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 284f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 285f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct destination registers; 286f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool Mask = false; 287f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool NotLast = true; 288f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsCube) { 289f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 290f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = TRI.getSubReg(DstReg, SubRegIndex); 291f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 292f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Mask the write if the original instruction does not write to 293f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // the current Channel. 294f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Mask = (Chan != TRI.getHWRegChan(DstReg)); 295f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 296f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 297f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 298f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 299f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Set the IsLast bit 300f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard NotLast = (Chan != 3 ); 301f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 302f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Add the new instruction 303f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Opcode = MI.getOpcode(); 304f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (Opcode) { 305f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_r600_pseudo: 306f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_r600_real; 307f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 308f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_eg_pseudo: 309f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_eg_real; 310f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 311f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: 312f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 313f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 314f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 315f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *NewMI = 316f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); 317f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 3184397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen if (Chan != 0) 3194397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen NewMI->bundleWithPred(); 320f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Mask) { 321f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_MASK); 322f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 323f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (NotLast) { 324f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); 325f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 326f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 327f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 328f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 329f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 330f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 331f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 332