//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Vector, Reduction, and Cube instructions need to fill the entire instruction
/// group to work correctly.  This pass expands these individual instructions
/// into several instructions that will completely fill the instruction group.
14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPU.h" 18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600Defines.h" 19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600InstrInfo.h" 20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600MachineFunctionInfo.h" 2158a2cbef4aac9ee7d530dfb690c78d6fc11a2371Chandler Carruth#include "R600RegisterInfo.h" 2237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AMDGPUSubtarget.h" 23f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h" 24f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 25f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 26f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 27f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm; 28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardnamespace { 30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 31f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 32f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 33f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardprivate: 34f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static char ID; 35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600InstrInfo *TII; 36f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI, 3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned Op); 
39f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 40f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardpublic: 41f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 42dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines TII(nullptr) { } 43f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 44dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool runOnMachineFunction(MachineFunction &MF) override; 45f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const char *getPassName() const override { 47f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return "R600 Expand special instructions pass"; 48f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 49f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}; 50f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 51f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} // End anonymous namespace 52f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 53f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0; 54f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 55f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 56f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return new R600ExpandSpecialInstrsPass(TM); 57f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 58f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI, 6036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const MachineInstr *OldMI, unsigned Op) { 6136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int OpIdx = TII->getOperandIdx(*OldMI, Op); 6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (OpIdx > -1) { 6336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 
uint64_t Val = OldMI->getOperand(OpIdx).getImm(); 6436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TII->setImmOperand(NewMI, Op, Val); 6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 6636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 68f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 6937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); 70f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 71f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 72f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 73f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 74f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BB != BB_E; ++BB) { 75f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *BB; 76f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock::iterator I = MBB.begin(); 77f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard while (I != MBB.end()) { 78f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr &MI = *I; 7936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines I = std::next(I); 80f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 8119a99df130f5747da950faf4ca5170d71f05594cTom Stellard // Expand LDS_*_RET instructions 8219a99df130f5747da950faf4ca5170d71f05594cTom Stellard if (TII->isLDSRetInstr(MI.getOpcode())) { 8319a99df130f5747da950faf4ca5170d71f05594cTom Stellard int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); 8419a99df130f5747da950faf4ca5170d71f05594cTom Stellard assert(DstIdx != -1); 8519a99df130f5747da950faf4ca5170d71f05594cTom Stellard MachineOperand &DstOp = MI.getOperand(DstIdx); 8619a99df130f5747da950faf4ca5170d71f05594cTom 
Stellard MachineInstr *Mov = TII->buildMovInstr(&MBB, I, 8719a99df130f5747da950faf4ca5170d71f05594cTom Stellard DstOp.getReg(), AMDGPU::OQAP); 8819a99df130f5747da950faf4ca5170d71f05594cTom Stellard DstOp.setReg(AMDGPU::OQAP); 8919a99df130f5747da950faf4ca5170d71f05594cTom Stellard int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(), 9019a99df130f5747da950faf4ca5170d71f05594cTom Stellard AMDGPU::OpName::pred_sel); 9119a99df130f5747da950faf4ca5170d71f05594cTom Stellard int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(), 9219a99df130f5747da950faf4ca5170d71f05594cTom Stellard AMDGPU::OpName::pred_sel); 9319a99df130f5747da950faf4ca5170d71f05594cTom Stellard // Copy the pred_sel bit 9419a99df130f5747da950faf4ca5170d71f05594cTom Stellard Mov->getOperand(MovPredSelIdx).setReg( 9519a99df130f5747da950faf4ca5170d71f05594cTom Stellard MI.getOperand(LDSPredSelIdx).getReg()); 9619a99df130f5747da950faf4ca5170d71f05594cTom Stellard } 9719a99df130f5747da950faf4ca5170d71f05594cTom Stellard 98f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (MI.getOpcode()) { 99f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: break; 100f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand PRED_X to one of the PRED_SET instructions. 101f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::PRED_X: { 102f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard uint64_t Flags = MI.getOperand(3).getImm(); 103f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // The native opcode used by PRED_X is stored as an immediate in the 104f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // third operand. 
105f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, 106f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(2).getImm(), // opcode 107f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(0).getReg(), // dst 108f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(1).getReg(), // src0 109f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO); // src1 110f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(PredSet, 0, MO_FLAG_MASK); 111f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Flags & MO_FLAG_PUSH) { 1125e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); 113f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 1145e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1); 115f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 116f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 117f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 118f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 11929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 12029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_PAIR_XY: { 12129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MachineInstr *BMI; 12229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 12329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(2).getImm()); 12429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 12529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for (unsigned Chan = 0; Chan < 4; ++Chan) { 12629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg; 12729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 12829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 
12929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = MI.getOperand(Chan).getReg(); 13029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard else 13129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W; 13229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 13329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY, 13429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); 13529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 13629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 13729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 13829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 13929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan >= 2) 14029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_MASK); 14129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 14229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 14329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 14429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 14529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 14629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 14729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 148f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 14929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_PAIR_ZW: { 15029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MachineInstr *BMI; 15129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 15229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(2).getImm()); 15329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 15429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for 
(unsigned Chan = 0; Chan < 4; ++Chan) { 15529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg; 15629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 15729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 15829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y; 15929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard else 16029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg = MI.getOperand(Chan-2).getReg(); 16129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 16229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW, 16329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); 16429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 16529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 16629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 16729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 16829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan < 2) 16929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_MASK); 17029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 17129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 17229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 17329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 17429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 17529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 17629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 17729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 17829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard case AMDGPU::INTERP_VEC_LOAD: { 17929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 18029b15a378045762ce09642ab9dd741ece41f59a3Tom 
Stellard MachineInstr *BMI; 18129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 18229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.getOperand(1).getImm()); 18329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard unsigned DstReg = MI.getOperand(0).getReg(); 18429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 18529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard for (unsigned Chan = 0; Chan < 4; ++Chan) { 18629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0, 18729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg); 18829b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan > 0) { 18929b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard BMI->bundleWithPred(); 19029b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 19129b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard if (Chan != 3) 19229b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 19329b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 19429b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard 19529b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard MI.eraseFromParent(); 19629b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard continue; 19729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 1984ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune case AMDGPU::DOT_4: { 1994ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 2004ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune const R600RegisterInfo &TRI = TII->getRegisterInfo(); 2014ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 2024ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned DstReg = MI.getOperand(0).getReg(); 2034ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 
2044ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune 2054ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune for (unsigned Chan = 0; Chan < 4; ++Chan) { 2064ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune bool Mask = (Chan != TRI.getHWRegChan(DstReg)); 2074ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned SubDstReg = 2084ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 2094ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune MachineInstr *BMI = 2104ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg); 2114ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Chan > 0) { 2124ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune BMI->bundleWithPred(); 2134ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 2144ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Mask) { 2154ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->addFlag(BMI, 0, MO_FLAG_MASK); 2164ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 2174ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (Chan != 3) 2184ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); 2194ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Opcode = BMI->getOpcode(); 2204ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune // While not strictly necessary from hw point of view, we force 2214ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune // all src operands of a dot4 inst to belong to the same slot. 
2224ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Src0 = BMI->getOperand( 2235e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) 2244ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune .getReg(); 2254ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune unsigned Src1 = BMI->getOperand( 2265e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) 2274ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune .getReg(); 228de2b854310f0a359224bfcb0fc7e4ed6339d60ecRafael Espindola (void) Src0; 229de2b854310f0a359224bfcb0fc7e4ed6339d60ecRafael Espindola (void) Src1; 230e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune if ((TRI.getEncodingValue(Src0) & 0xff) < 127 && 231e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune (TRI.getEncodingValue(Src1) & 0xff) < 127) 232e67a4afb5da59c02338622eea68e096ba143113fVincent Lejeune assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1)); 2334ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 2344ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune MI.eraseFromParent(); 2354ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune continue; 2364ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune } 23729b15a378045762ce09642ab9dd741ece41f59a3Tom Stellard } 238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 239f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsReduction = TII->isReductionOp(MI.getOpcode()); 240f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsVector = TII->isVector(MI); 241f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsCube = TII->isCubeOp(MI.getOpcode()); 242f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsReduction && !IsVector && !IsCube) { 243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 
246f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand the instruction 247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 248f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Reduction instructions: 249f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = DP4 T1_XYZW, T2_XYZW 250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 251f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = DP4 T1_X, T2_X 252f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Y (write masked) = DP4 T1_Y, T2_Y 253f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Z (write masked) = DP4 T1_Z, T2_Z 254f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_W (write masked) = DP4 T1_W, T2_W 255f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 256f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Vector instructions: 257f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 258f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 259f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y (write masked) = MULLO_INT T1_X, T2_X 261f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z (write masked) = MULLO_INT T1_X, T2_X 262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W (write masked) = MULLO_INT T1_X, T2_X 263f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 264f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Cube instructions: 265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_XYZW = CUBE T1_XYZW 266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 267f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = CUBE T1_Z, T1_Y 268f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y = CUBE T1_Z, T1_X 269f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z = CUBE T1_X, T1_Z 
270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W = CUBE T1_Y, T1_Z 271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned Chan = 0; Chan < 4; Chan++) { 272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstReg = MI.getOperand( 2735e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); 274f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src0 = MI.getOperand( 2755e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); 276f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src1 = 0; 277f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 278f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct source registers 279f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsCube) { 2805e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); 281f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Src1Idx != -1) { 282f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = MI.getOperand(Src1Idx).getReg(); 283f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 284f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 285f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsReduction) { 286f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 287f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex); 288f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src1, SubRegIndex); 289f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (IsCube) { 290f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static const int CubeSrcSwz[] = {2, 2, 0, 1}; 291f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 
292f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 293f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src0, SubRegIndex1); 294f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex0); 295f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 296f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 297f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct destination registers; 298f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool Mask = false; 299f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool NotLast = true; 300f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsCube) { 301f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 302f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = TRI.getSubReg(DstReg, SubRegIndex); 303f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 304f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Mask the write if the original instruction does not write to 305f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // the current Channel. 
306f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Mask = (Chan != TRI.getHWRegChan(DstReg)); 307f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 308f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 309f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 310f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 311f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Set the IsLast bit 312f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard NotLast = (Chan != 3 ); 313f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 314f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Add the new instruction 315f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Opcode = MI.getOpcode(); 316f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (Opcode) { 317f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_r600_pseudo: 318f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_r600_real; 319f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 320f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_eg_pseudo: 321f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_eg_real; 322f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 323f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: 324f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 325f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 326f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 327f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *NewMI = 328f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); 329f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 3304397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen if (Chan != 0) 
3314397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen NewMI->bundleWithPred(); 332f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Mask) { 333f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_MASK); 334f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 335f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (NotLast) { 336f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); 337f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 33836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp); 33936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal); 34036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs); 34136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs); 34236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg); 34336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg); 344f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 345f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 346f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 347f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 348f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 349f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 350