R600ExpandSpecialInstrs.cpp revision 4397294e78dbfda8e812a63adaf6f27fca7868a3
1f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// 2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// The LLVM Compiler Infrastructure 4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source 6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details. 7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file 11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// Vector, Reduction, and Cube instructions need to fill the entire instruction 12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// group to work correctly. This pass expands these individual instructions 13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// into several instructions that will completely fill the instruction group. 14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPU.h" 18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600Defines.h" 19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600InstrInfo.h" 20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600RegisterInfo.h" 21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "R600MachineFunctionInfo.h" 22f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h" 23f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 24f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 25f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 26f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm; 27f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardnamespace { 29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 31f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 32f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardprivate: 33f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static char ID; 34f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600InstrInfo *TII; 35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 36f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool ExpandInputPerspective(MachineInstr& MI); 37f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool ExpandInputConstant(MachineInstr& MI); 38f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 39f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardpublic: 40f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 41f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } 42f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 43f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard virtual bool runOnMachineFunction(MachineFunction &MF); 44f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 45f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const char *getPassName() const { 46f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return "R600 Expand special instructions pass"; 47f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 48f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}; 49f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 50f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} // End anonymous namespace 51f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 52f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0; 53f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 54f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 55f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return new R600ExpandSpecialInstrsPass(TM); 56f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 57f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 58f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) { 59f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 60f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (MI.getOpcode() != AMDGPU::input_perspective) 61f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 62f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 63f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock::iterator I = &MI; 64f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstReg = MI.getOperand(0).getReg(); 65f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard R600MachineFunctionInfo *MFI = MI.getParent()->getParent() 66f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard ->getInfo<R600MachineFunctionInfo>(); 67f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned IJIndexBase; 68f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 69f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // In Evergreen ISA doc section 8.3.2 : 70f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // We need to interpolate XY and ZW in two different instruction groups. 71f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // An INTERP_* must occupy all 4 slots of an instruction group. 72f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Output of INTERP_XY is written in X,Y slots 73f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Output of INTERP_ZW is written in Z,W slots 74f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 75f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Thus interpolation requires the following sequences : 76f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 77f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // AnyGPR.x = INTERP_ZW; (Write Masked Out) 78f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // AnyGPR.y = INTERP_ZW; (Write Masked Out) 79f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // DstGPR.z = INTERP_ZW; 80f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // DstGPR.w = INTERP_ZW; (End of first IG) 81f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // DstGPR.x = INTERP_XY; 82f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // DstGPR.y = INTERP_XY; 83f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // AnyGPR.z = INTERP_XY; (Write Masked Out) 84f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) 85f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 86f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (MI.getOperand(1).getImm()) { 87f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 0: 88f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard IJIndexBase = MFI->GetIJPerspectiveIndex(); 89f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 90f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 1: 91f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard IJIndexBase = MFI->GetIJLinearIndex(); 92f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 93f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: 94f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard assert(0 && "Unknow ij index"); 95f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 96f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 97f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned i = 0; i < 8; i++) { 98f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( 99f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 2 * IJIndexBase + ((i + 1) % 2)); 100f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 101f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(2).getImm()); 102f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 103f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 104f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Sel = AMDGPU::sel_x; 105f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (i % 4) { 106f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 0:Sel = AMDGPU::sel_x;break; 107f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 1:Sel = AMDGPU::sel_y;break; 108f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 2:Sel = AMDGPU::sel_z;break; 109f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 3:Sel = AMDGPU::sel_w;break; 110f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default:break; 111f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 112f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 113f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Res = TRI.getSubReg(DstReg, Sel); 114f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 115f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY; 116f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 117f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *(MI.getParent()); 118f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *NewMI = 119f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg); 120f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 121f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!(i> 1 && i < 6)) { 122f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_MASK); 123f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 124f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 125f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (i % 4 != 3) 126f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); 127f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 128f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 129f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 130f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 131f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return true; 132f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 133f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 134f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) { 135f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 136f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (MI.getOpcode() != AMDGPU::input_constant) 137f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 138f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 139f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock::iterator I = &MI; 140f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstReg = MI.getOperand(0).getReg(); 141f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 142f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned i = 0; i < 4; i++) { 143f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( 144f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(1).getImm()); 145f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 146f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Sel = AMDGPU::sel_x; 147f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (i % 4) { 148f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 0:Sel = AMDGPU::sel_x;break; 149f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 1:Sel = AMDGPU::sel_y;break; 150f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 2:Sel = AMDGPU::sel_z;break; 151f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case 3:Sel = AMDGPU::sel_w;break; 152f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default:break; 153f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 154f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 155f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Res = TRI.getSubReg(DstReg, Sel); 156f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 157f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *(MI.getParent()); 158f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *NewMI = TII->buildDefaultInstruction( 159f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg); 160f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 161f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (i % 4 != 3) 162f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); 163f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 164f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 165f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 166f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 167f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return true; 168f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 169f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 170f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 171f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 172f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 173f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 174f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 175f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BB != BB_E; ++BB) { 176f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *BB; 177f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock::iterator I = MBB.begin(); 178f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard while (I != MBB.end()) { 179f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr &MI = *I; 180f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard I = llvm::next(I); 181f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 182f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (MI.getOpcode()) { 183f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: break; 184f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand PRED_X to one of the PRED_SET instructions. 185f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::PRED_X: { 186f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard uint64_t Flags = MI.getOperand(3).getImm(); 187f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // The native opcode used by PRED_X is stored as an immediate in the 188f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // third operand. 189f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, 190f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(2).getImm(), // opcode 191f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(0).getReg(), // dst 192f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.getOperand(1).getReg(), // src0 193f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO); // src1 194f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(PredSet, 0, MO_FLAG_MASK); 195f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Flags & MO_FLAG_PUSH) { 196f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); 197f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 198f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1); 199f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 200f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 201f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 202f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 203f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::BREAK: 204f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, 205f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::PRED_SETE_INT, 206f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::PREDICATE_BIT, 207f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO, 208f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard AMDGPU::ZERO); 209f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(PredSet, 0, MO_FLAG_MASK); 210f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); 211f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 212f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BuildMI(MBB, I, MBB.findDebugLoc(I), 213f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->get(AMDGPU::PREDICATED_BREAK)) 214f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard .addReg(AMDGPU::PREDICATE_BIT); 215f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 216f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 217f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 218f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 219f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (ExpandInputPerspective(MI)) 220f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 221f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (ExpandInputConstant(MI)) 222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 223f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 224f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsReduction = TII->isReductionOp(MI.getOpcode()); 225f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsVector = TII->isVector(MI); 226f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool IsCube = TII->isCubeOp(MI.getOpcode()); 227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsReduction && !IsVector && !IsCube) { 228f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 229f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 230f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 231f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Expand the instruction 232f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 233f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Reduction instructions: 234f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = DP4 T1_XYZW, T2_XYZW 235f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 236f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = DP4 T1_X, T2_X 237f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Y (write masked) = DP4 T1_Y, T2_Y 238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_Z (write masked) = DP4 T1_Z, T2_Z 239f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_W (write masked) = DP4 T1_W, T2_W 240f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 241f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Vector instructions: 242f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y (write masked) = MULLO_INT T1_X, T2_X 246f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z (write masked) = MULLO_INT T1_X, T2_X 247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W (write masked) = MULLO_INT T1_X, T2_X 248f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // 249f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Cube instructions: 250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_XYZW = CUBE T1_XYZW 251f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // becomes: 252f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // TO_X = CUBE T1_Z, T1_Y 253f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Y = CUBE T1_Z, T1_X 254f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_Z = CUBE T1_X, T1_Z 255f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // T0_W = CUBE T1_Y, T1_Z 256f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned Chan = 0; Chan < 4; Chan++) { 257f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstReg = MI.getOperand( 258f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->getOperandIdx(MI, R600Operands::DST)).getReg(); 259f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src0 = MI.getOperand( 260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->getOperandIdx(MI, R600Operands::SRC0)).getReg(); 261f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Src1 = 0; 262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 263f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct source registers 264f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!IsCube) { 265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1); 266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Src1Idx != -1) { 267f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = MI.getOperand(Src1Idx).getReg(); 268f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 269f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsReduction) { 271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex); 273f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src1, SubRegIndex); 274f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (IsCube) { 275f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static const int CubeSrcSwz[] = {2, 2, 0, 1}; 276f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 277f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 278f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src1 = TRI.getSubReg(Src0, SubRegIndex1); 279f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex0); 280f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 281f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 282f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Determine the correct destination registers; 283f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool Mask = false; 284f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool NotLast = true; 285f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (IsCube) { 286f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 287f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = TRI.getSubReg(DstReg, SubRegIndex); 288f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 289f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Mask the write if the original instruction does not write to 290f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // the current Channel. 291f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Mask = (Chan != TRI.getHWRegChan(DstReg)); 292f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 293f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 294f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 295f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 296f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Set the IsLast bit 297f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard NotLast = (Chan != 3 ); 298f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 299f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // Add the new instruction 300f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned Opcode = MI.getOpcode(); 301f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (Opcode) { 302f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_r600_pseudo: 303f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_r600_real; 304f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 305f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::CUBE_eg_pseudo: 306f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::CUBE_eg_real; 307f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 308f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::DOT4_r600_pseudo: 309f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::DOT4_r600_real; 310f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 311f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::DOT4_eg_pseudo: 312f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard Opcode = AMDGPU::DOT4_eg_real; 313f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 314f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: 315f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 316f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 317f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 318f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr *NewMI = 319f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); 320f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 3214397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen if (Chan != 0) 3224397294e78dbfda8e812a63adaf6f27fca7868a3Jakob Stoklund Olesen NewMI->bundleWithPred(); 323f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (Mask) { 324f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_MASK); 325f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 326f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (NotLast) { 327f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); 328f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 329f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 330f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MI.eraseFromParent(); 331f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 332f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 333f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 334f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 335