// R600ExpandSpecialInstrs.cpp revision 3a7a56e7aa56bc6cb847c241ef6bd749713ae6e1
182a5d0c64142990236b40567561b6e99b7158216Tom Stellard//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// 282a5d0c64142990236b40567561b6e99b7158216Tom Stellard// 382a5d0c64142990236b40567561b6e99b7158216Tom Stellard// The LLVM Compiler Infrastructure 482a5d0c64142990236b40567561b6e99b7158216Tom Stellard// 582a5d0c64142990236b40567561b6e99b7158216Tom Stellard// This file is distributed under the University of Illinois Open Source 682a5d0c64142990236b40567561b6e99b7158216Tom Stellard// License. See LICENSE.TXT for details. 782a5d0c64142990236b40567561b6e99b7158216Tom Stellard// 882a5d0c64142990236b40567561b6e99b7158216Tom Stellard//===----------------------------------------------------------------------===// 982a5d0c64142990236b40567561b6e99b7158216Tom Stellard// Vector, Reduction, and Cube instructions need to fill the entire instruction 1082a5d0c64142990236b40567561b6e99b7158216Tom Stellard// group to work correctly. This pass expands these individual instructions 1182a5d0c64142990236b40567561b6e99b7158216Tom Stellard// into several instructions that will completely fill the instruction group. 
1282a5d0c64142990236b40567561b6e99b7158216Tom Stellard//===----------------------------------------------------------------------===// 1382a5d0c64142990236b40567561b6e99b7158216Tom Stellard 1482a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "AMDGPU.h" 1582a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600InstrInfo.h" 1682a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600RegisterInfo.h" 1782a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h" 1882a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 1982a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 2082a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2182a5d0c64142990236b40567561b6e99b7158216Tom Stellardusing namespace llvm; 2282a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2382a5d0c64142990236b40567561b6e99b7158216Tom Stellardnamespace { 2482a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2582a5d0c64142990236b40567561b6e99b7158216Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 2682a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2782a5d0c64142990236b40567561b6e99b7158216Tom Stellardprivate: 2882a5d0c64142990236b40567561b6e99b7158216Tom Stellard static char ID; 2982a5d0c64142990236b40567561b6e99b7158216Tom Stellard const R600InstrInfo *TII; 3082a5d0c64142990236b40567561b6e99b7158216Tom Stellard 3182a5d0c64142990236b40567561b6e99b7158216Tom Stellardpublic: 3282a5d0c64142990236b40567561b6e99b7158216Tom Stellard R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 3382a5d0c64142990236b40567561b6e99b7158216Tom Stellard TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } 3482a5d0c64142990236b40567561b6e99b7158216Tom Stellard 3582a5d0c64142990236b40567561b6e99b7158216Tom Stellard virtual bool runOnMachineFunction(MachineFunction &MF); 
3682a5d0c64142990236b40567561b6e99b7158216Tom Stellard 3782a5d0c64142990236b40567561b6e99b7158216Tom Stellard const char *getPassName() const { 3882a5d0c64142990236b40567561b6e99b7158216Tom Stellard return "R600 Expand special instructions pass"; 3982a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 4082a5d0c64142990236b40567561b6e99b7158216Tom Stellard}; 4182a5d0c64142990236b40567561b6e99b7158216Tom Stellard 4282a5d0c64142990236b40567561b6e99b7158216Tom Stellard} // End anonymous namespace 4382a5d0c64142990236b40567561b6e99b7158216Tom Stellard 4482a5d0c64142990236b40567561b6e99b7158216Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0; 4582a5d0c64142990236b40567561b6e99b7158216Tom Stellard 4682a5d0c64142990236b40567561b6e99b7158216Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 4782a5d0c64142990236b40567561b6e99b7158216Tom Stellard return new R600ExpandSpecialInstrsPass(TM); 4882a5d0c64142990236b40567561b6e99b7158216Tom Stellard} 4982a5d0c64142990236b40567561b6e99b7158216Tom Stellard 5082a5d0c64142990236b40567561b6e99b7158216Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 5182a5d0c64142990236b40567561b6e99b7158216Tom Stellard 5282a5d0c64142990236b40567561b6e99b7158216Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 5382a5d0c64142990236b40567561b6e99b7158216Tom Stellard 5482a5d0c64142990236b40567561b6e99b7158216Tom Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 5582a5d0c64142990236b40567561b6e99b7158216Tom Stellard BB != BB_E; ++BB) { 5682a5d0c64142990236b40567561b6e99b7158216Tom Stellard MachineBasicBlock &MBB = *BB; 5782a5d0c64142990236b40567561b6e99b7158216Tom Stellard MachineBasicBlock::iterator I = MBB.begin(); 5882a5d0c64142990236b40567561b6e99b7158216Tom Stellard while (I != MBB.end()) { 5982a5d0c64142990236b40567561b6e99b7158216Tom Stellard MachineInstr &MI = *I; 6082a5d0c64142990236b40567561b6e99b7158216Tom Stellard I = 
llvm::next(I); 6182a5d0c64142990236b40567561b6e99b7158216Tom Stellard 626c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard bool IsReduction = TII->isReductionOp(MI.getOpcode()); 636c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard bool IsVector = TII->isVector(MI); 641cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard bool IsCube = TII->isCubeOp(MI.getOpcode()); 651cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (!IsReduction && !IsVector && !IsCube) { 6682a5d0c64142990236b40567561b6e99b7158216Tom Stellard continue; 6782a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 6882a5d0c64142990236b40567561b6e99b7158216Tom Stellard 6982a5d0c64142990236b40567561b6e99b7158216Tom Stellard // Expand the instruction 706c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // 716c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // Reduction instructions: 726c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_X = DP4 T1_XYZW, T2_XYZW 736c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // becomes: 746c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_X = DP4 T1_X, T2_X 756c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_Y (write masked) = DP4 T1_Y, T2_Y 766c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_Z (write masked) = DP4 T1_Z, T2_Z 776c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_W (write masked) = DP4 T1_W, T2_W 786c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // 796c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // Vector instructions: 806c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 816c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // becomes: 826c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 836c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_Y (write masked) = MULLO_INT T1_X, T2_X 846c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_Z (write masked) = MULLO_INT T1_X, T2_X 
856c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_W (write masked) = MULLO_INT T1_X, T2_X 861cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // 871cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Cube instructions: 881cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_XYZW = CUBE T1_XYZW 891cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // becomes: 901cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // TO_X = CUBE T1_Z, T1_Y 911cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_Y = CUBE T1_Z, T1_X 921cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_Z = CUBE T1_X, T1_Z 931cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_W = CUBE T1_Y, T1_Z 9482a5d0c64142990236b40567561b6e99b7158216Tom Stellard for (unsigned Chan = 0; Chan < 4; Chan++) { 9582a5d0c64142990236b40567561b6e99b7158216Tom Stellard unsigned DstReg = MI.getOperand(0).getReg(); 9682a5d0c64142990236b40567561b6e99b7158216Tom Stellard unsigned Src0 = MI.getOperand(1).getReg(); 971cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned Src1 = 0; 981cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 991cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Determine the correct source registers 1001cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (!IsCube) { 1011cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Src1 = MI.getOperand(2).getReg(); 1021cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1036c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard if (IsReduction) { 1046c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 1056c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex); 1066c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard Src1 = TRI.getSubReg(Src1, SubRegIndex); 1071cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } else if (IsCube) { 1081cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard static const int CubeSrcSwz[] = {2, 
2, 0, 1}; 1091cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 1101cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 1111cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Src1 = TRI.getSubReg(Src0, SubRegIndex1); 1121cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex0); 1131cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1141cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 1151cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Determine the correct destination registers; 1161cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned Flags = 0; 1171cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (IsCube) { 1181cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 1191cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard DstReg = TRI.getSubReg(DstReg, SubRegIndex); 1201cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } else { 1211cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Mask the write if the original instruction does not write to 1221cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // the current Channel. 1231cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); 1241cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned DstBase = TRI.getHWRegIndex(DstReg); 1251cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 1266c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard } 1271cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 1281cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Set the IsLast bit 12982a5d0c64142990236b40567561b6e99b7158216Tom Stellard Flags |= (Chan == 3 ? 
MO_FLAG_LAST : 0); 1301cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 1311cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Add the new instruction 1321cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned Opcode; 1331cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (IsCube) { 1341cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard switch (MI.getOpcode()) { 1351cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard case AMDGPU::CUBE_r600_pseudo: 1361cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = AMDGPU::CUBE_r600_real; 1371cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard break; 1381cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard case AMDGPU::CUBE_eg_pseudo: 1391cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = AMDGPU::CUBE_eg_real; 1401cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard break; 1411cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard default: 1421cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard assert(!"Unknown CUBE instruction"); 1431cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = 0; 1441cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard break; 1451cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1461cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } else { 1471cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = MI.getOpcode(); 1481cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1493a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard MachineInstr *NewMI = 1503a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg) 1513a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard .addReg(Src0) 1523a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard .addReg(Src1); 1533a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard 1543a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard NewMI->setIsInsideBundle(Chan != 0); 1553a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard TII->AddFlag(NewMI, 0, Flags); 
15682a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 15782a5d0c64142990236b40567561b6e99b7158216Tom Stellard MI.eraseFromParent(); 15882a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 15982a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 16082a5d0c64142990236b40567561b6e99b7158216Tom Stellard return false; 16182a5d0c64142990236b40567561b6e99b7158216Tom Stellard} 162