182a5d0c64142990236b40567561b6e99b7158216Tom Stellard//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// 282a5d0c64142990236b40567561b6e99b7158216Tom Stellard// 382a5d0c64142990236b40567561b6e99b7158216Tom Stellard// The LLVM Compiler Infrastructure 482a5d0c64142990236b40567561b6e99b7158216Tom Stellard// 582a5d0c64142990236b40567561b6e99b7158216Tom Stellard// This file is distributed under the University of Illinois Open Source 682a5d0c64142990236b40567561b6e99b7158216Tom Stellard// License. See LICENSE.TXT for details. 782a5d0c64142990236b40567561b6e99b7158216Tom Stellard// 882a5d0c64142990236b40567561b6e99b7158216Tom Stellard//===----------------------------------------------------------------------===// 982a5d0c64142990236b40567561b6e99b7158216Tom Stellard// Vector, Reduction, and Cube instructions need to fill the entire instruction 1082a5d0c64142990236b40567561b6e99b7158216Tom Stellard// group to work correctly. This pass expands these individual instructions 1182a5d0c64142990236b40567561b6e99b7158216Tom Stellard// into several instructions that will completely fill the instruction group. 1282a5d0c64142990236b40567561b6e99b7158216Tom Stellard//===----------------------------------------------------------------------===// 1382a5d0c64142990236b40567561b6e99b7158216Tom Stellard 1482a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "AMDGPU.h" 1590bd1d52bbf95947955a66ec67f5f6c7dc87119aTom Stellard#include "R600Defines.h" 1682a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600InstrInfo.h" 1782a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600RegisterInfo.h" 1882a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h" 1982a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 2082a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 2182a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2282a5d0c64142990236b40567561b6e99b7158216Tom Stellardusing namespace llvm; 2382a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2482a5d0c64142990236b40567561b6e99b7158216Tom Stellardnamespace { 2582a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2682a5d0c64142990236b40567561b6e99b7158216Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass { 2782a5d0c64142990236b40567561b6e99b7158216Tom Stellard 2882a5d0c64142990236b40567561b6e99b7158216Tom Stellardprivate: 2982a5d0c64142990236b40567561b6e99b7158216Tom Stellard static char ID; 3082a5d0c64142990236b40567561b6e99b7158216Tom Stellard const R600InstrInfo *TII; 3182a5d0c64142990236b40567561b6e99b7158216Tom Stellard 3282a5d0c64142990236b40567561b6e99b7158216Tom Stellardpublic: 3382a5d0c64142990236b40567561b6e99b7158216Tom Stellard R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), 3482a5d0c64142990236b40567561b6e99b7158216Tom Stellard TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } 3582a5d0c64142990236b40567561b6e99b7158216Tom Stellard 3682a5d0c64142990236b40567561b6e99b7158216Tom Stellard virtual bool runOnMachineFunction(MachineFunction &MF); 3782a5d0c64142990236b40567561b6e99b7158216Tom Stellard 3882a5d0c64142990236b40567561b6e99b7158216Tom Stellard const char *getPassName() const { 3982a5d0c64142990236b40567561b6e99b7158216Tom Stellard return "R600 Expand special instructions pass"; 4082a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 4182a5d0c64142990236b40567561b6e99b7158216Tom Stellard}; 4282a5d0c64142990236b40567561b6e99b7158216Tom Stellard 4382a5d0c64142990236b40567561b6e99b7158216Tom Stellard} // End anonymous namespace 4482a5d0c64142990236b40567561b6e99b7158216Tom Stellard 4582a5d0c64142990236b40567561b6e99b7158216Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0; 4682a5d0c64142990236b40567561b6e99b7158216Tom Stellard 4782a5d0c64142990236b40567561b6e99b7158216Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { 4882a5d0c64142990236b40567561b6e99b7158216Tom Stellard return new R600ExpandSpecialInstrsPass(TM); 4982a5d0c64142990236b40567561b6e99b7158216Tom Stellard} 5082a5d0c64142990236b40567561b6e99b7158216Tom Stellard 5182a5d0c64142990236b40567561b6e99b7158216Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 5282a5d0c64142990236b40567561b6e99b7158216Tom Stellard 5382a5d0c64142990236b40567561b6e99b7158216Tom Stellard const R600RegisterInfo &TRI = TII->getRegisterInfo(); 5482a5d0c64142990236b40567561b6e99b7158216Tom Stellard 5582a5d0c64142990236b40567561b6e99b7158216Tom Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 5682a5d0c64142990236b40567561b6e99b7158216Tom Stellard BB != BB_E; ++BB) { 5782a5d0c64142990236b40567561b6e99b7158216Tom Stellard MachineBasicBlock &MBB = *BB; 5882a5d0c64142990236b40567561b6e99b7158216Tom Stellard MachineBasicBlock::iterator I = MBB.begin(); 5982a5d0c64142990236b40567561b6e99b7158216Tom Stellard while (I != MBB.end()) { 6082a5d0c64142990236b40567561b6e99b7158216Tom Stellard MachineInstr &MI = *I; 6182a5d0c64142990236b40567561b6e99b7158216Tom Stellard I = llvm::next(I); 6282a5d0c64142990236b40567561b6e99b7158216Tom Stellard 636c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard bool IsReduction = TII->isReductionOp(MI.getOpcode()); 646c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard bool IsVector = TII->isVector(MI); 651cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard bool IsCube = TII->isCubeOp(MI.getOpcode()); 661cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (!IsReduction && !IsVector && !IsCube) { 6782a5d0c64142990236b40567561b6e99b7158216Tom Stellard continue; 6882a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 6982a5d0c64142990236b40567561b6e99b7158216Tom Stellard 7082a5d0c64142990236b40567561b6e99b7158216Tom Stellard // Expand the instruction 716c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // 726c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // Reduction instructions: 736c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_X = DP4 T1_XYZW, T2_XYZW 746c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // becomes: 756c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_X = DP4 T1_X, T2_X 766c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_Y (write masked) = DP4 T1_Y, T2_Y 776c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_Z (write masked) = DP4 T1_Z, T2_Z 786c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // TO_W (write masked) = DP4 T1_W, T2_W 796c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // 806c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // Vector instructions: 816c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 826c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // becomes: 836c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_X = MULLO_INT T1_X, T2_X 846c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_Y (write masked) = MULLO_INT T1_X, T2_X 856c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_Z (write masked) = MULLO_INT T1_X, T2_X 866c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard // T0_W (write masked) = MULLO_INT T1_X, T2_X 871cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // 881cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Cube instructions: 891cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_XYZW = CUBE T1_XYZW 901cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // becomes: 911cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // TO_X = CUBE T1_Z, T1_Y 921cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_Y = CUBE T1_Z, T1_X 931cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_Z = CUBE T1_X, T1_Z 941cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // T0_W = CUBE T1_Y, T1_Z 9582a5d0c64142990236b40567561b6e99b7158216Tom Stellard for (unsigned Chan = 0; Chan < 4; Chan++) { 9682a5d0c64142990236b40567561b6e99b7158216Tom Stellard unsigned DstReg = MI.getOperand(0).getReg(); 9782a5d0c64142990236b40567561b6e99b7158216Tom Stellard unsigned Src0 = MI.getOperand(1).getReg(); 981cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned Src1 = 0; 991cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 1001cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Determine the correct source registers 1011cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (!IsCube) { 1021cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Src1 = MI.getOperand(2).getReg(); 1031cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1046c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard if (IsReduction) { 1056c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 1066c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex); 1076c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard Src1 = TRI.getSubReg(Src1, SubRegIndex); 1081cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } else if (IsCube) { 1091cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard static const int CubeSrcSwz[] = {2, 2, 0, 1}; 1101cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); 1111cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 1121cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Src1 = TRI.getSubReg(Src0, SubRegIndex1); 1131cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Src0 = TRI.getSubReg(Src0, SubRegIndex0); 1141cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1151cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 1161cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Determine the correct destination registers; 1171cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned Flags = 0; 1181cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (IsCube) { 1191cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); 1201cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard DstReg = TRI.getSubReg(DstReg, SubRegIndex); 1211cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } else { 1221cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Mask the write if the original instruction does not write to 1231cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // the current Channel. 1241cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); 1251cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned DstBase = TRI.getHWRegIndex(DstReg); 1261cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 1276c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard } 1281cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 1291cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Set the IsLast bit 13090bd1d52bbf95947955a66ec67f5f6c7dc87119aTom Stellard Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0); 1311cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard 1321cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard // Add the new instruction 1331cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard unsigned Opcode; 1341cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard if (IsCube) { 1351cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard switch (MI.getOpcode()) { 1361cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard case AMDGPU::CUBE_r600_pseudo: 1371cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = AMDGPU::CUBE_r600_real; 1381cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard break; 1391cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard case AMDGPU::CUBE_eg_pseudo: 1401cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = AMDGPU::CUBE_eg_real; 1411cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard break; 1421cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard default: 1431cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard assert(!"Unknown CUBE instruction"); 1441cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = 0; 1451cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard break; 1461cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1471cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } else { 1481cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard Opcode = MI.getOpcode(); 1491cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard } 1503a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard MachineInstr *NewMI = 1513a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg) 1523a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard .addReg(Src0) 15367a47a445b544ac638d10303dc697d70f25d12fbTom Stellard .addReg(Src1) 15467a47a445b544ac638d10303dc697d70f25d12fbTom Stellard .addImm(0); // Flag 1553a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard 1563a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard NewMI->setIsInsideBundle(Chan != 0); 1572ad8608cb3e6a8d2f375ad2295504167b082711fTom Stellard TII->addFlag(NewMI, 0, Flags); 15882a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 15982a5d0c64142990236b40567561b6e99b7158216Tom Stellard MI.eraseFromParent(); 16082a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 16182a5d0c64142990236b40567561b6e99b7158216Tom Stellard } 16282a5d0c64142990236b40567561b6e99b7158216Tom Stellard return false; 16382a5d0c64142990236b40567561b6e99b7158216Tom Stellard} 164