R600ExpandSpecialInstrs.cpp revision 3a7a56e7aa56bc6cb847c241ef6bd749713ae6e1
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Vector, Reduction, and Cube instructions need to fill the entire instruction
// group to work correctly.  This pass expands these individual instructions
// into several instructions that will completely fill the instruction group.
//===----------------------------------------------------------------------===//
1382a5d0c64142990236b40567561b6e99b7158216Tom Stellard
1482a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "AMDGPU.h"
1582a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600InstrInfo.h"
1682a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600RegisterInfo.h"
1782a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h"
1882a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h"
1982a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h"
2082a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2182a5d0c64142990236b40567561b6e99b7158216Tom Stellardusing namespace llvm;
2282a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2382a5d0c64142990236b40567561b6e99b7158216Tom Stellardnamespace {
2482a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2582a5d0c64142990236b40567561b6e99b7158216Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass {
2682a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2782a5d0c64142990236b40567561b6e99b7158216Tom Stellardprivate:
2882a5d0c64142990236b40567561b6e99b7158216Tom Stellard  static char ID;
2982a5d0c64142990236b40567561b6e99b7158216Tom Stellard  const R600InstrInfo *TII;
3082a5d0c64142990236b40567561b6e99b7158216Tom Stellard
3182a5d0c64142990236b40567561b6e99b7158216Tom Stellardpublic:
3282a5d0c64142990236b40567561b6e99b7158216Tom Stellard  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
3382a5d0c64142990236b40567561b6e99b7158216Tom Stellard    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
3482a5d0c64142990236b40567561b6e99b7158216Tom Stellard
3582a5d0c64142990236b40567561b6e99b7158216Tom Stellard  virtual bool runOnMachineFunction(MachineFunction &MF);
3682a5d0c64142990236b40567561b6e99b7158216Tom Stellard
3782a5d0c64142990236b40567561b6e99b7158216Tom Stellard  const char *getPassName() const {
3882a5d0c64142990236b40567561b6e99b7158216Tom Stellard    return "R600 Expand special instructions pass";
3982a5d0c64142990236b40567561b6e99b7158216Tom Stellard  }
4082a5d0c64142990236b40567561b6e99b7158216Tom Stellard};
4182a5d0c64142990236b40567561b6e99b7158216Tom Stellard
4282a5d0c64142990236b40567561b6e99b7158216Tom Stellard} // End anonymous namespace
4382a5d0c64142990236b40567561b6e99b7158216Tom Stellard
4482a5d0c64142990236b40567561b6e99b7158216Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0;
4582a5d0c64142990236b40567561b6e99b7158216Tom Stellard
4682a5d0c64142990236b40567561b6e99b7158216Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
4782a5d0c64142990236b40567561b6e99b7158216Tom Stellard  return new R600ExpandSpecialInstrsPass(TM);
4882a5d0c64142990236b40567561b6e99b7158216Tom Stellard}
4982a5d0c64142990236b40567561b6e99b7158216Tom Stellard
5082a5d0c64142990236b40567561b6e99b7158216Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
5182a5d0c64142990236b40567561b6e99b7158216Tom Stellard
5282a5d0c64142990236b40567561b6e99b7158216Tom Stellard  const R600RegisterInfo &TRI = TII->getRegisterInfo();
5382a5d0c64142990236b40567561b6e99b7158216Tom Stellard
5482a5d0c64142990236b40567561b6e99b7158216Tom Stellard  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
5582a5d0c64142990236b40567561b6e99b7158216Tom Stellard                                                  BB != BB_E; ++BB) {
5682a5d0c64142990236b40567561b6e99b7158216Tom Stellard    MachineBasicBlock &MBB = *BB;
5782a5d0c64142990236b40567561b6e99b7158216Tom Stellard    MachineBasicBlock::iterator I = MBB.begin();
5882a5d0c64142990236b40567561b6e99b7158216Tom Stellard    while (I != MBB.end()) {
5982a5d0c64142990236b40567561b6e99b7158216Tom Stellard      MachineInstr &MI = *I;
6082a5d0c64142990236b40567561b6e99b7158216Tom Stellard      I = llvm::next(I);
6182a5d0c64142990236b40567561b6e99b7158216Tom Stellard
626c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      bool IsReduction = TII->isReductionOp(MI.getOpcode());
636c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      bool IsVector = TII->isVector(MI);
641cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard	    bool IsCube = TII->isCubeOp(MI.getOpcode());
651cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      if (!IsReduction && !IsVector && !IsCube) {
6682a5d0c64142990236b40567561b6e99b7158216Tom Stellard        continue;
6782a5d0c64142990236b40567561b6e99b7158216Tom Stellard      }
6882a5d0c64142990236b40567561b6e99b7158216Tom Stellard
6982a5d0c64142990236b40567561b6e99b7158216Tom Stellard      // Expand the instruction
706c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      //
716c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // Reduction instructions:
726c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_X = DP4 T1_XYZW, T2_XYZW
736c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // becomes:
746c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_X = DP4 T1_X, T2_X
756c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_Y (write masked) = DP4 T1_Y, T2_Y
766c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_Z (write masked) = DP4 T1_Z, T2_Z
776c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_W (write masked) = DP4 T1_W, T2_W
786c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      //
796c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // Vector instructions:
806c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_X = MULLO_INT T1_X, T2_X
816c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // becomes:
826c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_X = MULLO_INT T1_X, T2_X
836c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_Y (write masked) = MULLO_INT T1_X, T2_X
846c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_Z (write masked) = MULLO_INT T1_X, T2_X
856c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_W (write masked) = MULLO_INT T1_X, T2_X
861cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      //
871cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // Cube instructions:
881cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_XYZW = CUBE T1_XYZW
891cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // becomes:
901cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // TO_X = CUBE T1_Z, T1_Y
911cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_Y = CUBE T1_Z, T1_X
921cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_Z = CUBE T1_X, T1_Z
931cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_W = CUBE T1_Y, T1_Z
9482a5d0c64142990236b40567561b6e99b7158216Tom Stellard      for (unsigned Chan = 0; Chan < 4; Chan++) {
9582a5d0c64142990236b40567561b6e99b7158216Tom Stellard        unsigned DstReg = MI.getOperand(0).getReg();
9682a5d0c64142990236b40567561b6e99b7158216Tom Stellard        unsigned Src0 = MI.getOperand(1).getReg();
971cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        unsigned Src1 = 0;
981cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
991cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Determine the correct source registers
1001cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        if (!IsCube) {
1011cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Src1 = MI.getOperand(2).getReg();
1021cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        }
1036c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard        if (IsReduction) {
1046c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
1056c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard          Src0 = TRI.getSubReg(Src0, SubRegIndex);
1066c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard          Src1 = TRI.getSubReg(Src1, SubRegIndex);
1071cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        } else if (IsCube) {
1081cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          static const int CubeSrcSwz[] = {2, 2, 0, 1};
1091cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
1101cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
1111cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Src1 = TRI.getSubReg(Src0, SubRegIndex1);
1121cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Src0 = TRI.getSubReg(Src0, SubRegIndex0);
1131cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        }
1141cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
1151cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Determine the correct destination registers;
1161cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        unsigned Flags = 0;
1171cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        if (IsCube) {
1181cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
1191cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          DstReg = TRI.getSubReg(DstReg, SubRegIndex);
1201cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        } else {
1211cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          // Mask the write if the original instruction does not write to
1221cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          // the current Channel.
1231cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
1241cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned DstBase = TRI.getHWRegIndex(DstReg);
1251cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
1266c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard        }
1271cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
1281cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Set the IsLast bit
12982a5d0c64142990236b40567561b6e99b7158216Tom Stellard        Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
1301cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
1311cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Add the new instruction
1321cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        unsigned Opcode;
1331cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        if (IsCube) {
1341cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          switch (MI.getOpcode()) {
1351cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          case AMDGPU::CUBE_r600_pseudo:
1361cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            Opcode = AMDGPU::CUBE_r600_real;
1371cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            break;
1381cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          case AMDGPU::CUBE_eg_pseudo:
1391cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            Opcode = AMDGPU::CUBE_eg_real;
1401cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            break;
1411cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          default:
1421cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            assert(!"Unknown CUBE instruction");
1431cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            Opcode = 0;
1441cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            break;
1451cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          }
1461cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        } else {
1471cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Opcode = MI.getOpcode();
1481cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        }
1493a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard        MachineInstr *NewMI =
1503a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
1513a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard                  .addReg(Src0)
1523a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard                  .addReg(Src1);
1533a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard
1543a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard        NewMI->setIsInsideBundle(Chan != 0);
1553a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard        TII->AddFlag(NewMI, 0, Flags);
15682a5d0c64142990236b40567561b6e99b7158216Tom Stellard      }
15782a5d0c64142990236b40567561b6e99b7158216Tom Stellard      MI.eraseFromParent();
15882a5d0c64142990236b40567561b6e99b7158216Tom Stellard    }
15982a5d0c64142990236b40567561b6e99b7158216Tom Stellard  }
16082a5d0c64142990236b40567561b6e99b7158216Tom Stellard  return false;
16182a5d0c64142990236b40567561b6e99b7158216Tom Stellard}
162