//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Vector, Reduction, and Cube instructions need to fill the entire instruction
// group to work correctly.  This pass expands these individual instructions
// into several instructions that will completely fill the instruction group.
//===----------------------------------------------------------------------===//
1382a5d0c64142990236b40567561b6e99b7158216Tom Stellard
1482a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "AMDGPU.h"
1590bd1d52bbf95947955a66ec67f5f6c7dc87119aTom Stellard#include "R600Defines.h"
1682a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600InstrInfo.h"
1782a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "R600RegisterInfo.h"
1882a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineFunctionPass.h"
1982a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h"
2082a5d0c64142990236b40567561b6e99b7158216Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h"
2182a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2282a5d0c64142990236b40567561b6e99b7158216Tom Stellardusing namespace llvm;
2382a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2482a5d0c64142990236b40567561b6e99b7158216Tom Stellardnamespace {
2582a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2682a5d0c64142990236b40567561b6e99b7158216Tom Stellardclass R600ExpandSpecialInstrsPass : public MachineFunctionPass {
2782a5d0c64142990236b40567561b6e99b7158216Tom Stellard
2882a5d0c64142990236b40567561b6e99b7158216Tom Stellardprivate:
2982a5d0c64142990236b40567561b6e99b7158216Tom Stellard  static char ID;
3082a5d0c64142990236b40567561b6e99b7158216Tom Stellard  const R600InstrInfo *TII;
3182a5d0c64142990236b40567561b6e99b7158216Tom Stellard
3282a5d0c64142990236b40567561b6e99b7158216Tom Stellardpublic:
3382a5d0c64142990236b40567561b6e99b7158216Tom Stellard  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
3482a5d0c64142990236b40567561b6e99b7158216Tom Stellard    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
3582a5d0c64142990236b40567561b6e99b7158216Tom Stellard
3682a5d0c64142990236b40567561b6e99b7158216Tom Stellard  virtual bool runOnMachineFunction(MachineFunction &MF);
3782a5d0c64142990236b40567561b6e99b7158216Tom Stellard
3882a5d0c64142990236b40567561b6e99b7158216Tom Stellard  const char *getPassName() const {
3982a5d0c64142990236b40567561b6e99b7158216Tom Stellard    return "R600 Expand special instructions pass";
4082a5d0c64142990236b40567561b6e99b7158216Tom Stellard  }
4182a5d0c64142990236b40567561b6e99b7158216Tom Stellard};
4282a5d0c64142990236b40567561b6e99b7158216Tom Stellard
4382a5d0c64142990236b40567561b6e99b7158216Tom Stellard} // End anonymous namespace
4482a5d0c64142990236b40567561b6e99b7158216Tom Stellard
4582a5d0c64142990236b40567561b6e99b7158216Tom Stellardchar R600ExpandSpecialInstrsPass::ID = 0;
4682a5d0c64142990236b40567561b6e99b7158216Tom Stellard
4782a5d0c64142990236b40567561b6e99b7158216Tom StellardFunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
4882a5d0c64142990236b40567561b6e99b7158216Tom Stellard  return new R600ExpandSpecialInstrsPass(TM);
4982a5d0c64142990236b40567561b6e99b7158216Tom Stellard}
5082a5d0c64142990236b40567561b6e99b7158216Tom Stellard
5182a5d0c64142990236b40567561b6e99b7158216Tom Stellardbool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
5282a5d0c64142990236b40567561b6e99b7158216Tom Stellard
5382a5d0c64142990236b40567561b6e99b7158216Tom Stellard  const R600RegisterInfo &TRI = TII->getRegisterInfo();
5482a5d0c64142990236b40567561b6e99b7158216Tom Stellard
5582a5d0c64142990236b40567561b6e99b7158216Tom Stellard  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
5682a5d0c64142990236b40567561b6e99b7158216Tom Stellard                                                  BB != BB_E; ++BB) {
5782a5d0c64142990236b40567561b6e99b7158216Tom Stellard    MachineBasicBlock &MBB = *BB;
5882a5d0c64142990236b40567561b6e99b7158216Tom Stellard    MachineBasicBlock::iterator I = MBB.begin();
5982a5d0c64142990236b40567561b6e99b7158216Tom Stellard    while (I != MBB.end()) {
6082a5d0c64142990236b40567561b6e99b7158216Tom Stellard      MachineInstr &MI = *I;
6182a5d0c64142990236b40567561b6e99b7158216Tom Stellard      I = llvm::next(I);
6282a5d0c64142990236b40567561b6e99b7158216Tom Stellard
636c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      bool IsReduction = TII->isReductionOp(MI.getOpcode());
646c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      bool IsVector = TII->isVector(MI);
651cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard	    bool IsCube = TII->isCubeOp(MI.getOpcode());
661cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      if (!IsReduction && !IsVector && !IsCube) {
6782a5d0c64142990236b40567561b6e99b7158216Tom Stellard        continue;
6882a5d0c64142990236b40567561b6e99b7158216Tom Stellard      }
6982a5d0c64142990236b40567561b6e99b7158216Tom Stellard
7082a5d0c64142990236b40567561b6e99b7158216Tom Stellard      // Expand the instruction
716c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      //
726c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // Reduction instructions:
736c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_X = DP4 T1_XYZW, T2_XYZW
746c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // becomes:
756c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_X = DP4 T1_X, T2_X
766c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_Y (write masked) = DP4 T1_Y, T2_Y
776c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_Z (write masked) = DP4 T1_Z, T2_Z
786c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // TO_W (write masked) = DP4 T1_W, T2_W
796c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      //
806c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // Vector instructions:
816c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_X = MULLO_INT T1_X, T2_X
826c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // becomes:
836c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_X = MULLO_INT T1_X, T2_X
846c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_Y (write masked) = MULLO_INT T1_X, T2_X
856c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_Z (write masked) = MULLO_INT T1_X, T2_X
866c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard      // T0_W (write masked) = MULLO_INT T1_X, T2_X
871cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      //
881cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // Cube instructions:
891cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_XYZW = CUBE T1_XYZW
901cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // becomes:
911cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // TO_X = CUBE T1_Z, T1_Y
921cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_Y = CUBE T1_Z, T1_X
931cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_Z = CUBE T1_X, T1_Z
941cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard      // T0_W = CUBE T1_Y, T1_Z
9582a5d0c64142990236b40567561b6e99b7158216Tom Stellard      for (unsigned Chan = 0; Chan < 4; Chan++) {
9682a5d0c64142990236b40567561b6e99b7158216Tom Stellard        unsigned DstReg = MI.getOperand(0).getReg();
9782a5d0c64142990236b40567561b6e99b7158216Tom Stellard        unsigned Src0 = MI.getOperand(1).getReg();
981cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        unsigned Src1 = 0;
991cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
1001cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Determine the correct source registers
1011cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        if (!IsCube) {
1021cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Src1 = MI.getOperand(2).getReg();
1031cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        }
1046c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard        if (IsReduction) {
1056c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
1066c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard          Src0 = TRI.getSubReg(Src0, SubRegIndex);
1076c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard          Src1 = TRI.getSubReg(Src1, SubRegIndex);
1081cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        } else if (IsCube) {
1091cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          static const int CubeSrcSwz[] = {2, 2, 0, 1};
1101cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
1111cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
1121cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Src1 = TRI.getSubReg(Src0, SubRegIndex1);
1131cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Src0 = TRI.getSubReg(Src0, SubRegIndex0);
1141cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        }
1151cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
1161cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Determine the correct destination registers;
1171cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        unsigned Flags = 0;
1181cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        if (IsCube) {
1191cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
1201cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          DstReg = TRI.getSubReg(DstReg, SubRegIndex);
1211cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        } else {
1221cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          // Mask the write if the original instruction does not write to
1231cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          // the current Channel.
1241cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
1251cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          unsigned DstBase = TRI.getHWRegIndex(DstReg);
1261cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
1276c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6Tom Stellard        }
1281cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
1291cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Set the IsLast bit
13090bd1d52bbf95947955a66ec67f5f6c7dc87119aTom Stellard        Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
1311cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard
1321cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        // Add the new instruction
1331cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        unsigned Opcode;
1341cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        if (IsCube) {
1351cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          switch (MI.getOpcode()) {
1361cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          case AMDGPU::CUBE_r600_pseudo:
1371cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            Opcode = AMDGPU::CUBE_r600_real;
1381cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            break;
1391cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          case AMDGPU::CUBE_eg_pseudo:
1401cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            Opcode = AMDGPU::CUBE_eg_real;
1411cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            break;
1421cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          default:
1431cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            assert(!"Unknown CUBE instruction");
1441cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            Opcode = 0;
1451cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard            break;
1461cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          }
1471cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        } else {
1481cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard          Opcode = MI.getOpcode();
1491cb07bd3b8abd5e52e9dbd80bb1666058545387eTom Stellard        }
1503a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard        MachineInstr *NewMI =
1513a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
1523a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard                  .addReg(Src0)
15367a47a445b544ac638d10303dc697d70f25d12fbTom Stellard                  .addReg(Src1)
15467a47a445b544ac638d10303dc697d70f25d12fbTom Stellard                  .addImm(0); // Flag
1553a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard
1563a7a56e7aa56bc6cb847c241ef6bd749713ae6e1Tom Stellard        NewMI->setIsInsideBundle(Chan != 0);
1572ad8608cb3e6a8d2f375ad2295504167b082711fTom Stellard        TII->addFlag(NewMI, 0, Flags);
15882a5d0c64142990236b40567561b6e99b7158216Tom Stellard      }
15982a5d0c64142990236b40567561b6e99b7158216Tom Stellard      MI.eraseFromParent();
16082a5d0c64142990236b40567561b6e99b7158216Tom Stellard    }
16182a5d0c64142990236b40567561b6e99b7158216Tom Stellard  }
16282a5d0c64142990236b40567561b6e99b7158216Tom Stellard  return false;
16382a5d0c64142990236b40567561b6e99b7158216Tom Stellard}
164