1//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative
/// manner. This pass merges consecutive CFAlus where applicable.
13/// It needs to be called after IfCvt for best results.
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
18#include "R600Defines.h"
19#include "R600InstrInfo.h"
20#include "R600MachineFunctionInfo.h"
21#include "R600RegisterInfo.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "r600mergeclause"
31
32namespace {
33
34static bool isCFAlu(const MachineInstr &MI) {
35  switch (MI.getOpcode()) {
36  case AMDGPU::CF_ALU:
37  case AMDGPU::CF_ALU_PUSH_BEFORE:
38    return true;
39  default:
40    return false;
41  }
42}
43
44class R600ClauseMergePass : public MachineFunctionPass {
45
46private:
47  static char ID;
48  const R600InstrInfo *TII;
49
50  unsigned getCFAluSize(const MachineInstr &MI) const;
51  bool isCFAluEnabled(const MachineInstr &MI) const;
52
53  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54  /// removed and their content affected to the previous alu clause.
55  /// This function parse instructions after CFAlu until it find a disabled
56  /// CFAlu and merge the content, or an enabled CFAlu.
57  void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
58
59  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60  /// it is the case.
61  bool mergeIfPossible(MachineInstr &RootCFAlu,
62                       const MachineInstr &LatrCFAlu) const;
63
64public:
65  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66
67  bool runOnMachineFunction(MachineFunction &MF) override;
68
69  const char *getPassName() const override;
70};
71
72char R600ClauseMergePass::ID = 0;
73
74unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
75  assert(isCFAlu(MI));
76  return MI
77      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
78      .getImm();
79}
80
81bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
82  assert(isCFAlu(MI));
83  return MI
84      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
85      .getImm();
86}
87
88void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
89    MachineInstr &CFAlu) const {
90  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
91  MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
92  I++;
93  do {
94    while (I != E && !isCFAlu(*I))
95      I++;
96    if (I == E)
97      return;
98    MachineInstr &MI = *I++;
99    if (isCFAluEnabled(MI))
100      break;
101    CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
102    MI.eraseFromParent();
103  } while (I != E);
104}
105
106bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
107                                          const MachineInstr &LatrCFAlu) const {
108  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
109  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
110  unsigned RootInstCount = getCFAluSize(RootCFAlu),
111      LaterInstCount = getCFAluSize(LatrCFAlu);
112  unsigned CumuledInsts = RootInstCount + LaterInstCount;
113  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
114    DEBUG(dbgs() << "Excess inst counts\n");
115    return false;
116  }
117  if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
118    return false;
119  // Is KCache Bank 0 compatible ?
120  int Mode0Idx =
121      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
122  int KBank0Idx =
123      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
124  int KBank0LineIdx =
125      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
126  if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
127      RootCFAlu.getOperand(Mode0Idx).getImm() &&
128      (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
129           RootCFAlu.getOperand(KBank0Idx).getImm() ||
130       LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
131           RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
132    DEBUG(dbgs() << "Wrong KC0\n");
133    return false;
134  }
135  // Is KCache Bank 1 compatible ?
136  int Mode1Idx =
137      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
138  int KBank1Idx =
139      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
140  int KBank1LineIdx =
141      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
142  if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
143      RootCFAlu.getOperand(Mode1Idx).getImm() &&
144      (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
145           RootCFAlu.getOperand(KBank1Idx).getImm() ||
146       LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
147           RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
148    DEBUG(dbgs() << "Wrong KC0\n");
149    return false;
150  }
151  if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
152    RootCFAlu.getOperand(Mode0Idx).setImm(
153        LatrCFAlu.getOperand(Mode0Idx).getImm());
154    RootCFAlu.getOperand(KBank0Idx).setImm(
155        LatrCFAlu.getOperand(KBank0Idx).getImm());
156    RootCFAlu.getOperand(KBank0LineIdx)
157        .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
158  }
159  if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
160    RootCFAlu.getOperand(Mode1Idx).setImm(
161        LatrCFAlu.getOperand(Mode1Idx).getImm());
162    RootCFAlu.getOperand(KBank1Idx).setImm(
163        LatrCFAlu.getOperand(KBank1Idx).getImm());
164    RootCFAlu.getOperand(KBank1LineIdx)
165        .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
166  }
167  RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
168  RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
169  return true;
170}
171
172bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
173  if (skipFunction(*MF.getFunction()))
174    return false;
175
176  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
177  TII = ST.getInstrInfo();
178
179  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180                                                  BB != BB_E; ++BB) {
181    MachineBasicBlock &MBB = *BB;
182    MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
183    MachineBasicBlock::iterator LatestCFAlu = E;
184    while (I != E) {
185      MachineInstr &MI = *I++;
186      if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
187          TII->mustBeLastInClause(MI.getOpcode()))
188        LatestCFAlu = E;
189      if (!isCFAlu(MI))
190        continue;
191      cleanPotentialDisabledCFAlu(MI);
192
193      if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
194        MI.eraseFromParent();
195      } else {
196        assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
197        LatestCFAlu = MI;
198      }
199    }
200  }
201  return false;
202}
203
204const char *R600ClauseMergePass::getPassName() const {
205  return "R600 Merge Clause Markers Pass";
206}
207
208} // end anonymous namespace
209
210
211llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
212  return new R600ClauseMergePass(TM);
213}
214