R600ClauseMergePass.cpp revision dfef7cbfc6a96d129b99750f554c7dbc000d3228
1//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
/// This pass merges consecutive CFAlus where applicable.
13/// It needs to be called after IfCvt for best results.
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "r600mergeclause"
17#include "AMDGPU.h"
18#include "R600Defines.h"
19#include "R600InstrInfo.h"
20#include "R600MachineFunctionInfo.h"
21#include "R600RegisterInfo.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
30namespace {
31
32static bool isCFAlu(const MachineInstr *MI) {
33  switch (MI->getOpcode()) {
34  case AMDGPU::CF_ALU:
35  case AMDGPU::CF_ALU_PUSH_BEFORE:
36    return true;
37  default:
38    return false;
39  }
40}
41
42class R600ClauseMergePass : public MachineFunctionPass {
43
44private:
45  static char ID;
46  const R600InstrInfo *TII;
47
48  unsigned getCFAluSize(const MachineInstr *MI) const;
49  bool isCFAluEnabled(const MachineInstr *MI) const;
50
51  /// IfCvt pass can generate "disabled" ALU clause marker that need to be
52  /// removed and their content affected to the previous alu clause.
53  /// This function parse instructions after CFAlu untill it find a disabled
54  /// CFAlu and merge the content, or an enabled CFAlu.
55  void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
56
57  /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
58  /// it is the case.
59  bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
60      const;
61
62public:
63  R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
64
65  virtual bool runOnMachineFunction(MachineFunction &MF);
66
67  const char *getPassName() const;
68};
69
70char R600ClauseMergePass::ID = 0;
71
72unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
73  assert(isCFAlu(MI));
74  return MI->getOperand(
75      TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
76}
77
78bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
79  assert(isCFAlu(MI));
80  return MI->getOperand(
81      TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
82}
83
84void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
85    const {
86  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
87  MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
88  I++;
89  do {
90    while (I!= E && !isCFAlu(I))
91      I++;
92    if (I == E)
93      return;
94    MachineInstr *MI = I++;
95    if (isCFAluEnabled(MI))
96      break;
97    CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
98    MI->eraseFromParent();
99  } while (I != E);
100}
101
102bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
103                                          const MachineInstr *LatrCFAlu) const {
104  assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
105  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
106  unsigned RootInstCount = getCFAluSize(RootCFAlu),
107      LaterInstCount = getCFAluSize(LatrCFAlu);
108  unsigned CumuledInsts = RootInstCount + LaterInstCount;
109  if (CumuledInsts >= TII->getMaxAlusPerClause()) {
110    DEBUG(dbgs() << "Excess inst counts\n");
111    return false;
112  }
113  if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
114    return false;
115  // Is KCache Bank 0 compatible ?
116  int Mode0Idx =
117      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
118  int KBank0Idx =
119      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
120  int KBank0LineIdx =
121      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
122  if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
123      RootCFAlu->getOperand(Mode0Idx).getImm() &&
124      (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
125       RootCFAlu->getOperand(KBank0Idx).getImm() ||
126      LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
127      RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
128    DEBUG(dbgs() << "Wrong KC0\n");
129    return false;
130  }
131  // Is KCache Bank 1 compatible ?
132  int Mode1Idx =
133      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
134  int KBank1Idx =
135      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
136  int KBank1LineIdx =
137      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
138  if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
139      RootCFAlu->getOperand(Mode1Idx).getImm() &&
140      (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
141      RootCFAlu->getOperand(KBank1Idx).getImm() ||
142      LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
143      RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
144    DEBUG(dbgs() << "Wrong KC0\n");
145    return false;
146  }
147  if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
148    RootCFAlu->getOperand(Mode0Idx).setImm(
149        LatrCFAlu->getOperand(Mode0Idx).getImm());
150    RootCFAlu->getOperand(KBank0Idx).setImm(
151        LatrCFAlu->getOperand(KBank0Idx).getImm());
152    RootCFAlu->getOperand(KBank0LineIdx).setImm(
153        LatrCFAlu->getOperand(KBank0LineIdx).getImm());
154  }
155  if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
156    RootCFAlu->getOperand(Mode1Idx).setImm(
157        LatrCFAlu->getOperand(Mode1Idx).getImm());
158    RootCFAlu->getOperand(KBank1Idx).setImm(
159        LatrCFAlu->getOperand(KBank1Idx).getImm());
160    RootCFAlu->getOperand(KBank1LineIdx).setImm(
161        LatrCFAlu->getOperand(KBank1LineIdx).getImm());
162  }
163  RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
164  RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
165  return true;
166}
167
168bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
169  TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
170  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
171                                                  BB != BB_E; ++BB) {
172    MachineBasicBlock &MBB = *BB;
173    MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
174    MachineBasicBlock::iterator LatestCFAlu = E;
175    while (I != E) {
176      MachineInstr *MI = I++;
177      if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
178          TII->mustBeLastInClause(MI->getOpcode()))
179        LatestCFAlu = E;
180      if (!isCFAlu(MI))
181        continue;
182      cleanPotentialDisabledCFAlu(MI);
183
184      if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
185        MI->eraseFromParent();
186      } else {
187        assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
188        LatestCFAlu = MI;
189      }
190    }
191  }
192  return false;
193}
194
195const char *R600ClauseMergePass::getPassName() const {
196  return "R600 Merge Clause Markers Pass";
197}
198
199} // end anonymous namespace
200
201
202llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
203  return new R600ClauseMergePass(TM);
204}
205