R600ControlFlowFinalizer.cpp revision 39cd6fae34e3cc525bc98b6b0bd24fb8e6202cc5
1//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "r600cf"
16#include "llvm/Support/Debug.h"
17#include "llvm/Support/raw_ostream.h"
18
19#include "AMDGPU.h"
20#include "R600Defines.h"
21#include "R600InstrInfo.h"
22#include "R600MachineFunctionInfo.h"
23#include "R600RegisterInfo.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27
28namespace llvm {
29
30class R600ControlFlowFinalizer : public MachineFunctionPass {
31
32private:
33  static char ID;
34  const R600InstrInfo *TII;
35  unsigned MaxFetchInst;
36
37  bool isFetch(const MachineInstr *MI) const {
38    switch (MI->getOpcode()) {
39    case AMDGPU::TEX_VTX_CONSTBUF:
40    case AMDGPU::TEX_VTX_TEXBUF:
41    case AMDGPU::TEX_LD:
42    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
43    case AMDGPU::TEX_GET_GRADIENTS_H:
44    case AMDGPU::TEX_GET_GRADIENTS_V:
45    case AMDGPU::TEX_SET_GRADIENTS_H:
46    case AMDGPU::TEX_SET_GRADIENTS_V:
47    case AMDGPU::TEX_SAMPLE:
48    case AMDGPU::TEX_SAMPLE_C:
49    case AMDGPU::TEX_SAMPLE_L:
50    case AMDGPU::TEX_SAMPLE_C_L:
51    case AMDGPU::TEX_SAMPLE_LB:
52    case AMDGPU::TEX_SAMPLE_C_LB:
53    case AMDGPU::TEX_SAMPLE_G:
54    case AMDGPU::TEX_SAMPLE_C_G:
55    case AMDGPU::TXD:
56    case AMDGPU::TXD_SHADOW:
57     return true;
58    default:
59      return false;
60    }
61  }
62
63  bool IsTrivialInst(MachineInstr *MI) const {
64    switch (MI->getOpcode()) {
65    case AMDGPU::KILL:
66    case AMDGPU::RETURN:
67      return true;
68    default:
69      return false;
70    }
71  }
72
73  MachineBasicBlock::iterator
74  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
75      unsigned CfAddress) const {
76    MachineBasicBlock::iterator ClauseHead = I;
77    unsigned AluInstCount = 0;
78    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
79      if (IsTrivialInst(I))
80        continue;
81      if (!isFetch(I))
82        break;
83      AluInstCount ++;
84      if (AluInstCount > MaxFetchInst)
85        break;
86    }
87    BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
88        TII->get(AMDGPU::CF_TC))
89        .addImm(CfAddress) // ADDR
90        .addImm(AluInstCount); // COUNT
91    return I;
92  }
93  void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
94    MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
95  }
96  void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
97      const {
98    for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
99        It != E; ++It) {
100      MachineInstr *MI = *It;
101      CounterPropagateAddr(MI, Addr);
102    }
103  }
104
105public:
106  R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
107    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
108      const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
109      if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
110        MaxFetchInst = 8;
111      else
112        MaxFetchInst = 16;
113  }
114
115  virtual bool runOnMachineFunction(MachineFunction &MF) {
116    unsigned MaxStack = 0;
117    unsigned CurrentStack = 0;
118    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
119        ++MB) {
120      MachineBasicBlock &MBB = *MB;
121      unsigned CfCount = 0;
122      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
123      std::vector<MachineInstr * > IfThenElseStack;
124      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
125      if (MFI->ShaderType == 1) {
126        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
127            TII->get(AMDGPU::CF_CALL_FS));
128        CfCount++;
129      }
130      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
131          I != E;) {
132        if (isFetch(I)) {
133          DEBUG(dbgs() << CfCount << ":"; I->dump(););
134          I = MakeFetchClause(MBB, I, 0);
135          CfCount++;
136          continue;
137        }
138
139        MachineBasicBlock::iterator MI = I;
140        I++;
141        switch (MI->getOpcode()) {
142        case AMDGPU::CF_ALU_PUSH_BEFORE:
143          CurrentStack++;
144          MaxStack = std::max(MaxStack, CurrentStack);
145        case AMDGPU::CF_ALU:
146        case AMDGPU::EG_ExportBuf:
147        case AMDGPU::EG_ExportSwz:
148        case AMDGPU::R600_ExportBuf:
149        case AMDGPU::R600_ExportSwz:
150          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
151          CfCount++;
152          break;
153        case AMDGPU::WHILELOOP: {
154          CurrentStack++;
155          MaxStack = std::max(MaxStack, CurrentStack);
156          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
157              TII->get(AMDGPU::WHILE_LOOP))
158              .addImm(2);
159          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
160              std::set<MachineInstr *>());
161          Pair.second.insert(MIb);
162          LoopStack.push_back(Pair);
163          MI->eraseFromParent();
164          CfCount++;
165          break;
166        }
167        case AMDGPU::ENDLOOP: {
168          CurrentStack--;
169          std::pair<unsigned, std::set<MachineInstr *> > Pair =
170              LoopStack.back();
171          LoopStack.pop_back();
172          CounterPropagateAddr(Pair.second, CfCount);
173          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
174              .addImm(Pair.first + 1);
175          MI->eraseFromParent();
176          CfCount++;
177          break;
178        }
179        case AMDGPU::IF_PREDICATE_SET: {
180          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
181              TII->get(AMDGPU::CF_JUMP))
182              .addImm(0)
183              .addImm(0);
184          IfThenElseStack.push_back(MIb);
185          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
186          MI->eraseFromParent();
187          CfCount++;
188          break;
189        }
190        case AMDGPU::ELSE: {
191          MachineInstr * JumpInst = IfThenElseStack.back();
192          IfThenElseStack.pop_back();
193          CounterPropagateAddr(JumpInst, CfCount);
194          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
195              TII->get(AMDGPU::CF_ELSE))
196              .addImm(0)
197              .addImm(1);
198          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
199          IfThenElseStack.push_back(MIb);
200          MI->eraseFromParent();
201          CfCount++;
202          break;
203        }
204        case AMDGPU::ENDIF: {
205          CurrentStack--;
206          MachineInstr *IfOrElseInst = IfThenElseStack.back();
207          IfThenElseStack.pop_back();
208          CounterPropagateAddr(IfOrElseInst, CfCount);
209          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
210              TII->get(AMDGPU::POP))
211              .addImm(CfCount + 1)
212              .addImm(1);
213          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
214          MI->eraseFromParent();
215          CfCount++;
216          break;
217        }
218        case AMDGPU::PREDICATED_BREAK: {
219          CurrentStack--;
220          CfCount += 3;
221          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
222              .addImm(CfCount)
223              .addImm(1);
224          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
225              TII->get(AMDGPU::LOOP_BREAK))
226              .addImm(0);
227          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
228              .addImm(CfCount)
229              .addImm(1);
230          LoopStack.back().second.insert(MIb);
231          MI->eraseFromParent();
232          break;
233        }
234        case AMDGPU::CONTINUE: {
235          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
236              TII->get(AMDGPU::CF_CONTINUE))
237              .addImm(0);
238          LoopStack.back().second.insert(MIb);
239          MI->eraseFromParent();
240          CfCount++;
241          break;
242        }
243        default:
244          break;
245        }
246      }
247      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
248          TII->get(AMDGPU::STACK_SIZE))
249          .addImm(MaxStack);
250    }
251
252    return false;
253  }
254
255  const char *getPassName() const {
256    return "R600 Control Flow Finalizer Pass";
257  }
258};
259
260char R600ControlFlowFinalizer::ID = 0;
261
262}
263
264
265llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
266  return new R600ControlFlowFinalizer(TM);
267}
268
269