108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//
308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//                     The LLVM Compiler Infrastructure
408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//
508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune// This file is distributed under the University of Illinois Open Source
608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune// License. See LICENSE.TXT for details.
708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//
808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//===----------------------------------------------------------------------===//
908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//
1008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
1308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//===----------------------------------------------------------------------===//
1408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
15375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune#include "llvm/Support/Debug.h"
1608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "AMDGPU.h"
17c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines#include "AMDGPUSubtarget.h"
1808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600Defines.h"
1908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600InstrInfo.h"
2008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600MachineFunctionInfo.h"
2108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600RegisterInfo.h"
2208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "llvm/CodeGen/MachineFunctionPass.h"
2308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "llvm/CodeGen/MachineInstrBuilder.h"
2408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "llvm/CodeGen/MachineRegisterInfo.h"
255c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/Support/raw_ostream.h"
2608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
275c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramerusing namespace llvm;
285c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer
29dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "r600cf"
30dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
315c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramernamespace {
3208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
3336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstruct CFStack {
3436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
3536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  enum StackItem {
3636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    ENTRY = 0,
3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SUB_ENTRY = 1,
3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    FIRST_NON_WQM_PUSH = 2,
3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  };
4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
42ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  const AMDGPUSubtarget *ST;
4336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  std::vector<StackItem> BranchStack;
4436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  std::vector<StackItem> LoopStack;
4536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned MaxStackSize;
4636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned CurrentEntries;
4736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned CurrentSubEntries;
4836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
49ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // We need to reserve a stack entry for CALL_FS in vertex shaders.
5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      CurrentEntries(0), CurrentSubEntries(0) { }
5336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned getLoopDepth();
5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool branchStackContains(CFStack::StackItem);
5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool requiresWorkAroundForInst(unsigned Opcode);
5736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned getSubEntrySize(CFStack::StackItem Item);
5836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void updateMaxStackSize();
5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void pushBranch(unsigned Opcode, bool isWQM = false);
6036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void pushLoop();
6136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void popBranch();
6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void popLoop();
6336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines};
6436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesunsigned CFStack::getLoopDepth() {
6636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return LoopStack.size();
6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
6836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool CFStack::branchStackContains(CFStack::StackItem Item) {
7036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines       E = BranchStack.end(); I != E; ++I) {
7236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (*I == Item)
7336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return true;
7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
7536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return false;
7636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
7736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
7836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
79ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      getLoopDepth() > 1)
8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
83ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (!ST->hasCFAluBug())
8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return false;
8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch(Opcode) {
8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default: return false;
8836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case AMDGPU::CF_ALU_PUSH_BEFORE:
8936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case AMDGPU::CF_ALU_ELSE_AFTER:
9036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case AMDGPU::CF_ALU_BREAK:
9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case AMDGPU::CF_ALU_CONTINUE:
9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (CurrentSubEntries == 0)
9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return false;
94ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (ST->getWavefrontSize() == 64) {
9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // We are being conservative here.  We only require this work-around if
9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // CurrentSubEntries > 3 &&
9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      //
9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // We have to be conservative, because we don't know for certain that
10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // resources without any problems.
10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return CurrentSubEntries > 3;
10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    } else {
105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      assert(ST->getWavefrontSize() == 32);
10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // We are being conservative here.  We only require the work-around if
10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // CurrentSubEntries > 7 &&
10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // See the comment on the wavefront size == 64 case for why we are
11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // being conservative.
11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return CurrentSubEntries > 7;
11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
11336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
11436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
11536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
11636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesunsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
11736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch(Item) {
11836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default:
11936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 0;
12036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case CFStack::FIRST_NON_WQM_PUSH:
121ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  assert(!ST->hasCaymanISA());
122ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // +1 For the push operation.
12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // +2 Extra space required.
12536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 3;
12636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  } else {
12736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Some documentation says that this is not necessary on Evergreen,
12836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // but experimentation has show that we need to allocate 1 extra
12936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // sub-entry for the first non-WQM push.
13036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // +1 For the push operation.
13136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // +1 Extra space required.
13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 2;
13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
13436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
135ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
13636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // +1 For the push operation.
13736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // +1 Extra space required.
13836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 2;
13936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case CFStack::SUB_ENTRY:
14036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 1;
14136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
14236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
14436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::updateMaxStackSize() {
14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned CurrentStackSize = CurrentEntries +
14636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                              (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
14736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
14836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
14936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
15036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::pushBranch(unsigned Opcode, bool isWQM) {
15136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  CFStack::StackItem Item = CFStack::ENTRY;
15236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch(Opcode) {
15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case AMDGPU::CF_PUSH_EG:
15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case AMDGPU::CF_ALU_PUSH_BEFORE:
15536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (!isWQM) {
156ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (!ST->hasCaymanISA() &&
157ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
15836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
15936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                             // See comment in
16036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                             // CFStack::getSubEntrySize()
16136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      else if (CurrentEntries > 0 &&
162ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
163ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines               !ST->hasCaymanISA() &&
16436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
16636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      else
16736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Item = CFStack::SUB_ENTRY;
16836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    } else
16936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Item = CFStack::ENTRY;
17036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    break;
17136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
17236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  BranchStack.push_back(Item);
17336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Item == CFStack::ENTRY)
17436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    CurrentEntries++;
17536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  else
17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    CurrentSubEntries += getSubEntrySize(Item);
17736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  updateMaxStackSize();
17836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
17936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
18036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::pushLoop() {
18136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  LoopStack.push_back(CFStack::ENTRY);
18236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  CurrentEntries++;
18336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  updateMaxStackSize();
18436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
18636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::popBranch() {
18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  CFStack::StackItem Top = BranchStack.back();
18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Top == CFStack::ENTRY)
18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    CurrentEntries--;
19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  else
19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    CurrentSubEntries-= getSubEntrySize(Top);
19236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  BranchStack.pop_back();
19336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
19436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
19536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::popLoop() {
19636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  CurrentEntries--;
19736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  LoopStack.pop_back();
19836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
19936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
20008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeuneclass R600ControlFlowFinalizer : public MachineFunctionPass {
20108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
20208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeuneprivate:
203b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune  typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
204b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune
205bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune  enum ControlFlowInstruction {
206bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_TC,
207631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune    CF_VC,
208bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_CALL_FS,
209bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_WHILE_LOOP,
210bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_END_LOOP,
211bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_LOOP_BREAK,
212bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_LOOP_CONTINUE,
213bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_JUMP,
214bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    CF_ELSE,
2157a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    CF_POP,
2167a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    CF_END
217bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune  };
218e7a040f9ab8f7e8defa2b9b95d1ea87911636131NAKAMURA Takumi
21908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  static char ID;
22008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  const R600InstrInfo *TII;
221b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling  const R600RegisterInfo *TRI;
22208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  unsigned MaxFetchInst;
223ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  const AMDGPUSubtarget *ST;
22408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
22508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  bool IsTrivialInst(MachineInstr *MI) const {
22608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    switch (MI->getOpcode()) {
22708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    case AMDGPU::KILL:
22808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    case AMDGPU::RETURN:
22908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      return true;
23008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    default:
23108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      return false;
23208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    }
23308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  }
23408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
235bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
2367a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    unsigned Opcode = 0;
237ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
2387a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    switch (CFI) {
2397a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_TC:
2407a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
2417a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
242631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune    case CF_VC:
243631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune      Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
244631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune      break;
2457a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_CALL_FS:
2467a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
2477a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2487a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_WHILE_LOOP:
2497a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
2507a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2517a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_END_LOOP:
2527a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
2537a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2547a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_LOOP_BREAK:
2557a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
2567a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2577a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_LOOP_CONTINUE:
2587a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
2597a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2607a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_JUMP:
2617a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
2627a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2637a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_ELSE:
2647a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
2657a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2667a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_POP:
2677a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
2687a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
2697a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    case CF_END:
270ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (ST->hasCaymanISA()) {
2717a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune        Opcode = AMDGPU::CF_END_CM;
2727a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune        break;
273bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune      }
2747a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
2757a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune      break;
276bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune    }
2777a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    assert (Opcode && "No opcode selected");
2787a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune    return TII->get(Opcode);
279bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune  }
280bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune
2817097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune  bool isCompatibleWithClause(const MachineInstr *MI,
282b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune      std::set<unsigned> &DstRegs) const {
2837097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune    unsigned DstMI, SrcMI;
2847097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune    for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
2857097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune        E = MI->operands_end(); I != E; ++I) {
2867097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune      const MachineOperand &MO = *I;
2877097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune      if (!MO.isReg())
2887097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune        continue;
289d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard      if (MO.isDef()) {
290d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard        unsigned Reg = MO.getReg();
291d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
292d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard          DstMI = Reg;
293d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard        else
294b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling          DstMI = TRI->getMatchingSuperReg(Reg,
295b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling              TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
296d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard              &AMDGPU::R600_Reg128RegClass);
297d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard      }
2987097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune      if (MO.isUse()) {
2997097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune        unsigned Reg = MO.getReg();
3007097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
3017097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune          SrcMI = Reg;
3027097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune        else
303b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling          SrcMI = TRI->getMatchingSuperReg(Reg,
304b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling              TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
3057097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune              &AMDGPU::R600_Reg128RegClass);
3067097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune      }
3077097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune    }
308b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
3097097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune      DstRegs.insert(DstMI);
3107097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune      return true;
3117097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune    } else
3127097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune      return false;
3137097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune  }
3147097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune
315b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune  ClauseFile
316b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
317b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune      const {
31808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    MachineBasicBlock::iterator ClauseHead = I;
319b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    std::vector<MachineInstr *> ClauseContent;
32008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    unsigned AluInstCount = 0;
321631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune    bool IsTex = TII->usesTextureCache(ClauseHead);
322b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune    std::set<unsigned> DstRegs;
32308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
32408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      if (IsTrivialInst(I))
32508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        continue;
326dcfcf1d1ffe72d9c25564a2b8b53763a28648e97Vincent Lejeune      if (AluInstCount >= MaxFetchInst)
327b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune        break;
328631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune      if ((IsTex && !TII->usesTextureCache(I)) ||
329631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune          (!IsTex && !TII->usesVertexCache(I)))
33008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        break;
331b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune      if (!isCompatibleWithClause(I, DstRegs))
3327097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune        break;
33308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      AluInstCount ++;
334b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune      ClauseContent.push_back(I);
33508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    }
336b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
337631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune        getHWInstrDesc(IsTex?CF_TC:CF_VC))
338b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune        .addImm(0) // ADDR
339b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune        .addImm(AluInstCount - 1); // COUNT
34037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return ClauseFile(MIb, std::move(ClauseContent));
34108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  }
342b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune
3435ed88013e86b14e4cd54132408354f6305d43be6Vincent Lejeune  void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
344787e71df693e94cc512f3e439bf91609a8ec9baeCraig Topper    static const unsigned LiteralRegs[] = {
3452c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      AMDGPU::ALU_LITERAL_X,
3462c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      AMDGPU::ALU_LITERAL_Y,
3472c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      AMDGPU::ALU_LITERAL_Z,
3482c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      AMDGPU::ALU_LITERAL_W
3492c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    };
35025c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune    const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
35125c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune        TII->getSrcs(MI);
35225c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune    for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
35325c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune      if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
3542c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        continue;
35525c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune      int64_t Imm = Srcs[i].second;
3565ed88013e86b14e4cd54132408354f6305d43be6Vincent Lejeune      std::vector<int64_t>::iterator It =
3572c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          std::find(Lits.begin(), Lits.end(), Imm);
3582c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      if (It != Lits.end()) {
3592c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        unsigned Index = It - Lits.begin();
36025c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune        Srcs[i].first->setReg(LiteralRegs[Index]);
3612c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      } else {
3622c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
36325c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune        Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
3642c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        Lits.push_back(Imm);
3652c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      }
3662c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    }
3672c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  }
3682c836f84dba99e7b041909160c739db779760b79Vincent Lejeune
3692c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  MachineBasicBlock::iterator insertLiterals(
3702c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      MachineBasicBlock::iterator InsertPos,
3712c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      const std::vector<unsigned> &Literals) const {
3722c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    MachineBasicBlock *MBB = InsertPos->getParent();
3732c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
3742c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      unsigned LiteralPair0 = Literals[i];
3752c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
3762c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
3772c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          TII->get(AMDGPU::LITERALS))
3782c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          .addImm(LiteralPair0)
3792c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          .addImm(LiteralPair1);
3802c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    }
3812c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    return InsertPos;
3822c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  }
3832c836f84dba99e7b041909160c739db779760b79Vincent Lejeune
3842c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  ClauseFile
3852c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
3862c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      const {
3872c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    MachineBasicBlock::iterator ClauseHead = I;
3882c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    std::vector<MachineInstr *> ClauseContent;
3892c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    I++;
3902c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
3912c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      if (IsTrivialInst(I)) {
3922c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        ++I;
3932c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        continue;
3942c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      }
3952c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
3962c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        break;
3975ed88013e86b14e4cd54132408354f6305d43be6Vincent Lejeune      std::vector<int64_t> Literals;
3982c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      if (I->isBundle()) {
3992c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        MachineInstr *DeleteMI = I;
4002c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
4012c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        while (++BI != E && BI->isBundledWithPred()) {
4022c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          BI->unbundleFromPred();
4032c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
4042c836f84dba99e7b041909160c739db779760b79Vincent Lejeune            MachineOperand &MO = BI->getOperand(i);
4052c836f84dba99e7b041909160c739db779760b79Vincent Lejeune            if (MO.isReg() && MO.isInternalRead())
4062c836f84dba99e7b041909160c739db779760b79Vincent Lejeune              MO.setIsInternalRead(false);
4072c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          }
408cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar          getLiteral(&*BI, Literals);
409cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar          ClauseContent.push_back(&*BI);
4102c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        }
4112c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        I = BI;
4122c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        DeleteMI->eraseFromParent();
4132c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      } else {
4142c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        getLiteral(I, Literals);
4152c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        ClauseContent.push_back(I);
4162c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        I++;
4172c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      }
4182c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
4192c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        unsigned literal0 = Literals[i];
4202c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
4212c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
4222c836f84dba99e7b041909160c739db779760b79Vincent Lejeune            TII->get(AMDGPU::LITERALS))
4232c836f84dba99e7b041909160c739db779760b79Vincent Lejeune            .addImm(literal0)
4242c836f84dba99e7b041909160c739db779760b79Vincent Lejeune            .addImm(literal2);
4252c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        ClauseContent.push_back(MILit);
4262c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      }
4272c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    }
428f2cfef8172fd2eceb036b8caff50623a189ba2ffVincent Lejeune    assert(ClauseContent.size() < 128 && "ALU clause is too big");
4292c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
43037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return ClauseFile(ClauseHead, std::move(ClauseContent));
4312c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  }
4322c836f84dba99e7b041909160c739db779760b79Vincent Lejeune
433b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune  void
434b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune  EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
435b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune      unsigned &CfCount) {
436b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    CounterPropagateAddr(Clause.first, CfCount);
437b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    MachineBasicBlock *BB = Clause.first->getParent();
438b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
439b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune        .addImm(CfCount);
440b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
441b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune      BB->splice(InsertPos, BB, Clause.second[i]);
442b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    }
443b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune    CfCount += 2 * Clause.second.size();
444b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune  }
445b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune
4462c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  void
4472c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
4482c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      unsigned &CfCount) {
449f2cfef8172fd2eceb036b8caff50623a189ba2ffVincent Lejeune    Clause.first->getOperand(0).setImm(0);
4502c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    CounterPropagateAddr(Clause.first, CfCount);
4512c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    MachineBasicBlock *BB = Clause.first->getParent();
4522c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
4532c836f84dba99e7b041909160c739db779760b79Vincent Lejeune        .addImm(CfCount);
4542c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
4552c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      BB->splice(InsertPos, BB, Clause.second[i]);
4562c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    }
4572c836f84dba99e7b041909160c739db779760b79Vincent Lejeune    CfCount += Clause.second.size();
4582c836f84dba99e7b041909160c739db779760b79Vincent Lejeune  }
4592c836f84dba99e7b041909160c739db779760b79Vincent Lejeune
46008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
461375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune    MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
46208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  }
46337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
46437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                            unsigned Addr) const {
46537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (MachineInstr *MI : MIs) {
46608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      CounterPropagateAddr(MI, Addr);
46708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    }
46808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  }
46908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
47008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeunepublic:
471ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  R600ControlFlowFinalizer(TargetMachine &tm)
472ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
47308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
474dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool runOnMachineFunction(MachineFunction &MF) override {
475ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    ST = &MF.getSubtarget<AMDGPUSubtarget>();
476ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    MaxFetchInst = ST->getTexVTXClauseSize();
477ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
478ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
47936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
480b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling
48137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    CFStack CFStack(ST, MFI->getShaderType());
48208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
48308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        ++MB) {
48408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      MachineBasicBlock &MBB = *MB;
48508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      unsigned CfCount = 0;
48608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
487375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune      std::vector<MachineInstr * > IfThenElseStack;
48837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (MFI->getShaderType() == ShaderType::VERTEX) {
48908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
490bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune            getHWInstrDesc(CF_CALL_FS));
49108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        CfCount++;
49208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      }
4932c836f84dba99e7b041909160c739db779760b79Vincent Lejeune      std::vector<ClauseFile> FetchClauses, AluClauses;
494272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune      std::vector<MachineInstr *> LastAlu(1);
495272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune      std::vector<MachineInstr *> ToPopAfter;
496272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune
49708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
49808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          I != E;) {
499631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
500375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          DEBUG(dbgs() << CfCount << ":"; I->dump(););
501b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune          FetchClauses.push_back(MakeFetchClause(MBB, I));
50208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CfCount++;
503dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          LastAlu.back() = nullptr;
50408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          continue;
50508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
50608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
50708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        MachineBasicBlock::iterator MI = I;
508272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune        if (MI->getOpcode() != AMDGPU::ENDIF)
509dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          LastAlu.back() = nullptr;
510272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune        if (MI->getOpcode() == AMDGPU::CF_ALU)
511272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune          LastAlu.back() = MI;
51208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        I++;
51336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        bool RequiresWorkAround =
51436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            CFStack.requiresWorkAroundForInst(MI->getOpcode());
51508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        switch (MI->getOpcode()) {
51608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::CF_ALU_PUSH_BEFORE:
51736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          if (RequiresWorkAround) {
51836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
51936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
52036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                .addImm(CfCount + 1)
52136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                .addImm(1);
52236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            MI->setDesc(TII->get(AMDGPU::CF_ALU));
52336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            CfCount++;
52436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
52536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          } else
52636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
52736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
52808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::CF_ALU:
5292c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          I = MI;
5302c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          AluClauses.push_back(MakeALUClause(MBB, I));
531375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
53208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CfCount++;
53308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
53408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::WHILELOOP: {
53536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          CFStack.pushLoop();
53608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
537bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune              getHWInstrDesc(CF_WHILE_LOOP))
538daefc0f9c80363f55c75806dd704d5815e69353bVincent Lejeune              .addImm(1);
53908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
54008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune              std::set<MachineInstr *>());
54108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          Pair.second.insert(MIb);
54237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          LoopStack.push_back(std::move(Pair));
54308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MI->eraseFromParent();
54408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CfCount++;
54508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
54608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
54708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::ENDLOOP: {
54836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          CFStack.popLoop();
54908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          std::pair<unsigned, std::set<MachineInstr *> > Pair =
55037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              std::move(LoopStack.back());
55108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          LoopStack.pop_back();
55208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CounterPropagateAddr(Pair.second, CfCount);
553bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
55408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune              .addImm(Pair.first + 1);
55508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MI->eraseFromParent();
55608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CfCount++;
55708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
55808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
55908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::IF_PREDICATE_SET: {
560dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          LastAlu.push_back(nullptr);
56108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
562bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune              getHWInstrDesc(CF_JUMP))
56308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune              .addImm(0)
56408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune              .addImm(0);
565375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          IfThenElseStack.push_back(MIb);
566375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
56708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MI->eraseFromParent();
56808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CfCount++;
56908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
57008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
57108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::ELSE: {
572375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          MachineInstr * JumpInst = IfThenElseStack.back();
57308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          IfThenElseStack.pop_back();
574375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          CounterPropagateAddr(JumpInst, CfCount);
57508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
576bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune              getHWInstrDesc(CF_ELSE))
57708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune              .addImm(0)
578272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune              .addImm(0);
579375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
580375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          IfThenElseStack.push_back(MIb);
58108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MI->eraseFromParent();
58208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CfCount++;
58308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
58408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
58508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::ENDIF: {
58636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          CFStack.popBranch();
587272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune          if (LastAlu.back()) {
588272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            ToPopAfter.push_back(LastAlu.back());
589272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune          } else {
590272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
591272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune                getHWInstrDesc(CF_POP))
592272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune                .addImm(CfCount + 1)
593272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune                .addImm(1);
594272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            (void)MIb;
595272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
596272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            CfCount++;
597272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune          }
598272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune
599375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune          MachineInstr *IfOrElseInst = IfThenElseStack.back();
60008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          IfThenElseStack.pop_back();
601272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune          CounterPropagateAddr(IfOrElseInst, CfCount);
602272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune          IfOrElseInst->getOperand(1).setImm(1);
603272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune          LastAlu.pop_back();
60408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MI->eraseFromParent();
60508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
60608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
607acf73503851815f8251b78e3b2e7cf91ef738c50Vincent Lejeune        case AMDGPU::BREAK: {
608acf73503851815f8251b78e3b2e7cf91ef738c50Vincent Lejeune          CfCount ++;
60908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
610bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune              getHWInstrDesc(CF_LOOP_BREAK))
61108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune              .addImm(0);
61208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          LoopStack.back().second.insert(MIb);
61308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MI->eraseFromParent();
61408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
61508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
61608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        case AMDGPU::CONTINUE: {
61708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
618bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune              getHWInstrDesc(CF_LOOP_CONTINUE))
619375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune              .addImm(0);
62008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          LoopStack.back().second.insert(MIb);
62108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          MI->eraseFromParent();
62208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          CfCount++;
62308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
62408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
6257a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune        case AMDGPU::RETURN: {
6267a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
6277a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune          CfCount++;
6287a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune          MI->eraseFromParent();
6297a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune          if (CfCount % 2) {
6307a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune            BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
6317a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune            CfCount++;
6327a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune          }
633b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
634b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune            EmitFetchClause(I, FetchClauses[i], CfCount);
6352c836f84dba99e7b041909160c739db779760b79Vincent Lejeune          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
6362c836f84dba99e7b041909160c739db779760b79Vincent Lejeune            EmitALUClause(I, AluClauses[i], CfCount);
6377a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune        }
63808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        default:
639e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard          if (TII->isExport(MI->getOpcode())) {
640e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard            DEBUG(dbgs() << CfCount << ":"; MI->dump(););
641e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard            CfCount++;
642e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard          }
64308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune          break;
64408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune        }
64508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune      }
646272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
647272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune        MachineInstr *Alu = ToPopAfter[i];
648272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
649272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            TII->get(AMDGPU::CF_ALU_POP_AFTER))
650272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(0).getImm())
651272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(1).getImm())
652272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(2).getImm())
653272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(3).getImm())
654272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(4).getImm())
655272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(5).getImm())
656272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(6).getImm())
657272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(7).getImm())
658272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune            .addImm(Alu->getOperand(8).getImm());
659272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune        Alu->eraseFromParent();
660272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune      }
66136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      MFI->StackSize = CFStack.MaxStackSize;
66208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    }
66308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
66408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    return false;
66508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  }
66608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
667dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const char *getPassName() const override {
66808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune    return "R600 Control Flow Finalizer Pass";
66908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  }
67008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune};
67108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
// Definition of the pass ID member; the constructor passes it to the
// MachineFunctionPass base class.
67208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeunechar R600ControlFlowFinalizer::ID = 0;
67308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
6745c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer} // end anonymous namespace
67508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
67608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune
67708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeunellvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
67808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune  return new R600ControlFlowFinalizer(TM);
67908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune}
680