//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
1308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune//===----------------------------------------------------------------------===// 1408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 15375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune#include "llvm/Support/Debug.h" 1608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "AMDGPU.h" 17c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines#include "AMDGPUSubtarget.h" 1808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600Defines.h" 1908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600InstrInfo.h" 2008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600MachineFunctionInfo.h" 2108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "R600RegisterInfo.h" 2208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "llvm/CodeGen/MachineFunctionPass.h" 2308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "llvm/CodeGen/MachineInstrBuilder.h" 2408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune#include "llvm/CodeGen/MachineRegisterInfo.h" 255c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/Support/raw_ostream.h" 2608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 275c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramerusing namespace llvm; 285c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer 29dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "r600cf" 30dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 315c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramernamespace { 3208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 3336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstruct CFStack { 3436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines enum StackItem { 3636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ENTRY = 0, 3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SUB_ENTRY = 1, 
3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines FIRST_NON_WQM_PUSH = 2, 3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines }; 4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 42ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines const AMDGPUSubtarget *ST; 4336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::vector<StackItem> BranchStack; 4436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::vector<StackItem> LoopStack; 4536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned MaxStackSize; 4636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned CurrentEntries; 4736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned CurrentSubEntries; 4836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 49ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st), 5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We need to reserve a stack entry for CALL_FS in vertex shaders. 5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MaxStackSize(ShaderType == ShaderType::VERTEX ? 
1 : 0), 5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurrentEntries(0), CurrentSubEntries(0) { } 5336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned getLoopDepth(); 5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool branchStackContains(CFStack::StackItem); 5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool requiresWorkAroundForInst(unsigned Opcode); 5736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned getSubEntrySize(CFStack::StackItem Item); 5836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void updateMaxStackSize(); 5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void pushBranch(unsigned Opcode, bool isWQM = false); 6036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void pushLoop(); 6136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void popBranch(); 6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void popLoop(); 6336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}; 6436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesunsigned CFStack::getLoopDepth() { 6636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return LoopStack.size(); 6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 6836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool CFStack::branchStackContains(CFStack::StackItem Item) { 7036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(), 7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines E = BranchStack.end(); I != E; ++I) { 7236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (*I == Item) 7336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 7536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 
7636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 7736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 7836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 79ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines getLoopDepth() > 1) 8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 83ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!ST->hasCFAluBug()) 8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch(Opcode) { 8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: return false; 8836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::CF_ALU_PUSH_BEFORE: 8936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::CF_ALU_ELSE_AFTER: 9036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::CF_ALU_BREAK: 9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::CF_ALU_CONTINUE: 9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (CurrentSubEntries == 0) 9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 94ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (ST->getWavefrontSize() == 64) { 9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We are being conservative here. 
We only require this work-around if 9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // CurrentSubEntries > 3 && 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We have to be conservative, because we don't know for certain that 10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // our stack allocation algorithm for Evergreen/NI is correct. Applying this 10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // resources without any problems. 10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return CurrentSubEntries > 3; 10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else { 105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(ST->getWavefrontSize() == 32); 10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We are being conservative here. We only require the work-around if 10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // CurrentSubEntries > 7 && 10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // See the comment on the wavefront size == 64 case for why we are 11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // being conservative. 
11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return CurrentSubEntries > 7; 11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 11336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 11436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 11536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 11636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesunsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 11736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch(Item) { 11836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 11936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 0; 12036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case CFStack::FIRST_NON_WQM_PUSH: 121ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(!ST->hasCaymanISA()); 122ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // +1 For the push operation. 12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // +2 Extra space required. 12536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 3; 12636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else { 12736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Some documentation says that this is not necessary on Evergreen, 12836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // but experimentation has show that we need to allocate 1 extra 12936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // sub-entry for the first non-WQM push. 13036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // +1 For the push operation. 13136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // +1 Extra space required. 
13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 2; 13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 13436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 135ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 13636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // +1 For the push operation. 13736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // +1 Extra space required. 13836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 2; 13936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case CFStack::SUB_ENTRY: 14036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 1; 14136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 14236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 14436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::updateMaxStackSize() { 14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned CurrentStackSize = CurrentEntries + 14636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (RoundUpToAlignment(CurrentSubEntries, 4) / 4); 14736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 14836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 14936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 15036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::pushBranch(unsigned Opcode, bool isWQM) { 15136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack::StackItem Item = CFStack::ENTRY; 15236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch(Opcode) { 15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::CF_PUSH_EG: 15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::CF_ALU_PUSH_BEFORE: 15536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!isWQM) { 
156ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!ST->hasCaymanISA() && 157ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 15836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 15936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // See comment in 16036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // CFStack::getSubEntrySize() 16136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines else if (CurrentEntries > 0 && 162ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 163ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines !ST->hasCaymanISA() && 16436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 16636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines else 16736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Item = CFStack::SUB_ENTRY; 16836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else 16936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Item = CFStack::ENTRY; 17036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 17136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 17236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BranchStack.push_back(Item); 17336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Item == CFStack::ENTRY) 17436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurrentEntries++; 17536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines else 17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurrentSubEntries += getSubEntrySize(Item); 17736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines updateMaxStackSize(); 17836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 17936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen 
Hines 18036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::pushLoop() { 18136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines LoopStack.push_back(CFStack::ENTRY); 18236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurrentEntries++; 18336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines updateMaxStackSize(); 18436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 18636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::popBranch() { 18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack::StackItem Top = BranchStack.back(); 18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Top == CFStack::ENTRY) 18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurrentEntries--; 19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines else 19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurrentSubEntries-= getSubEntrySize(Top); 19236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BranchStack.pop_back(); 19336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 19436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 19536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid CFStack::popLoop() { 19636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurrentEntries--; 19736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines LoopStack.pop_back(); 19836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 19936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 20008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeuneclass R600ControlFlowFinalizer : public MachineFunctionPass { 20108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 20208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeuneprivate: 203b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile; 204b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune 205bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent 
Lejeune enum ControlFlowInstruction { 206bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_TC, 207631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune CF_VC, 208bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_CALL_FS, 209bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_WHILE_LOOP, 210bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_END_LOOP, 211bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_LOOP_BREAK, 212bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_LOOP_CONTINUE, 213bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_JUMP, 214bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune CF_ELSE, 2157a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune CF_POP, 2167a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune CF_END 217bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune }; 218e7a040f9ab8f7e8defa2b9b95d1ea87911636131NAKAMURA Takumi 21908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune static char ID; 22008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune const R600InstrInfo *TII; 221b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling const R600RegisterInfo *TRI; 22208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune unsigned MaxFetchInst; 223ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines const AMDGPUSubtarget *ST; 22408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 22508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune bool IsTrivialInst(MachineInstr *MI) const { 22608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune switch (MI->getOpcode()) { 22708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::KILL: 22808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::RETURN: 22908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune return true; 23008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune default: 23108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune return false; 
23208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 23308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 23408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 235bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 2367a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune unsigned Opcode = 0; 237ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 2387a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune switch (CFI) { 2397a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_TC: 2407a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; 2417a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 242631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune case CF_VC: 243631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600; 244631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune break; 2457a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_CALL_FS: 2467a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; 2477a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2487a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_WHILE_LOOP: 2497a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600; 2507a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2517a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_END_LOOP: 2527a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? 
AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600; 2537a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2547a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_LOOP_BREAK: 2557a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600; 2567a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2577a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_LOOP_CONTINUE: 2587a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600; 2597a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2607a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_JUMP: 2617a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600; 2627a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2637a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_ELSE: 2647a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600; 2657a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2667a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_POP: 2677a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; 2687a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 2697a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case CF_END: 270ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (ST->hasCaymanISA()) { 2717a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = AMDGPU::CF_END_CM; 2727a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 273bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune } 2747a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune Opcode = isEg ? 
AMDGPU::CF_END_EG : AMDGPU::CF_END_R600; 2757a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune break; 276bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune } 2777a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune assert (Opcode && "No opcode selected"); 2787a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune return TII->get(Opcode); 279bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune } 280bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune 2817097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune bool isCompatibleWithClause(const MachineInstr *MI, 282b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune std::set<unsigned> &DstRegs) const { 2837097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune unsigned DstMI, SrcMI; 2847097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune for (MachineInstr::const_mop_iterator I = MI->operands_begin(), 2857097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune E = MI->operands_end(); I != E; ++I) { 2867097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune const MachineOperand &MO = *I; 2877097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune if (!MO.isReg()) 2887097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune continue; 289d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard if (MO.isDef()) { 290d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard unsigned Reg = MO.getReg(); 291d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard if (AMDGPU::R600_Reg128RegClass.contains(Reg)) 292d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard DstMI = Reg; 293d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard else 294b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling DstMI = TRI->getMatchingSuperReg(Reg, 295b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)), 296d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard &AMDGPU::R600_Reg128RegClass); 297d078070f6a76326853885bfa661ff4fa9755e2b8Tom Stellard } 
2987097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune if (MO.isUse()) { 2997097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune unsigned Reg = MO.getReg(); 3007097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune if (AMDGPU::R600_Reg128RegClass.contains(Reg)) 3017097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune SrcMI = Reg; 3027097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune else 303b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling SrcMI = TRI->getMatchingSuperReg(Reg, 304b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)), 3057097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune &AMDGPU::R600_Reg128RegClass); 3067097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune } 3077097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune } 308b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune if ((DstRegs.find(SrcMI) == DstRegs.end())) { 3097097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune DstRegs.insert(DstMI); 3107097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune return true; 3117097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune } else 3127097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune return false; 3137097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune } 3147097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune 315b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune ClauseFile 316b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 317b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune const { 31808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineBasicBlock::iterator ClauseHead = I; 319b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune std::vector<MachineInstr *> ClauseContent; 32008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune unsigned AluInstCount = 0; 321631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune bool IsTex = 
TII->usesTextureCache(ClauseHead); 322b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune std::set<unsigned> DstRegs; 32308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 32408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune if (IsTrivialInst(I)) 32508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune continue; 326dcfcf1d1ffe72d9c25564a2b8b53763a28648e97Vincent Lejeune if (AluInstCount >= MaxFetchInst) 327b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune break; 328631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune if ((IsTex && !TII->usesTextureCache(I)) || 329631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune (!IsTex && !TII->usesVertexCache(I))) 33008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 331b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecdVincent Lejeune if (!isCompatibleWithClause(I, DstRegs)) 3327097b1ddeab32b72edaf9e0177360b0576b7ecafVincent Lejeune break; 33308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune AluInstCount ++; 334b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune ClauseContent.push_back(I); 33508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 336b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 337631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune getHWInstrDesc(IsTex?CF_TC:CF_VC)) 338b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune .addImm(0) // ADDR 339b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune .addImm(AluInstCount - 1); // COUNT 34037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return ClauseFile(MIb, std::move(ClauseContent)); 34108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 342b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune 3435ed88013e86b14e4cd54132408354f6305d43be6Vincent Lejeune void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const { 
344787e71df693e94cc512f3e439bf91609a8ec9baeCraig Topper static const unsigned LiteralRegs[] = { 3452c836f84dba99e7b041909160c739db779760b79Vincent Lejeune AMDGPU::ALU_LITERAL_X, 3462c836f84dba99e7b041909160c739db779760b79Vincent Lejeune AMDGPU::ALU_LITERAL_Y, 3472c836f84dba99e7b041909160c739db779760b79Vincent Lejeune AMDGPU::ALU_LITERAL_Z, 3482c836f84dba99e7b041909160c739db779760b79Vincent Lejeune AMDGPU::ALU_LITERAL_W 3492c836f84dba99e7b041909160c739db779760b79Vincent Lejeune }; 35025c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs = 35125c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune TII->getSrcs(MI); 35225c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune for (unsigned i = 0, e = Srcs.size(); i < e; ++i) { 35325c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X) 3542c836f84dba99e7b041909160c739db779760b79Vincent Lejeune continue; 35525c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune int64_t Imm = Srcs[i].second; 3565ed88013e86b14e4cd54132408354f6305d43be6Vincent Lejeune std::vector<int64_t>::iterator It = 3572c836f84dba99e7b041909160c739db779760b79Vincent Lejeune std::find(Lits.begin(), Lits.end(), Imm); 3582c836f84dba99e7b041909160c739db779760b79Vincent Lejeune if (It != Lits.end()) { 3592c836f84dba99e7b041909160c739db779760b79Vincent Lejeune unsigned Index = It - Lits.begin(); 36025c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune Srcs[i].first->setReg(LiteralRegs[Index]); 3612c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } else { 3622c836f84dba99e7b041909160c739db779760b79Vincent Lejeune assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 36325c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune Srcs[i].first->setReg(LiteralRegs[Lits.size()]); 3642c836f84dba99e7b041909160c739db779760b79Vincent Lejeune Lits.push_back(Imm); 3652c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 
3662c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 3672c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 3682c836f84dba99e7b041909160c739db779760b79Vincent Lejeune 3692c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineBasicBlock::iterator insertLiterals( 3702c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineBasicBlock::iterator InsertPos, 3712c836f84dba99e7b041909160c739db779760b79Vincent Lejeune const std::vector<unsigned> &Literals) const { 3722c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineBasicBlock *MBB = InsertPos->getParent(); 3732c836f84dba99e7b041909160c739db779760b79Vincent Lejeune for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 3742c836f84dba99e7b041909160c739db779760b79Vincent Lejeune unsigned LiteralPair0 = Literals[i]; 3752c836f84dba99e7b041909160c739db779760b79Vincent Lejeune unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 3762c836f84dba99e7b041909160c739db779760b79Vincent Lejeune InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 3772c836f84dba99e7b041909160c739db779760b79Vincent Lejeune TII->get(AMDGPU::LITERALS)) 3782c836f84dba99e7b041909160c739db779760b79Vincent Lejeune .addImm(LiteralPair0) 3792c836f84dba99e7b041909160c739db779760b79Vincent Lejeune .addImm(LiteralPair1); 3802c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 3812c836f84dba99e7b041909160c739db779760b79Vincent Lejeune return InsertPos; 3822c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 3832c836f84dba99e7b041909160c739db779760b79Vincent Lejeune 3842c836f84dba99e7b041909160c739db779760b79Vincent Lejeune ClauseFile 3852c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 3862c836f84dba99e7b041909160c739db779760b79Vincent Lejeune const { 3872c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineBasicBlock::iterator ClauseHead = I; 3882c836f84dba99e7b041909160c739db779760b79Vincent Lejeune std::vector<MachineInstr *> 
ClauseContent; 3892c836f84dba99e7b041909160c739db779760b79Vincent Lejeune I++; 3902c836f84dba99e7b041909160c739db779760b79Vincent Lejeune for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { 3912c836f84dba99e7b041909160c739db779760b79Vincent Lejeune if (IsTrivialInst(I)) { 3922c836f84dba99e7b041909160c739db779760b79Vincent Lejeune ++I; 3932c836f84dba99e7b041909160c739db779760b79Vincent Lejeune continue; 3942c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 3952c836f84dba99e7b041909160c739db779760b79Vincent Lejeune if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 3962c836f84dba99e7b041909160c739db779760b79Vincent Lejeune break; 3975ed88013e86b14e4cd54132408354f6305d43be6Vincent Lejeune std::vector<int64_t> Literals; 3982c836f84dba99e7b041909160c739db779760b79Vincent Lejeune if (I->isBundle()) { 3992c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineInstr *DeleteMI = I; 4002c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 4012c836f84dba99e7b041909160c739db779760b79Vincent Lejeune while (++BI != E && BI->isBundledWithPred()) { 4022c836f84dba99e7b041909160c739db779760b79Vincent Lejeune BI->unbundleFromPred(); 4032c836f84dba99e7b041909160c739db779760b79Vincent Lejeune for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { 4042c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineOperand &MO = BI->getOperand(i); 4052c836f84dba99e7b041909160c739db779760b79Vincent Lejeune if (MO.isReg() && MO.isInternalRead()) 4062c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MO.setIsInternalRead(false); 4072c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 408cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar getLiteral(&*BI, Literals); 409cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar ClauseContent.push_back(&*BI); 4102c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 
4112c836f84dba99e7b041909160c739db779760b79Vincent Lejeune I = BI; 4122c836f84dba99e7b041909160c739db779760b79Vincent Lejeune DeleteMI->eraseFromParent(); 4132c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } else { 4142c836f84dba99e7b041909160c739db779760b79Vincent Lejeune getLiteral(I, Literals); 4152c836f84dba99e7b041909160c739db779760b79Vincent Lejeune ClauseContent.push_back(I); 4162c836f84dba99e7b041909160c739db779760b79Vincent Lejeune I++; 4172c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 4182c836f84dba99e7b041909160c739db779760b79Vincent Lejeune for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 4192c836f84dba99e7b041909160c739db779760b79Vincent Lejeune unsigned literal0 = Literals[i]; 4202c836f84dba99e7b041909160c739db779760b79Vincent Lejeune unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0; 4212c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(), 4222c836f84dba99e7b041909160c739db779760b79Vincent Lejeune TII->get(AMDGPU::LITERALS)) 4232c836f84dba99e7b041909160c739db779760b79Vincent Lejeune .addImm(literal0) 4242c836f84dba99e7b041909160c739db779760b79Vincent Lejeune .addImm(literal2); 4252c836f84dba99e7b041909160c739db779760b79Vincent Lejeune ClauseContent.push_back(MILit); 4262c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 4272c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 428f2cfef8172fd2eceb036b8caff50623a189ba2ffVincent Lejeune assert(ClauseContent.size() < 128 && "ALU clause is too big"); 4292c836f84dba99e7b041909160c739db779760b79Vincent Lejeune ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); 43037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return ClauseFile(ClauseHead, std::move(ClauseContent)); 4312c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 4322c836f84dba99e7b041909160c739db779760b79Vincent Lejeune 433b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune void 434b6379de427c009284d47c5fc764f11bbd2bf2484Vincent 
Lejeune EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, 435b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune unsigned &CfCount) { 436b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune CounterPropagateAddr(Clause.first, CfCount); 437b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune MachineBasicBlock *BB = Clause.first->getParent(); 438b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE)) 439b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune .addImm(CfCount); 440b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 441b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune BB->splice(InsertPos, BB, Clause.second[i]); 442b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune } 443b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune CfCount += 2 * Clause.second.size(); 444b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune } 445b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune 4462c836f84dba99e7b041909160c739db779760b79Vincent Lejeune void 4472c836f84dba99e7b041909160c739db779760b79Vincent Lejeune EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, 4482c836f84dba99e7b041909160c739db779760b79Vincent Lejeune unsigned &CfCount) { 449f2cfef8172fd2eceb036b8caff50623a189ba2ffVincent Lejeune Clause.first->getOperand(0).setImm(0); 4502c836f84dba99e7b041909160c739db779760b79Vincent Lejeune CounterPropagateAddr(Clause.first, CfCount); 4512c836f84dba99e7b041909160c739db779760b79Vincent Lejeune MachineBasicBlock *BB = Clause.first->getParent(); 4522c836f84dba99e7b041909160c739db779760b79Vincent Lejeune BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE)) 4532c836f84dba99e7b041909160c739db779760b79Vincent Lejeune .addImm(CfCount); 4542c836f84dba99e7b041909160c739db779760b79Vincent Lejeune for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 
4552c836f84dba99e7b041909160c739db779760b79Vincent Lejeune BB->splice(InsertPos, BB, Clause.second[i]); 4562c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 4572c836f84dba99e7b041909160c739db779760b79Vincent Lejeune CfCount += Clause.second.size(); 4582c836f84dba99e7b041909160c739db779760b79Vincent Lejeune } 4592c836f84dba99e7b041909160c739db779760b79Vincent Lejeune 46008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { 461375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm()); 46208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 46337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 46437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Addr) const { 46537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (MachineInstr *MI : MIs) { 46608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CounterPropagateAddr(MI, Addr); 46708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 46808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 46908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 47008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeunepublic: 471ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines R600ControlFlowFinalizer(TargetMachine &tm) 472ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {} 47308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 474dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool runOnMachineFunction(MachineFunction &MF) override { 475ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ST = &MF.getSubtarget<AMDGPUSubtarget>(); 476ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MaxFetchInst = ST->getTexVTXClauseSize(); 477ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines TII = static_cast<const 
R600InstrInfo *>(ST->getInstrInfo()); 478ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo()); 47936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 480b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling 48137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CFStack CFStack(ST, MFI->getShaderType()); 48208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 48308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune ++MB) { 48408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineBasicBlock &MBB = *MB; 48508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune unsigned CfCount = 0; 48608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack; 487375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune std::vector<MachineInstr * > IfThenElseStack; 48837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (MFI->getShaderType() == ShaderType::VERTEX) { 48908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 490bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune getHWInstrDesc(CF_CALL_FS)); 49108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 49208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 4932c836f84dba99e7b041909160c739db779760b79Vincent Lejeune std::vector<ClauseFile> FetchClauses, AluClauses; 494272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune std::vector<MachineInstr *> LastAlu(1); 495272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune std::vector<MachineInstr *> ToPopAfter; 496272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune 49708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 
49808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune I != E;) { 499631591e6f3e5119d8a8b1c853279bc4ac7ace4a0Vincent Lejeune if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { 500375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune DEBUG(dbgs() << CfCount << ":"; I->dump();); 501b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune FetchClauses.push_back(MakeFetchClause(MBB, I)); 50208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 503dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines LastAlu.back() = nullptr; 50408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune continue; 50508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 50608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 50708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineBasicBlock::iterator MI = I; 508272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune if (MI->getOpcode() != AMDGPU::ENDIF) 509dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines LastAlu.back() = nullptr; 510272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune if (MI->getOpcode() == AMDGPU::CF_ALU) 511272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune LastAlu.back() = MI; 51208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune I++; 51336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool RequiresWorkAround = 51436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack.requiresWorkAroundForInst(MI->getOpcode()); 51508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune switch (MI->getOpcode()) { 51608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::CF_ALU_PUSH_BEFORE: 51736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (RequiresWorkAround) { 51836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n"); 51936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) 
52036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(CfCount + 1) 52136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(1); 52236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MI->setDesc(TII->get(AMDGPU::CF_ALU)); 52336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CfCount++; 52436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack.pushBranch(AMDGPU::CF_PUSH_EG); 52536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else 52636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE); 52736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 52808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::CF_ALU: 5292c836f84dba99e7b041909160c739db779760b79Vincent Lejeune I = MI; 5302c836f84dba99e7b041909160c739db779760b79Vincent Lejeune AluClauses.push_back(MakeALUClause(MBB, I)); 531375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune DEBUG(dbgs() << CfCount << ":"; MI->dump();); 53208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 53308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 53408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::WHILELOOP: { 53536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack.pushLoop(); 53608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 537bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune getHWInstrDesc(CF_WHILE_LOOP)) 538daefc0f9c80363f55c75806dd704d5815e69353bVincent Lejeune .addImm(1); 53908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, 54008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune std::set<MachineInstr *>()); 54108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune Pair.second.insert(MIb); 54237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LoopStack.push_back(std::move(Pair)); 
54308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MI->eraseFromParent(); 54408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 54508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 54608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 54708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::ENDLOOP: { 54836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack.popLoop(); 54908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune std::pair<unsigned, std::set<MachineInstr *> > Pair = 55037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::move(LoopStack.back()); 55108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune LoopStack.pop_back(); 55208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CounterPropagateAddr(Pair.second, CfCount); 553bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) 55408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune .addImm(Pair.first + 1); 55508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MI->eraseFromParent(); 55608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 55708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 55808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 55908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::IF_PREDICATE_SET: { 560dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines LastAlu.push_back(nullptr); 56108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 562bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune getHWInstrDesc(CF_JUMP)) 56308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune .addImm(0) 56408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune .addImm(0); 565375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune IfThenElseStack.push_back(MIb); 566375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune DEBUG(dbgs() << 
CfCount << ":"; MIb->dump();); 56708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MI->eraseFromParent(); 56808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 56908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 57008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 57108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::ELSE: { 572375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune MachineInstr * JumpInst = IfThenElseStack.back(); 57308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune IfThenElseStack.pop_back(); 574375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune CounterPropagateAddr(JumpInst, CfCount); 57508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 576bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune getHWInstrDesc(CF_ELSE)) 57708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune .addImm(0) 578272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(0); 579375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 580375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune IfThenElseStack.push_back(MIb); 58108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MI->eraseFromParent(); 58208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 58308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 58408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 58508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::ENDIF: { 58636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CFStack.popBranch(); 587272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune if (LastAlu.back()) { 588272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune ToPopAfter.push_back(LastAlu.back()); 589272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune } else { 590272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune MachineInstr *MIb = 
BuildMI(MBB, MI, MBB.findDebugLoc(MI), 591272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune getHWInstrDesc(CF_POP)) 592272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(CfCount + 1) 593272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(1); 594272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune (void)MIb; 595272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 596272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune CfCount++; 597272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune } 598272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune 599375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune MachineInstr *IfOrElseInst = IfThenElseStack.back(); 60008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune IfThenElseStack.pop_back(); 601272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune CounterPropagateAddr(IfOrElseInst, CfCount); 602272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune IfOrElseInst->getOperand(1).setImm(1); 603272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune LastAlu.pop_back(); 60408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MI->eraseFromParent(); 60508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 60608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 607acf73503851815f8251b78e3b2e7cf91ef738c50Vincent Lejeune case AMDGPU::BREAK: { 608acf73503851815f8251b78e3b2e7cf91ef738c50Vincent Lejeune CfCount ++; 60908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 610bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune getHWInstrDesc(CF_LOOP_BREAK)) 61108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune .addImm(0); 61208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune LoopStack.back().second.insert(MIb); 61308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MI->eraseFromParent(); 61408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 
break; 61508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 61608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune case AMDGPU::CONTINUE: { 61708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 618bd7c634ab90ed63ee409fe781360cd42b05780f3Vincent Lejeune getHWInstrDesc(CF_LOOP_CONTINUE)) 619375d767b5408cb2ba33185921f382c5f1115bce3Vincent Lejeune .addImm(0); 62008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune LoopStack.back().second.insert(MIb); 62108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune MI->eraseFromParent(); 62208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune CfCount++; 62308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 62408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 6257a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune case AMDGPU::RETURN: { 6267a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END)); 6277a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune CfCount++; 6287a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune MI->eraseFromParent(); 6297a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune if (CfCount % 2) { 6307a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD)); 6317a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune CfCount++; 6327a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune } 633b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) 634b6379de427c009284d47c5fc764f11bbd2bf2484Vincent Lejeune EmitFetchClause(I, FetchClauses[i], CfCount); 6352c836f84dba99e7b041909160c739db779760b79Vincent Lejeune for (unsigned i = 0, e = AluClauses.size(); i < e; i++) 6362c836f84dba99e7b041909160c739db779760b79Vincent Lejeune EmitALUClause(I, AluClauses[i], CfCount); 6377a28d8afa77ac3afce265f2b61fb321e4e0d84d7Vincent Lejeune } 
63808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune default: 639e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard if (TII->isExport(MI->getOpcode())) { 640e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard DEBUG(dbgs() << CfCount << ":"; MI->dump();); 641e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard CfCount++; 642e7ac2ed1c268891a856ab38db1e34372a79da86aTom Stellard } 64308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune break; 64408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 64508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 646272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { 647272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune MachineInstr *Alu = ToPopAfter[i]; 648272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 649272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune TII->get(AMDGPU::CF_ALU_POP_AFTER)) 650272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(0).getImm()) 651272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(1).getImm()) 652272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(2).getImm()) 653272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(3).getImm()) 654272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(4).getImm()) 655272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(5).getImm()) 656272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(6).getImm()) 657272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(7).getImm()) 658272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune .addImm(Alu->getOperand(8).getImm()); 659272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune Alu->eraseFromParent(); 
660272458bd06d0c6d09e9bf776fb60735b0cdc8cf1Vincent Lejeune } 66136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MFI->StackSize = CFStack.MaxStackSize; 66208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 66308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 66408001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune return false; 66508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 66608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 667dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const char *getPassName() const override { 66808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune return "R600 Control Flow Finalizer Pass"; 66908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune } 67008001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune}; 67108001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 67208001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeunechar R600ControlFlowFinalizer::ID = 0; 67308001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 6745c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer} // end anonymous namespace 67508001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 67608001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune 67708001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeunellvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { 67808001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune return new R600ControlFlowFinalizer(TM); 67908001a5a1565adb8ce18b97537dd75075992d09aVincent Lejeune} 680