nv50_ir_lowering_nvc0.cpp revision 07d3972b4927841bb892af16ff0389f8a241b24c
1d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller/* 2d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Copyright 2011 Christoph Bumiller 3d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 4d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Permission is hereby granted, free of charge, to any person obtaining a 5d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * copy of this software and associated documentation files (the "Software"), 6d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * to deal in the Software without restriction, including without limitation 7d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * and/or sell copies of the Software, and to permit persons to whom the 9d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Software is furnished to do so, subject to the following conditions: 10d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 11d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * The above copyright notice and this permission notice shall be included in 12d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * all copies or substantial portions of the Software. 13d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 14d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 183d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER DEALINGS IN THE SOFTWARE. 21d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller */ 2257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 235eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir.h" 245eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_build_util.h" 2557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 265eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_target_nvc0.h" 273723ff52237194995d4f9f9fb5d66fb80110889eBen Skeggs#include "codegen/nv50_ir_lowering_nvc0.h" 2857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller#include <limits> 3000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 3157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillernamespace nv50_ir { 3257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_ADD 0 3457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUBR 1 3557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUB 2 3657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_MOV2 3 3757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 38717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller// UL UR LL LR 3957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QUADOP(q, r, s, t) \ 40717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller ((QOP_##q << 6) | (QOP_##r << 4) | \ 41717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller (QOP_##s << 2) | (QOP_##t << 0)) 4257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 4357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 4457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleDIV(Instruction *i) 4557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 4657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *call; 4757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int builtin; 4857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[2]; 4957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 5157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0); 5257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0); 5357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 5457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break; 5557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break; 5657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 5757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 5857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 5957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); 6057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]); 6157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2); 6257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0); 6357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->fixed = 1; 6557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->absolute = call->builtin = 1; 6657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->target.builtin = builtin; 6757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 6857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 6957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 7157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleRCPRSQ(Instruction *i) 7257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 7357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO 7457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 7557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 7757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(Function *fn) 7857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 7957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(fn->getProgram()); 8057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 8157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 8257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 8457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(BasicBlock *bb) 8557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 8657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 8757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 8857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 8957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType == TYPE_F32) 9057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 9157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 9257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 9357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 9457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleDIV(i); 9557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 9657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 9757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: 9857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType == TYPE_F64) 9957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleRCPRSQ(i); 10057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 10157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 10257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 10357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 10457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 10557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 10657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 10757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 108e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph BumillerNVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) 10974be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee : rZero(NULL), 11074be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee carry(NULL), 11174be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee needTexBar(prog->getTarget()->getChipset() >= 0xe0) 112e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller{ 113e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller} 114e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller 11557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 11600fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insnDominatedBy(const Instruction *later, 11700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const Instruction *early) const 11800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 11900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (early->bb == later->bb) 12000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return early->serial < later->serial; 12100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return later->bb->dominatedBy(early->bb); 12200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 12300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 12400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillervoid 12540c224a573f2b763046001e622aafca90f68c693Christoph BumillerNVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, 12640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller Instruction *usei, const Instruction *insn) 12740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 12840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller bool add = true; 12940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller for (std::list<TexUse>::iterator it = uses.begin(); 13040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller it != uses.end();) { 13140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (insnDominatedBy(usei, it->insn)) { 13240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller add = false; 13340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller break; 13440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 13540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (insnDominatedBy(it->insn, usei)) 13640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller it = uses.erase(it); 13740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller else 13840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller ++it; 13940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 14040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (add) 14140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller uses.push_back(TexUse(usei, insn)); 14240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 14340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 14440c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 14540c224a573f2b763046001e622aafca90f68c693Christoph BumillerNVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, 14640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller Instruction *insn, 14740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller const BasicBlock *term, 14840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller std::list<TexUse> &uses) 14940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 15040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) 15140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller insn = insn->getSrc(0)->getUniqueInsn(); 15240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 15340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (!insn || !insn->bb->reachableBy(texi->bb, term)) 15440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller return; 15540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 15640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller switch (insn->op) { 15740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller /* Values not connected to the tex's definition through any of these should 15840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller * not be conflicting. 15940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller */ 16040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_SPLIT: 16140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_MERGE: 16240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_PHI: 16340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_UNION: 16440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller /* recurse again */ 16540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller for (int s = 0; insn->srcExists(s); ++s) 16640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, 16740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller uses); 16840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller break; 16940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller default: 17040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ? 17140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller addTexUse(uses, insn, texi); 17240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller break; 17340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 17440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 17540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 17640c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 17740c224a573f2b763046001e622aafca90f68c693Christoph BumillerNVC0LegalizePostRA::findFirstUses(const Instruction *texi, 17840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller const Instruction *insn, 17900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::list<TexUse> &uses) 18000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 18100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (int d = 0; insn->defExists(d); ++d) { 18200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Value *v = insn->getDef(d); 18300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) { 18400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *usei = (*u)->getInsn(); 18540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 18640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (usei->op == OP_PHI || usei->op == OP_UNION) { 18740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller // need a barrier before WAW cases 18840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller for (int s = 0; usei->srcExists(s); ++s) { 18940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller Instruction *defi = usei->getSrc(s)->getUniqueInsn(); 19040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (defi && &usei->src(s) != *u) 19140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller findOverwritingDefs(texi, defi, usei->bb, uses); 19240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 19340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 19440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 19500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (usei->op == OP_SPLIT || 19640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller usei->op == OP_MERGE || 19700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller usei->op == OP_PHI || 19800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller usei->op == OP_UNION) { 19900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // these uses don't manifest in the machine code 20040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller findFirstUses(texi, usei, uses); 20100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 20200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) && 20300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { 20440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller findFirstUses(texi, usei, uses); 20500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 20640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller addTexUse(uses, usei, insn); 20700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 20800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 20900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 21000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 21100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 21200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// Texture barriers: 21300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// This pass is a bit long and ugly and can probably be optimized. 21400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 21500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 1. obtain a list of TEXes and their outputs' first use(s) 21600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 2. calculate the barrier level of each first use (minimal number of TEXes, 21700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// over all paths, between the TEX and the use in question) 21800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 3. for each barrier, if all paths from the source TEX to that barrier 21900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// contain a barrier of lesser level, it can be culled 22000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 22100fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insertTextureBarriers(Function *fn) 22200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 22300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::list<TexUse> *uses; 22400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Instruction *> texes; 22500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstTex; 22600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstUse; 22700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> texCounts; 22800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<TexUse> useVec; 22900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ArrayList insns; 23000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 23100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller fn->orderInstructions(insns); 23200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 23300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts.resize(fn->allBBlocks.getSize(), 0); 23400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize()); 23500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize()); 23600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 23700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // tag BB CFG nodes by their id for later 23800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) { 23900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get()); 24000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (bb) 24100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bb->cfg.tag = bb->getId(); 24200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 24300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 24400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // gather the first uses for each TEX 24500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (int i = 0; i < insns.getSize(); ++i) { 24600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i)); 24700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(tex->op)) { 24800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes.push_back(tex); 24900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!texCounts.at(tex->bb->getId())) 25000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex[tex->bb->getId()] = texes.size() - 1; 25100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts[tex->bb->getId()]++; 25200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 25300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 25400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insns.clear(); 25500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (texes.empty()) 25600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 25700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller uses = new std::list<TexUse>[texes.size()]; 25800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!uses) 25900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 26000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < texes.size(); ++i) 26140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller findFirstUses(texes[i], texes[i], uses[i]); 26200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 26300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // determine the barrier level at each use 26400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < texes.size(); ++i) { 26500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end(); 26600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++u) { 26700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *tb = texes[i]->bb; 26800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *ub = u->insn->bb; 26900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (tb == ub) { 27000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 27100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = i + 1; j < texes.size() && 27200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == tb && texes[j]->serial < u->insn->serial; 27300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 27400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 27500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 27600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = fn->cfg.findLightestPathWeight(&tb->cfg, 27700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller &ub->cfg, texCounts); 27800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (u->level < 0) { 27900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller WARN("Failed to find path TEX -> TEXBAR\n"); 28000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 28100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 28200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 28300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // this counted all TEXes in the origin block, correct that 28400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */; 28500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // and did not count the TEXes in the destination block, add those 28600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() && 28700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == ub && texes[j]->serial < u->insn->serial; 28800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 28900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 29000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 29100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller assert(u->level >= 0); 29200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec.push_back(*u); 29300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 29400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 29500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete[] uses; 29600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller uses = NULL; 29700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 29800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // insert the barriers 29900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < useVec.size(); ++i) { 30000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = useVec[i].insn->prev; 30100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (useVec[i].level < 0) 30200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 30300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR) { 30400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev->subOp > useVec[i].level) 30500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->subOp = useVec[i].level; 30600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0)); 30700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 30800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); 30900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->fixed = 1; 31000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->subOp = useVec[i].level; 31100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // make use explicit to ease latency calculation 31200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0)); 31300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec[i].insn->bb->insertBefore(useVec[i].insn, bar); 31400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 31500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 31600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 31700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (fn->getProgram()->optLevel < 3) { 31800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (uses) 31900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete[] uses; 32000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 32100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 32200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 32300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Limits> limitT, limitB, limitS; // entry, exit, single 32400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 32500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 32600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 32700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS.resize(fn->allBBlocks.getSize()); 32800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 32900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // cull unneeded barriers (should do that earlier, but for simplicity) 33040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller IteratorRef bi = fn->cfg.iteratorCFG(); 33100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // first calculate min/max outstanding TEXes for each BB 33200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 33300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 33400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 33500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int min = 0; 33600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = std::numeric_limits<int>::max(); 33700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = i->next) { 33800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 33900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min++; 34000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (max < std::numeric_limits<int>::max()) 34100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 34200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 34300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 34400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min = MIN2(min, i->subOp); 34500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = MIN2(max, i->subOp); 34600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 34700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 34800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // limits when looking at an isolated block 34900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].min = min; 35000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].max = max; 35100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 35200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // propagate the min/max values 35300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) { 35400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 35500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 35600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 35700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int bbId = bb->getId(); 35800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) { 35900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 36000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int inId = in->getId(); 36100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min); 36200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max); 36300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 36400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // I just hope this is correct ... 36500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (limitS[bbId].max == std::numeric_limits<int>::max()) { 36600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // no barrier 36700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = limitT[bbId].min + limitS[bbId].min; 36800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = limitT[bbId].max + limitS[bbId].min; 36900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 37000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // block contained a barrier 37100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = MIN2(limitS[bbId].max, 37200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min + limitS[bbId].min); 37300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = MIN2(limitS[bbId].max, 37400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max + limitS[bbId].min); 37500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // finally delete unnecessary barriers 37900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 38000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 38100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 38200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = NULL; 38300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *next; 38400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = limitT[bb->getId()].max; 38500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = next) { 38600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller next = i->next; 38700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 38800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->subOp >= max) { 38900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, i); 3907086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco i = NULL; 39100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 39200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = i->subOp; 39300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) { 39400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, prev); 39500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = NULL; 39600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 39700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 39800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 39900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 40000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 40100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 4027086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco if (i && !i->isNop()) 40300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = i; 40400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (uses) 40700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete[] uses; 40800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 40900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 41000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 41100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 41257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(Function *fn) 41357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 41400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (needTexBar) 41500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insertTextureBarriers(fn); 41600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 4173433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller rZero = new_LValue(fn, FILE_GPR); 41899e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry = new_LValue(fn, FILE_FLAGS); 4194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 4203433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR); 42199e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry->reg.data.id = 0; 4224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 42357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 42457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 42557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 42657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 42757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::replaceZero(Instruction *i) 42857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 42957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; i->srcExists(s); ++s) { 4304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (s == 2 && i->op == OP_SUCLAMP) 4314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller continue; 43257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ImmediateValue *imm = i->getSrc(s)->asImm(); 43357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm && imm->reg.data.u64 == 0) 4343433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller i->setSrc(s, rZero); 43557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 43657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 43757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 43857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// replace CONT with BRA for single unconditional continue 43957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 44057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb) 44157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 44257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT) 44357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 44457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.incident(); 44557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 44657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 44757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 44857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 44957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *contBB = BasicBlock::get(ei.getNode()); 45057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 45157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!contBB->getExit() || contBB->getExit()->op != OP_CONT || 45257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->getPredicate()) 45357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 45457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->op = OP_BRA; 45557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getEntry()); // delete PRECONT 45657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 45757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 45857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(ei.end() || ei.getType() != Graph::Edge::BACK); 45957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 46057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 46157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 46257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// replace branches to join blocks with join ops 46357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 46457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::propagateJoin(BasicBlock *bb) 46557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 46657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit) 46757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 46857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 46957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 47057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *exit = in->getExit(); 47157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!exit) { 47257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller in->insertTail(new FlowInstruction(func, OP_JOIN, bb)); 47357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // there should always be a terminator instruction 47457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller WARN("inserted missing terminator in BB:%i\n", in->getId()); 47557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 47657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (exit->op == OP_BRA) { 47757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->op = OP_JOIN; 47857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->asFlow()->limit = 1; // must-not-propagate marker 47957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 48057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 48157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getEntry()); 48257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 48357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 48557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(BasicBlock *bb) 48657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 48757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next; 48857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // remove pseudo operations and non-fixed no-ops, split 64 bit operations 49057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getFirst(); i; i = next) { 49157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 49257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_EMIT || i->op == OP_RESTART) { 49357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!i->getDef(0)->refCount()) 49457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, NULL); 4959362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).getFile() == FILE_IMMEDIATE) 4963433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller i->setSrc(0, rZero); // initial value must be 0 49707d3972b4927841bb892af16ff0389f8a241b24cBen Skeggs replaceZero(i); 49857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 49957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isNop()) { 50057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(i); 50157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 50299e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // TODO: Move this to before register allocation for operations that 50399e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // need the $c register ! 50499e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (typeSizeof(i->dType) == 8) { 50599e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller Instruction *hi; 50699e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry); 50799e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (hi) 50899e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller next = hi; 50999e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller } 51099e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller 51157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_MOV && i->op != OP_PFETCH) 51257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller replaceZero(i); 51357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 51457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 51557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!bb->getEntry()) 51657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 51757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 51857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!tryReplaceContWithBra(bb)) 51957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller propagateJoin(bb); 52057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 52157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 52257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 52357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 52457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget()) 52557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 52657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(prog); 5274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller gMemBase = NULL; 52857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 52957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 53057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 53157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Function *fn) 53257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 53357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 53457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!strncmp(fn->getName(), "MAIN", 4)); 53557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: when we generate actual functions pass this value along somehow 53657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false); 53757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); 5382ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller if (fn->cfgExit) { 5392ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); 5402ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.mkMovToReg(0, gpEmitAddress); 5412ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller } 54257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 54357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 54457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 54557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 54657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 54757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(BasicBlock *bb) 54857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 54957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 55057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 55157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5527a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumillerinline Value * 5537a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph BumillerNVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot) 5547a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller{ 5557a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller uint8_t b = prog->driver->io.resInfoCBSlot; 5567a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller uint32_t off = prog->driver->io.texBindBase + slot * 4; 5577a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller return bld. 5587a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 5597a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller} 5607a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 56157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// move array source to first slot, convert to u16, add indirections 56257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 56357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTEX(TexInstruction *i) 56457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 56537a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 56637a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int arg = i->tex.target.getArgCount(); 5674da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller const int lyr = arg - (i->tex.target.isMS() ? 2 : 1); 56819ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin const int chipset = prog->getTarget()->getChipset(); 56957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 57019ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 5717a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 5727a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller WARN("indirect TEX not implemented\n"); 5737a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller } 574e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller if (i->tex.r == i->tex.s) { 5754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller i->tex.r += prog->driver->io.texBindBase / 4; 576e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller i->tex.s = 0; // only a single cX[] value possible here 577e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else { 5787a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *hnd = bld.getScratch(); 5797a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *rHnd = loadTexHandle(NULL, i->tex.r); 5807a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *sHnd = loadTexHandle(NULL, i->tex.s); 5817a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 5827a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, hnd, rHnd, bld.mkImm(0x1400), sHnd); 5837a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 5847a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.r = 0; // not used for indirect tex 5857a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.s = 0; 5867a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->setIndirectR(hnd); 587e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 588e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller if (i->tex.target.isArray()) { 589e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller LValue *layer = new_LValue(func, FILE_GPR); 5904da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *src = i->getSrc(lyr); 591e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller const int sat = (i->op == OP_TXF) ? 1 : 0; 592e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 593e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; 594e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller for (int s = dim; s >= 1; --s) 595e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller i->setSrc(s, i->getSrc(s - 1)); 596e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller i->setSrc(0, layer); 597e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 598e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else 599e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller // (nvc0) generate and move the tsc/tic/array source to the front 60019ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 60157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 60257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6034da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL; 60457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = dim; s >= 1; --s) 60557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, i->getSrc(s - 1)); 60657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, arrayIndex); 60757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 60857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *ticRel = i->getIndirectR(); 60957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *tscRel = i->getIndirectS(); 61057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 611e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller if (arrayIndex) { 612e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller int sat = (i->op == OP_TXF) ? 1 : 0; 613e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 614e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat; 615e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } else { 61657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.loadImm(src, 0); 617e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } 61857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 61957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ticRel) { 62057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(i->tex.rIndirectSrc, NULL); 62157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src); 62257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 62357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (tscRel) { 62457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(i->tex.sIndirectSrc, NULL); 62557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src); 62657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 62757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 62857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, src); 62957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 63057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 63119ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // For nvc0, the sample id has to be in the second operand, as the offset 63219ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // does. Right now we don't know how to pass both in, and this case can't 63319ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // happen with OpenGL. On nve0, the sample id is part of the texture 63419ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // coordinate argument. 63519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin assert(chipset >= NVISA_GK104_CHIPSET || 63619ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin !i->tex.useOffsets || !i->tex.target.isMS()); 63719ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin 638f782d6e792db2ed7773a2d22866dbcdb1e4062eeIlia Mirkin // offset is between lod and dc 63957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->tex.useOffsets) { 64057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int n, c; 64171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 642f782d6e792db2ed7773a2d22866dbcdb1e4062eeIlia Mirkin if (i->tex.target.isShadow()) 643f782d6e792db2ed7773a2d22866dbcdb1e4062eeIlia Mirkin s--; 64471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (i->srcExists(s)) // move potential predicate out of the way 64571c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->moveSources(s, 1); 646f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if (i->tex.useOffsets == 4 && i->srcExists(s + 1)) 647f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->moveSources(s + 1, 1); 648f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin if (i->op == OP_TXG) { 649f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // Either there is 1 offset, which goes into the 2 low bytes of the 650f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // first source, or there are 4 offsets, which go into 2 sources (8 651f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // values, 1 byte each). 652f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin Value *offs[2] = {NULL, NULL}; 653f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (n = 0; n < i->tex.useOffsets; n++) { 654f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 2; ++c) { 655f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if ((n % 2) == 0 && c == 0) 656f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2] = i->offset[n][c].get(); 657f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin else 658f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkOp3(OP_INSBF, TYPE_U32, 659f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2], 660f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->offset[n][c].get(), 661f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)), 662f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2]); 663f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 664f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 665f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s, offs[0]); 666f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if (offs[1]) 667f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s + 1, offs[1]); 668f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } else { 669f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin unsigned imm = 0; 670f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin assert(i->tex.useOffsets == 1); 671f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 3; ++c) { 672f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin ImmediateValue val; 673f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin assert(i->offset[0][c].getImmediate(val)); 674f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin imm |= (val.reg.data.u32 & 0xf) << (c * 4); 675f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 676f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s, bld.loadImm(NULL, imm)); 677f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } 67857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 67957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 68019ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 68171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 68271c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // If TEX requires more than 4 sources, the 2nd register tuple must be 68371c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // aligned to 4, even if it consists of just a single 4-byte register. 68471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 68571c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. 68671c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 68771c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 68871c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (s > 4 && s < 7) { 68971c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (i->srcExists(s)) // move potential predicate out of the way 69071c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->moveSources(s, 7 - s); 69171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller while (s < 7) 69271c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->setSrc(s++, bld.loadImm(NULL, 0)); 69371c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 69471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 69571c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller 69657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 69757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 69857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 69957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 70057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleManualTXD(TexInstruction *i) 70157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 70257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller static const uint8_t qOps[4][2] = 70357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 70457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 70557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 70657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 70757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 70857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller }; 70957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[4][4]; 71057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *crd[3]; 71157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *tex; 71257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *zero = bld.loadImm(bld.getSSA(), 0); 71357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int l, c; 71457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const int dim = i->tex.target.getDim(); 71557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 71657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_TEX; // no need to clone dPdx/dPdy later 71757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 71857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 71957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller crd[c] = bld.getScratch(); 72057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 72157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADON, TYPE_NONE, NULL); 72257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) { 72357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // mov coordinates from lane l to all lanes 72457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 72557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero); 72657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdx from lane l to lanes dx 72757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 72857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); 72957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdy from lane l to lanes dy 73057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 73157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); 73257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // texture 733a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez bld.insert(tex = cloneForward(func, i)); 73457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 73557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tex->setSrc(c, crd[c]); 73657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // save results 73757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 73857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mov; 73957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[c][l] = bld.getSSA(); 74057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov = bld.mkMov(def[c][l], tex->getDef(c)); 74157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->fixed = 1; 74257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->lanes = 1 << l; 74357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 74457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 74557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); 74657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 74757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 74857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); 74957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) 75057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller u->setSrc(l, def[c][l]); 75157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 75257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 75357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->bb->remove(i); 75457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 75557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 75657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 75757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 75857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTXD(TexInstruction *txd) 75957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 76057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int dim = txd->tex.target.getDim(); 76138a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller int arg = txd->tex.target.getArgCount(); 76257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 76357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleTEX(txd); 76438a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller while (txd->srcExists(arg)) 76557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++arg; 76657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7679c930639d9f6d713ccfd16b390a41a9f584f348cChristoph Bumiller txd->tex.derivAll = true; 76838a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller if (dim > 2 || 76938a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->tex.target.isCube() || 77038a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller arg > 4 || 77138a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->tex.target.isShadow()) 77257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleManualTXD(txd); 77357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 77457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int c = 0; c < dim; ++c) { 77538a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); 77638a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); 7779362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller txd->dPdx[c].set(NULL); 7789362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller txd->dPdy[c].set(NULL); 77957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 78057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 78157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 78257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 78357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 78430cb66cd745fc793a2349f1d17046c50cd51c558Christoph BumillerNVC0LoweringPass::handleTXQ(TexInstruction *txq) 78530cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller{ 78630cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller // TODO: indirect resource/sampler index 78730cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return true; 78830cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller} 78930cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller 79030cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumillerbool 791423f64e83ab5b1ea7de475ae80300a8408522743Ilia MirkinNVC0LoweringPass::handleTXLQ(TexInstruction *i) 792423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin{ 793423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The outputs are inverted compared to what the TGSI instruction 794423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * expects. Take that into account in the mask. 795423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 796423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin assert((i->tex.mask & ~3) == 0); 797423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 1) 798423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 2; 799423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin else if (i->tex.mask == 2) 800423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 1; 801423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin handleTEX(i); 802423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.setPosition(i, true); 803423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 804423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The returned values are not quite what we want: 805423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (a) convert from s16/u16 to f32 806423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (b) multiply by 1/256 807423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 808423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin for (int def = 0; def < 2; ++def) { 809423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (!i->defExists(def)) 810423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin continue; 811423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin enum DataType type = TYPE_S16; 812423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 2 || def > 0) 813423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin type = TYPE_U16; 814423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), type, i->getDef(def)); 815423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), 816423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); 817423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 818423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 3) { 819423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin LValue *t = new_LValue(func, FILE_GPR); 820423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(t, i->getDef(0)); 821423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(0), i->getDef(1)); 822423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(1), t); 823423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 824423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return true; 825423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin} 826423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 827423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 828423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkinbool 829c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph BumillerNVC0LoweringPass::handleATOM(Instruction *atom) 830c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller{ 831c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller SVSemantic sv; 832c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 833c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller switch (atom->src(0).getFile()) { 834c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_LOCAL: 835c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller sv = SV_LBASE; 836c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 837c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_SHARED: 838c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller sv = SV_SBASE; 839c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 840c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller default: 841c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); 842c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 843c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller } 844c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller Value *base = 845c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0)); 846c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller Value *ptr = atom->getIndirect(0, 0); 847c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 848c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setSrc(0, cloneShallow(func, atom->getSrc(0))); 849c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 850c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller if (ptr) 851c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr); 852c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setIndirect(0, 0, base); 853c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 854c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 855c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller} 856c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 85775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumillerbool 85875f1f852b00ad0d766684d01695322b93a2acd55Christoph BumillerNVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) 85975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller{ 86075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && 86175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) 86275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return false; 86375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, true); 86475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 86575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (needCctl) { 86675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0)); 86775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->setIndirect(0, 0, cas->getIndirect(0, 0)); 86875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->fixed = 1; 86975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->subOp = NV50_IR_SUBOP_CCTL_IV; 87075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->isPredicated()) 87175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->setPredicate(cas->cc, cas->getPredicate()); 87275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 87375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 87475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { 87575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // CAS is crazy. It's 2nd source is a double reg, and the 3rd source 87675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // should be set to the high part of the double reg or bad things will 87775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // happen elsewhere in the universe. 87875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // Also, it sometimes returns the new value instead of the old one 87975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // under mysterious circumstances. 88075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Value *dreg = bld.getSSA(8); 88175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, false); 88275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2)); 88375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->setSrc(1, dreg); 88475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 88575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 88675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return true; 88775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller} 88875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 8894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 8904506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) 8914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 8924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint8_t b = prog->driver->io.resInfoCBSlot; 8934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off += prog->driver->io.suInfoBase; 8944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 8954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 8964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 8974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 8984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 8994506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) 9004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 9014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint8_t b = prog->driver->io.msInfoCBSlot; 9024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off += prog->driver->io.msInfoBase; 9034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 9044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 9054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 9064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller/* On nvc0, surface info is obtained via the surface binding points passed 9084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * to the SULD/SUST instructions. 9094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * On nve4, surface info is stored in c[] and is used by various special 9104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * instructions, e.g. for clamping coordiantes or generating an address. 9114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * They couldn't just have added an equivalent to TIC now, couldn't they ? 9124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller */ 9134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_ADDR 0x00 9144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_FMT 0x04 9154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM_X 0x08 9164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_PITCH 0x0c 9174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM_Y 0x10 9184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_ARRAY 0x14 9194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM_Z 0x18 9204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_UNK1C 0x1c 9214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_WIDTH 0x20 9224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_HEIGHT 0x24 9234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DEPTH 0x28 9244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_TARGET 0x2c 9254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_CALL 0x30 9264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_RAW_X 0x34 9274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_MS_X 0x38 9284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_MS_Y 0x3c 9294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO__STRIDE 0x40 9314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM(i) (0x08 + (i) * 8) 9334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_SIZE(i) (0x20 + (i) * 4) 9344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_MS(i) (0x38 + (i) * 4) 9354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerstatic inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) 9374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 9384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (su->tex.target.getEnum()) { 9394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_BUFFER: return NV50_IR_SUBOP_SUCLAMP_PL(0, 1); 9404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_RECT: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D_ARRAY: return (c == 1) ? 9434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_PL(0, 2) : 9444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 9464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 9474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_3D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 9524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 9534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(0); 9544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return 0; 9554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 9564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 9574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 9594506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) 9604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 9614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const uint16_t base = tex->tex.r * NVE4_SU_INFO__STRIDE; 9624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int arg = tex->tex.target.getArgCount(); 9634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS) 9654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D; 9664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 9674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY) 9684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D_ARRAY; 9694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 9704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return; 9714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *x = tex->getSrc(0); 9734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = tex->getSrc(1); 9744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *s = tex->getSrc(arg - 1); 9754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); 9774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0)); 9794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1)); 9804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); 9824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); 9834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_AND, TYPE_U32, ts, s, bld.loadImm(NULL, 0x7)); 9854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3)); 9864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dx = loadMsInfo32(ts, 0x0); 9884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dy = loadMsInfo32(ts, 0x4); 9894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx); 9914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy); 9924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(0, tx); 9944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(1, ty); 9954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->moveSources(arg, -1); 9964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 9974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 9984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// Sets 64-bit "generic address", predicate and format sources for SULD/SUST. 9994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// They're computed from the coordinates using the surface info in c[] space. 10004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 10014506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) 10024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 10034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Instruction *insn; 10044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP; 10054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool raw = 10064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; 10074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int idx = su->tex.r; 10084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int dim = su->tex.target.getDim(); 10094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int arg = dim + (su->tex.target.isArray() ? 1 : 0); 10104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const uint16_t base = idx * NVE4_SU_INFO__STRIDE; 10114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller int c; 10124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *zero = bld.mkImm(0); 10134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *p1 = NULL; 10144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *v; 10154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *src[3]; 10164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *bf, *eau, *off; 10174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *addr, *pred; 10184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off = bld.getScratch(4); 10204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = bld.getScratch(4); 10214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller addr = bld.getSSA(8); 10224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred = bld.getScratch(1, FILE_PREDICATE); 10234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.setPosition(su, false); 10254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller adjustCoordinatesMS(su); 10274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate clamped coordinates 10294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (c = 0; c < arg; ++c) { 10304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = bld.getScratch(); 10314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (c == 0 && raw) 10324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X); 10334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 10344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c)); 10354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) 10364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = getSuClampSubOp(su, c); 10374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 10384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (; c < 3; ++c) 10394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = zero; 10404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set predicate output 10424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 10434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[0]->getInsn()->setFlagsDef(1, pred); 10444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 10454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target.isArray()) { 10464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller p1 = bld.getSSA(1, FILE_PREDICATE); 10474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[dim]->getInsn()->setFlagsDef(1, p1); 10484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 10494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate pixel offset 10514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) { 10524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target != TEX_TARGET_BUFFER) 10534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); 10544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 10554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 3) { 10564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); 10574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) 10584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 10594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); 10614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) 10624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l 10634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 10644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 2); 10654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); 10664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) 10674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = su->tex.target.isArray() ? 10684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 10694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 10704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate effective address part 1 10724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 10734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (raw) { 10744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = src[0]; 10754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 10764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); 10774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) 10784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); 10794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 10804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 10814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = src[1]; 10824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *z = src[2]; 10834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint16_t subOp = 0; 10844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (dim) { 10864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 1: 10874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller y = zero; 10884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = zero; 10894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 10904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 2: 10914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = off; 10924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (!su->tex.target.isArray()) { 10934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); 10944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 10954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 10964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 10974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 10984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 10994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 3); 11004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 11014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 11024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z); 11034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->subOp = subOp; 11044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->setFlagsDef(1, pred); 11054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 11064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // part 2 11084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR); 11094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 11114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = v; 11124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 11134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); 11144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 11154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // add array layer offset 11164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target.isArray()) { 11174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY); 11184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) 11194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) 11204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 11214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 11224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau) 11234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32 11244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // combine predicates 11254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(p1); 11264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1); 11274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 11284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom) { 11304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *lo = bf; 11314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 11324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller lo = zero; 11334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(off, bf); 11344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 11354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // bf == g[] address & 0xff 11364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // eau == g[] address >> 8 11374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, bf, lo, bld.loadImm(NULL, 0x6540), eau); 11384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau); 11394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 11404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) { 11414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Convert from u32 to u8 address format, which is what the library code 11424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // doing SULDP currently uses. 11434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: can SUEAU do this ? 11444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: does it matter that we don't mask high bytes in bf ? 11454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Grrr. 11464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8)); 11474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off); 11484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 11494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau); 11514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom && su->tex.target == TEX_TARGET_BUFFER) 11534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off); 11544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // let's just set it 0 for raw access and hope it works 11564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = raw ? 11574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); 11584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // get rid of old coordinate sources, make space for fmt info and predicate 11604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->moveSources(arg, 3 - arg); 11614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set 64 bit address and 32-bit format sources 11624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(0, addr); 11634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(1, v); 11644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(2, pred); 11654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 11664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 11684506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) 11694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 11704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller processSurfaceCoordsNVE4(su); 11714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Who do we hate more ? The person who decided that nvc0's SULD doesn't 11734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // have to support conversion or the person who decided that, in OpenCL, 11744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // you don't have to specify the format here like you do in OpenGL ? 11754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SULDP) { 11774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // We don't patch shaders. Ever. 11784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // You get an indirect call to our library blob here. 11794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // But at least it's uniform. 11804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller FlowInstruction *call; 11814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller LValue *p[3]; 11824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller LValue *r[5]; 11834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint16_t base = su->tex.r * NVE4_SU_INFO__STRIDE + NVE4_SU_INFO_CALL; 11844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 4; ++i) 11864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller (r[i] = bld.getScratch(4, FILE_GPR))->reg.data.id = i; 11874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 3; ++i) 11884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller (p[i] = bld.getScratch(1, FILE_PREDICATE))->reg.data.id = i; 11894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller (r[4] = bld.getScratch(8, FILE_GPR))->reg.data.id = 4; 11904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(p[1], bld.mkImm((su->cache == CACHE_CA) ? 1 : 0), TYPE_U8); 11924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(p[2], bld.mkImm((su->cache == CACHE_CG) ? 1 : 0), TYPE_U8); 11934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(p[0], su->getSrc(2), TYPE_U8); 11944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(r[4], su->getSrc(0), TYPE_U64); 11954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(r[2], su->getSrc(1), TYPE_U32); 11964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call = bld.mkFlow(OP_CALL, NULL, su->cc, su->getPredicate()); 11984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->indirect = 1; 12004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->absolute = 1; 12014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST, 12024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller prog->driver->io.resInfoCBSlot, TYPE_U32, 12034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller prog->driver->io.suInfoBase + base)); 12044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(1, r[2]); 12054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(2, r[4]); 12064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 3; ++i) 12074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(3 + i, p[i]); 12084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 4; ++i) { 12094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setDef(i, r[i]); 12104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(su->getDef(i), r[i]); 12114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setDef(4, p[1]); 12134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller delete_Instruction(bld.getProgram(), su); 12144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SUREDB || su->op == OP_SUREDP) { 121775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // FIXME: for out of bounds access, destination value will be undefined ! 12184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *pred = su->getSrc(2); 12194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller CondCode cc = CC_NOT_P; 12204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->getPredicate()) { 12214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred = bld.getScratch(1, FILE_PREDICATE); 12224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller cc = su->cc; 12234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (cc == CC_NOT_P) { 12244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); 12254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 12264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); 12274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT); 12284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Instruction *red = bld.mkOp(OP_ATOM, su->dType, su->getDef(0)); 12314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->subOp = su->subOp; 12324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (!gMemBase) 12334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller gMemBase = bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0); 12344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(0, gMemBase); 12354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(1, su->getSrc(3)); 12364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->subOp == NV50_IR_SUBOP_ATOM_CAS) 12374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(2, su->getSrc(4)); 12384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setIndirect(0, 0, su->getSrc(0)); 12394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setPredicate(cc, pred); 12404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller delete_Instruction(bld.getProgram(), su); 124175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(red, true); 12424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 12434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8; 12444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 12464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1247c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumillerbool 124857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleWRSV(Instruction *i) 124957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 125057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *st; 125157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym; 125257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr; 125357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 125457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // must replace, $sreg are not writeable 125557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym()); 125657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (addr >= 0x400) 125757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 125857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); 125957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 126057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), 126157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(1)); 126257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->perPatch = i->perPatch; 126357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 126457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 126557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 126657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 126757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 126857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 126957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::readTessCoord(LValue *dst, int c) 127057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 127157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *laneid = bld.getSSA(); 127257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *x, *y; 127357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 127457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0)); 127557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 127657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 0) { 127757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = dst; 127857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = NULL; 127957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 128057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 1) { 128157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = NULL; 128257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = dst; 128357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 128457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(c == 2); 128557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = bld.getSSA(); 128657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = bld.getSSA(); 128757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 128857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (x) 128957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid); 129057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (y) 12913fc2818f2b9e8a19e5349442e50dcee4858452c6Christoph Bumiller bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid); 129257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 129357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 2) { 129457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y); 129557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst); 129657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 129757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 129857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 129957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 130057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleRDSV(Instruction *i) 130157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 130257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym = i->getSrc(0)->asSym(); 1303ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller const SVSemantic sv = sym->reg.data.sv.sv; 130457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *vtx = NULL; 130557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld; 130657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); 130757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1308ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (addr >= 0x400) { 1309ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // mov $sreg 1310ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 1311ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID 1312ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 1313ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); 1314ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 131557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 1316ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 131757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1318ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller switch (sv) { 131957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_POSITION: 132057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_FRAGMENT); 132152c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); 132252c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller break; 132352c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller case SV_FACE: 132452c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller { 132552c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller Value *face = i->getDef(0); 132652c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL); 132752c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller if (i->dType == TYPE_F32) { 132852c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller bld.mkOp2(OP_AND, TYPE_U32, face, face, bld.mkImm(0x80000000)); 132952c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000)); 133052c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 133152c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 133257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 133357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_TESS_COORD: 133457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); 133557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); 133657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 1337ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NTID: 1338ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NCTAID: 1339ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_GRIDID: 1340ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise 1341ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 1342ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 1343ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); 1344ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller return true; 1345ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 1346ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller addr += prog->driver->prop.cp.gridInfoBase; 1347ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller bld.mkLoad(TYPE_U32, i->getDef(0), 1348ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); 1349ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller break; 1350af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_INDEX: 1351af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // TODO: Properly pass source as an address in the PIX address space 1352af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // (which can be of the form [r0+offset]). But this is currently 1353af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // unnecessary. 1354af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 1355af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 1356af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 1357af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_POS: { 1358af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin Value *off = new_LValue(func, FILE_GPR); 1359af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 1360af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 1361af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); 1362af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkLoad(TYPE_F32, 1363af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin i->getDef(0), 1364af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkSymbol( 1365af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, 1366af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin TYPE_U32, prog->driver->io.sampleInfoBase + 1367af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin 4 * sym->reg.data.sv.index), 1368af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin off); 1369af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 1370af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin } 1371b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin case SV_SAMPLE_MASK: 1372b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 1373b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK; 1374b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin break; 137557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 137657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) 137757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); 137857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld = bld.mkFetch(i->getDef(0), i->dType, 137957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); 138057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld->perPatch = i->perPatch; 138157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 138257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 138357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 138457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 138557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 138657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 138757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 138857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleDIV(Instruction *i) 138957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 139057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isFloatType(i->dType)) 139157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 1392b5f2c0505fd4f66422e034b041cdf0bc3dc46e99Christoph Bumiller bld.setPosition(i, false); 139357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1)); 139457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MUL; 139557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, rcp->getDef(0)); 139657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 139757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 139857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 139957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 140057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleMOD(Instruction *i) 140157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 140257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType != TYPE_F32) 140357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 140457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *value = bld.getScratch(); 140557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1)); 140657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value); 140757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_TRUNC, TYPE_F32, value, value); 140857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value); 140957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_SUB; 141057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, value); 141157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 141257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 141357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 141457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 141557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleSQRT(Instruction *i) 141657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 141757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32, 141857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getSSA(), i->getSrc(0)); 141957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MUL; 142057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, rsq->getDef(0)); 142157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 142257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 142357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 142457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 142557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 142657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handlePOW(Instruction *i) 142757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 142857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *val = bld.getScratch(); 142957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 143057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); 143157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; 143257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); 143357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 143457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_EX2; 143557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, val); 143657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 143757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 143857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 143957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 144057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 144157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 144257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleEXPORT(Instruction *i) 144357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 144457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_FRAGMENT) { 144557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int id = i->getSrc(0)->reg.data.offset / 4; 144657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 14479362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).isIndirect(0)) // TODO, ugly 144857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 144957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 145000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller i->subOp = NV50_IR_SUBOP_MOV_FINAL; 14519362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).set(i->src(1)); 145257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 145357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, new_LValue(func, FILE_GPR)); 145457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getDef(0)->reg.data.id = id; 145557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 145657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prog->maxGPR = MAX2(prog->maxGPR, id); 145757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 145857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 145957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setIndirect(0, 1, gpEmitAddress); 146057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 146157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 146257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 146357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 146457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 146557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleOUT(Instruction *i) 146657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 146757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) { 146857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART; 146957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 147057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 147157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(gpEmitAddress); 147257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, gpEmitAddress); 147357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->srcExists(0)) 147457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, i->getSrc(0)); 147557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, gpEmitAddress); 147657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 147757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 147857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 147957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 148057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Generate a binary predicate if an instruction is predicated by 148157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// e.g. an f32 value. 148257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 148357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::checkPredicate(Instruction *insn) 148457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 148557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = insn->getPredicate(); 148657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pdst; 148757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 148857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!pred || pred->reg.file == FILE_PREDICATE) 148957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 149057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller pdst = new_LValue(func, FILE_PREDICATE); 149157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 149257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // CAUTION: don't use pdst->getInsn, the definition might not be unique, 149357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass 149457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1495bbe3d6dc29f218e4d790e5ea359d3c6736e94226Dave Airlie bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, insn->dType, bld.mkImm(0), pred); 149657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 149757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->setPredicate(insn->cc, pdst); 149857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 149957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 150057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 150157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - add quadop dance for texturing 150257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - put FP outputs in GPRs 150357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - convert instruction sequences 150457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 150557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 150657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Instruction *i) 150757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 1508405bd00f3c98cb78d1dda1f3bf5d74155b18cd57Christoph Bumiller bld.setPosition(i, false); 150957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 151057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->cc != CC_ALWAYS) 151157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkPredicate(i); 151257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 151357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 151457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TEX: 151557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXB: 151657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXL: 151757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXF: 151857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXG: 151957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTEX(i->asTex()); 152057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXD: 152157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTXD(i->asTex()); 1522423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin case OP_TXLQ: 1523423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return handleTXLQ(i->asTex()); 152430cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller case OP_TXQ: 152530cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return handleTXQ(i->asTex()); 152657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: 152757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); 152857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getDef(0)); 152957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 153057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_POW: 153157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handlePOW(i); 153257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 153357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleDIV(i); 153457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 153557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleMOD(i); 153657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: 153757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleSQRT(i); 153857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EXPORT: 153957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleEXPORT(i); 154057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EMIT: 154157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RESTART: 154257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleOUT(i); 154357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RDSV: 154457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleRDSV(i); 154557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_WRSV: 154657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleWRSV(i); 154757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LOAD: 15489362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).getFile() == FILE_SHADER_INPUT) { 1549d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller if (prog->getType() == Program::TYPE_COMPUTE) { 1550d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller i->getSrc(0)->reg.file = FILE_MEMORY_CONST; 1551d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller i->getSrc(0)->reg.fileIndex = 0; 1552b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain } else 1553b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain if (prog->getType() == Program::TYPE_GEOMETRY && 1554b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain i->src(0).isIndirect(0)) { 1555b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain // XXX: this assumes vec4 units 1556b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 1557b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain i->getIndirect(0, 0), bld.mkImm(4)); 1558b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain i->setIndirect(0, 0, ptr); 1559d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller } else { 1560d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller i->op = OP_VFETCH; 1561d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP 1562d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller } 156357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 156457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 1565c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case OP_ATOM: 156675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller { 156775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; 1568c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller handleATOM(i); 156975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(i, cctl); 157075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 1571c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 15724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDB: 15734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDP: 15744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTB: 15754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTP: 15764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDB: 15774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDP: 15784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (targ->getChipset() >= NVISA_GK104_CHIPSET) 15794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller handleSurfaceOpNVE4(i->asTex()); 15804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 158157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 158257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 15836bca283ad5ebdd85e268c6757842b3c808c6b73dJohannes Obermayr } 158457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 158557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 158657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 158757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 158857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerTargetNVC0::runLegalizePass(Program *prog, CGStage stage) const 158957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 159057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_PRE_SSA) { 159157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LoweringPass pass(prog); 159257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 159357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 159457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_POST_RA) { 1595e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller NVC0LegalizePostRA pass(prog); 159657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 159757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 159857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_SSA) { 159957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LegalizeSSA pass; 160057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 160157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 160257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 160357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 160457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 160557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} // namespace nv50_ir 1606