nv50_ir_lowering_nvc0.cpp revision 217301843aea0299ab245e260b20af7ad250e9d8
1d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller/* 2d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Copyright 2011 Christoph Bumiller 3d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 4d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Permission is hereby granted, free of charge, to any person obtaining a 5d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * copy of this software and associated documentation files (the "Software"), 6d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * to deal in the Software without restriction, including without limitation 7d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * and/or sell copies of the Software, and to permit persons to whom the 9d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Software is furnished to do so, subject to the following conditions: 10d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 11d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * The above copyright notice and this permission notice shall be included in 12d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * all copies or substantial portions of the Software. 13d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 14d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 173d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 183d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER DEALINGS IN THE SOFTWARE. 21d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller */ 2257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 235eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir.h" 245eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_build_util.h" 2557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 265eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_target_nvc0.h" 273723ff52237194995d4f9f9fb5d66fb80110889eBen Skeggs#include "codegen/nv50_ir_lowering_nvc0.h" 2857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller#include <limits> 3000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 3157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillernamespace nv50_ir { 3257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_ADD 0 3457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUBR 1 3557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUB 2 3657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_MOV2 3 3757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 38717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller// UL UR LL LR 3957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QUADOP(q, r, s, t) \ 40717f55d79d9709a31e0f85a87f076ac13446701dChristoph 
Bumiller ((QOP_##q << 6) | (QOP_##r << 4) | \ 41717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller (QOP_##s << 2) | (QOP_##t << 0)) 4257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 4357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 4457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleDIV(Instruction *i) 4557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 4657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *call; 4757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int builtin; 4857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[2]; 4957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 5157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0); 5257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0); 5357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 5457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break; 5557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break; 5657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 5757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 5857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 5957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); 6057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]); 6157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 
0xe : 0xd, 2); 6257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0); 6357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->fixed = 1; 6557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->absolute = call->builtin = 1; 6657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->target.builtin = builtin; 6757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 6857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 6957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 7157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleRCPRSQ(Instruction *i) 7257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 73b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin assert(i->dType == TYPE_F64); 74b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // There are instructions that will compute the high 32 bits of the 64-bit 75b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // float. We will just stick 0 in the bottom 32 bits. 76b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 77b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.setPosition(i, false); 78b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 79b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 1. Take the source and it up. 80b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin Value *src[2], *dst[2], *def = i->getDef(0); 81b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkSplit(src, 4, i->getSrc(0)); 82b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 83b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. 
84b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin dst[0] = bld.loadImm(NULL, 0); 85b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin dst[1] = bld.getSSA(); 86b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 87b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 3. The new version of the instruction takes the high 32 bits of the 88b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // source and outputs the high 32 bits of the destination. 89b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setSrc(0, src[1]); 90b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setDef(0, dst[1]); 91b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setType(TYPE_F32); 92b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->subOp = NV50_IR_SUBOP_RCPRSQ_64H; 93b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 94b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 4. Recombine the two dst pieces back into the original destination. 95b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.setPosition(i, true); 96b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); 9757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 9857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 996fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkinvoid 1006fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia MirkinNVC0LegalizeSSA::handleFTZ(Instruction *i) 1016fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin{ 1026fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // Only want to flush float inputs 103d1eea18a595a468dbc2267a8d14197a3b1a5a4b6Ilia Mirkin assert(i->sType == TYPE_F32); 1046fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1056fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // If we're already flushing denorms (and NaN's) to zero, no need for this. 
1066fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (i->dnz) 1076fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin return; 1086fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1096fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // Only certain classes of operations can flush 1106fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin OpClass cls = prog->getTarget()->getOpClass(i->op); 1116fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE && 1126fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin cls != OPCLASS_CONVERT) 1136fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin return; 1146fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1156fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin i->ftz = true; 1166fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin} 1176fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 11857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 11957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(Function *fn) 12057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 12157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(fn->getProgram()); 12257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 12357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 12457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 12657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(BasicBlock *bb) 12757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 12857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 12957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 13057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 131d1eea18a595a468dbc2267a8d14197a3b1a5a4b6Ilia Mirkin if (i->sType == TYPE_F32) { 
1326fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (prog->getType() != Program::TYPE_COMPUTE) 1336fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin handleFTZ(i); 13457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 1356fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin } 13657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 13757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 13857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 13957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleDIV(i); 14057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 14157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 14257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: 14357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType == TYPE_F64) 14457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleRCPRSQ(i); 14557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 14657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 14757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 14857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 14957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 15057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 15157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 15257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 153e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph BumillerNVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) 15474be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee : rZero(NULL), 15574be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee carry(NULL), 15674be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee needTexBar(prog->getTarget()->getChipset() >= 0xe0) 157e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller{ 
158e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller} 159e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller 16057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 16100fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insnDominatedBy(const Instruction *later, 16200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const Instruction *early) const 16300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 16400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (early->bb == later->bb) 16500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return early->serial < later->serial; 16600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return later->bb->dominatedBy(early->bb); 16700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 16800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 16900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillervoid 17040c224a573f2b763046001e622aafca90f68c693Christoph BumillerNVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, 17140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller Instruction *usei, const Instruction *insn) 17240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 17340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller bool add = true; 17440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller for (std::list<TexUse>::iterator it = uses.begin(); 17540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller it != uses.end();) { 17640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (insnDominatedBy(usei, it->insn)) { 17740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller add = false; 17840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller break; 17940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 18040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (insnDominatedBy(it->insn, usei)) 18140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller it = 
uses.erase(it); 18240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller else 18340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller ++it; 18440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 18540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (add) 18640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller uses.push_back(TexUse(usei, insn)); 18740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 18840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 18940c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 19040c224a573f2b763046001e622aafca90f68c693Christoph BumillerNVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, 19140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller Instruction *insn, 19240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller const BasicBlock *term, 19340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller std::list<TexUse> &uses) 19440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 19540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) 19640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller insn = insn->getSrc(0)->getUniqueInsn(); 19740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 1981792d609007798adbe621048e81a883edb31a199Ilia Mirkin if (!insn->bb->reachableBy(texi->bb, term)) 19940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller return; 20040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 20140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller switch (insn->op) { 20240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller /* Values not connected to the tex's definition through any of these should 20340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller * not be conflicting. 
20440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller */ 20540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_SPLIT: 20640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_MERGE: 20740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_PHI: 20840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller case OP_UNION: 20940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller /* recurse again */ 21040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller for (int s = 0; insn->srcExists(s); ++s) 21140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, 21240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller uses); 21340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller break; 21440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller default: 21540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ? 
21640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller addTexUse(uses, insn, texi); 21740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller break; 21840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 21940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 22040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 22140c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 222c4bb436f7660c951cd27e52660cf825da68793e5Ilia MirkinNVC0LegalizePostRA::findFirstUses( 223c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin const Instruction *texi, 224c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin const Instruction *insn, 225c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin std::list<TexUse> &uses, 226c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin std::tr1::unordered_set<const Instruction *>& visited) 22700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 22800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (int d = 0; insn->defExists(d); ++d) { 22900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Value *v = insn->getDef(d); 23000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) { 23100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *usei = (*u)->getInsn(); 23240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 233ca9ab05d45ebf407485af2daa3742b897ff99162Christoph Bumiller // NOTE: In case of a loop that overwrites a value but never uses 234ca9ab05d45ebf407485af2daa3742b897ff99162Christoph Bumiller // it, it can happen that we have a cycle of uses that consists only 235ca9ab05d45ebf407485af2daa3742b897ff99162Christoph Bumiller // of phis and no-op moves and will thus cause an infinite loop here 236ca9ab05d45ebf407485af2daa3742b897ff99162Christoph Bumiller // since these are not considered actual uses. 
237ca9ab05d45ebf407485af2daa3742b897ff99162Christoph Bumiller // The most obvious (and perhaps the only) way to prevent this is to 238ca9ab05d45ebf407485af2daa3742b897ff99162Christoph Bumiller // remember which instructions we've already visited. 239ca9ab05d45ebf407485af2daa3742b897ff99162Christoph Bumiller 240c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin if (visited.find(usei) != visited.end()) 241c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin continue; 242c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin 243c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin visited.insert(usei); 244c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin 24540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (usei->op == OP_PHI || usei->op == OP_UNION) { 24640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller // need a barrier before WAW cases 24740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller for (int s = 0; usei->srcExists(s); ++s) { 24840c224a573f2b763046001e622aafca90f68c693Christoph Bumiller Instruction *defi = usei->getSrc(s)->getUniqueInsn(); 24940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (defi && &usei->src(s) != *u) 25040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller findOverwritingDefs(texi, defi, usei->bb, uses); 25140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 25240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 25340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 25400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (usei->op == OP_SPLIT || 25540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller usei->op == OP_MERGE || 25600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller usei->op == OP_PHI || 25700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller usei->op == OP_UNION) { 25800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // these uses don't manifest in the machine code 259c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin 
findFirstUses(texi, usei, uses, visited); 26000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 26100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) && 26200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { 263c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin findFirstUses(texi, usei, uses, visited); 26400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 26540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller addTexUse(uses, usei, insn); 26600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 26700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 26800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 26900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 27000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 27100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// Texture barriers: 27200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// This pass is a bit long and ugly and can probably be optimized. 27300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 27400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 1. obtain a list of TEXes and their outputs' first use(s) 27500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 2. calculate the barrier level of each first use (minimal number of TEXes, 27600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// over all paths, between the TEX and the use in question) 27700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 3. 
for each barrier, if all paths from the source TEX to that barrier 27800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// contain a barrier of lesser level, it can be culled 27900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 28000fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insertTextureBarriers(Function *fn) 28100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 28200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::list<TexUse> *uses; 28300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Instruction *> texes; 28400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstTex; 28500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstUse; 28600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> texCounts; 28700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<TexUse> useVec; 28800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ArrayList insns; 28900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 29000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller fn->orderInstructions(insns); 29100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 29200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts.resize(fn->allBBlocks.getSize(), 0); 29300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize()); 29400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize()); 29500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 29600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // tag BB CFG nodes by their id for later 29700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) { 29800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 
BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get()); 29900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (bb) 30000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bb->cfg.tag = bb->getId(); 30100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 30200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 30300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // gather the first uses for each TEX 30400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (int i = 0; i < insns.getSize(); ++i) { 30500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i)); 30600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(tex->op)) { 30700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes.push_back(tex); 30800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!texCounts.at(tex->bb->getId())) 30900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex[tex->bb->getId()] = texes.size() - 1; 31000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts[tex->bb->getId()]++; 31100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 31200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 31300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insns.clear(); 31400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (texes.empty()) 31500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 31600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller uses = new std::list<TexUse>[texes.size()]; 31700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!uses) 31800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 319c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin for (size_t i = 0; i < texes.size(); ++i) { 320c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin std::tr1::unordered_set<const Instruction *> visited; 
321c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin findFirstUses(texes[i], texes[i], uses[i], visited); 322c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin } 32300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 32400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // determine the barrier level at each use 32500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < texes.size(); ++i) { 32600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end(); 32700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++u) { 32800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *tb = texes[i]->bb; 32900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *ub = u->insn->bb; 33000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (tb == ub) { 33100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 33200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = i + 1; j < texes.size() && 33300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == tb && texes[j]->serial < u->insn->serial; 33400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 33500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 33600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 33700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = fn->cfg.findLightestPathWeight(&tb->cfg, 33800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller &ub->cfg, texCounts); 33900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (u->level < 0) { 34000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller WARN("Failed to find path TEX -> TEXBAR\n"); 34100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 34200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 
34300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 34400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // this counted all TEXes in the origin block, correct that 34500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */; 34600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // and did not count the TEXes in the destination block, add those 34700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() && 34800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == ub && texes[j]->serial < u->insn->serial; 34900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 35000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 35100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 35200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller assert(u->level >= 0); 35300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec.push_back(*u); 35400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 35500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 35600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete[] uses; 35700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 35800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // insert the barriers 35900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < useVec.size(); ++i) { 36000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = useVec[i].insn->prev; 36100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (useVec[i].level < 0) 36200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 36300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR) { 36400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev->subOp > useVec[i].level) 
36500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->subOp = useVec[i].level; 36600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0)); 36700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 36800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); 36900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->fixed = 1; 37000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->subOp = useVec[i].level; 37100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // make use explicit to ease latency calculation 37200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0)); 37300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec[i].insn->bb->insertBefore(useVec[i].insn, bar); 37400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 3775966903c28a13f13923de308c5f5116a0d5c8cbdIlia Mirkin if (fn->getProgram()->optLevel < 3) 37800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 37900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 38000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Limits> limitT, limitB, limitS; // entry, exit, single 38100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 38200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 38300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 38400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS.resize(fn->allBBlocks.getSize()); 38500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 38600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // 
cull unneeded barriers (should do that earlier, but for simplicity) 38740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller IteratorRef bi = fn->cfg.iteratorCFG(); 38800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // first calculate min/max outstanding TEXes for each BB 38900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 39000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 39100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 39200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int min = 0; 39300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = std::numeric_limits<int>::max(); 39400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = i->next) { 39500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 39600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min++; 39700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (max < std::numeric_limits<int>::max()) 39800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 39900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 40000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 40100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min = MIN2(min, i->subOp); 40200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = MIN2(max, i->subOp); 40300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // limits when looking at an isolated block 40600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].min = min; 40700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].max = 
max; 40800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // propagate the min/max values 41000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) { 41100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 41200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 41300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 41400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int bbId = bb->getId(); 41500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) { 41600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 41700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int inId = in->getId(); 41800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min); 41900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max); 42000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 42100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // I just hope this is correct ... 
42200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (limitS[bbId].max == std::numeric_limits<int>::max()) { 42300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // no barrier 42400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = limitT[bbId].min + limitS[bbId].min; 42500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = limitT[bbId].max + limitS[bbId].min; 42600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 42700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // block contained a barrier 42800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = MIN2(limitS[bbId].max, 42900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min + limitS[bbId].min); 43000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = MIN2(limitS[bbId].max, 43100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max + limitS[bbId].min); 43200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 43300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 43400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 43500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // finally delete unnecessary barriers 43600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 43700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 43800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 43900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = NULL; 44000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *next; 44100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = limitT[bb->getId()].max; 44200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = 
next) { 44300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller next = i->next; 44400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 44500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->subOp >= max) { 44600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, i); 4477086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco i = NULL; 44800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 44900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = i->subOp; 45000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) { 45100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, prev); 45200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = NULL; 45300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 45400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 45500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 45600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 45700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 45800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 4597086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco if (i && !i->isNop()) 46000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = i; 46100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 46200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 46300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 46400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 46500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 46600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 46757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(Function *fn) 46857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 
46900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (needTexBar) 47000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insertTextureBarriers(fn); 47100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 4723433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller rZero = new_LValue(fn, FILE_GPR); 47399e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry = new_LValue(fn, FILE_FLAGS); 4744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 4753433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR); 47699e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry->reg.data.id = 0; 4774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 47857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 47957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 48057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 48257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::replaceZero(Instruction *i) 48357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 48457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; i->srcExists(s); ++s) { 4854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (s == 2 && i->op == OP_SUCLAMP) 4864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller continue; 48757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ImmediateValue *imm = i->getSrc(s)->asImm(); 48857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm && imm->reg.data.u64 == 0) 4893433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller i->setSrc(s, rZero); 49057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 49157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 49257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 49357594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller// replace CONT with BRA for single unconditional continue 49457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 49557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb) 49657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 49757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT) 49857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 49957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.incident(); 50057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 50157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 50257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 50357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 50457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *contBB = BasicBlock::get(ei.getNode()); 50557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 50657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!contBB->getExit() || contBB->getExit()->op != OP_CONT || 50757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->getPredicate()) 50857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 50957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->op = OP_BRA; 51057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getEntry()); // delete PRECONT 51157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 51257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 51357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(ei.end() || ei.getType() != Graph::Edge::BACK); 51457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 
51557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 51657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 51757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// replace branches to join blocks with join ops 51857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 51957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::propagateJoin(BasicBlock *bb) 52057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 52157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit) 52257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 52357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 52457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 52557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *exit = in->getExit(); 52657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!exit) { 52757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller in->insertTail(new FlowInstruction(func, OP_JOIN, bb)); 52857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // there should always be a terminator instruction 52957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller WARN("inserted missing terminator in BB:%i\n", in->getId()); 53057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 53157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (exit->op == OP_BRA) { 53257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->op = OP_JOIN; 53357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->asFlow()->limit = 1; // must-not-propagate marker 53457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 53557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 53657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
bb->remove(bb->getEntry()); 53757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 53857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 53957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 54057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(BasicBlock *bb) 54157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 54257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next; 54357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 54457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // remove pseudo operations and non-fixed no-ops, split 64 bit operations 54557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getFirst(); i; i = next) { 54657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 54757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_EMIT || i->op == OP_RESTART) { 54857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!i->getDef(0)->refCount()) 54957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, NULL); 5509362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).getFile() == FILE_IMMEDIATE) 5513433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller i->setSrc(0, rZero); // initial value must be 0 55207d3972b4927841bb892af16ff0389f8a241b24cBen Skeggs replaceZero(i); 55357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 55457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isNop()) { 55557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(i); 55657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 55799e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // TODO: Move this to before register allocation for operations that 55899e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // need the $c register ! 
55999e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (typeSizeof(i->dType) == 8) { 56099e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller Instruction *hi; 56199e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry); 56299e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (hi) 56399e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller next = hi; 56499e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller } 56599e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller 56657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_MOV && i->op != OP_PFETCH) 56757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller replaceZero(i); 56857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 56957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 57057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!bb->getEntry()) 57157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 57257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 57357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!tryReplaceContWithBra(bb)) 57457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller propagateJoin(bb); 57557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 57657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 57757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 57857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 57957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget()) 58057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 58157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(prog); 5824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller gMemBase = NULL; 58357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 
58457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 58557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 58657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Function *fn) 58757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 58857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 58957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!strncmp(fn->getName(), "MAIN", 4)); 59057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: when we generate actual functions pass this value along somehow 59157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false); 59257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); 5932ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller if (fn->cfgExit) { 5942ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); 5952ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.mkMovToReg(0, gpEmitAddress); 5962ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller } 59757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 59857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 59957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 60057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 60157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 60257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(BasicBlock *bb) 60357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 60457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 60557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 60657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6077a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph 
Bumillerinline Value * 6087a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph BumillerNVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot) 6097a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller{ 6107a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller uint8_t b = prog->driver->io.resInfoCBSlot; 6117a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller uint32_t off = prog->driver->io.texBindBase + slot * 4; 6127a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller return bld. 6137a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 6147a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller} 6157a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 61657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// move array source to first slot, convert to u16, add indirections 61757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 61857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTEX(TexInstruction *i) 61957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 62037a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 62137a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int arg = i->tex.target.getArgCount(); 6224da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller const int lyr = arg - (i->tex.target.isMS() ? 2 : 1); 62319ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin const int chipset = prog->getTarget()->getChipset(); 62457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 625f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Arguments to the TEX instruction are a little insane. 
Even though the 626f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // encoding is identical between SM20 and SM30, the arguments mean 627f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // different things between Fermi and Kepler+. A lot of arguments are 628f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // optional based on flags passed to the instruction. This summarizes the 629f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // order of things. 630f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // 631f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Fermi: 632f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // array/indirect 633f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // coords 634f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // sample 635f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // lod bias 636f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // depth compare 637f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // offsets: 638f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg) 639f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // - other: 4 bits each, single reg 640f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // 641f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Kepler+: 642f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // indirect handle 643f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // array (+ offsets for txd in upper 16 bits) 644f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // coords 645f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // sample 646f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // lod bias 647f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // depth compare 648f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // offsets (same as fermi, except txd which takes it with array) 6490532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // 
6500532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // Maxwell (tex): 6510532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // array 6520532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // coords 6530532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // indirect handle 6540532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // sample 6550532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // lod bias 6560532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // depth compare 6570532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // offsets 6580532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // 6590532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // Maxwell (txd): 6600532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // indirect handle 6610532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // coords 6620532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // array + offsets 6630532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // derivatives 664f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin 66519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 6667a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 667b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin // XXX this ignores tsc, and assumes a 1:1 mapping 668b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin assert(i->tex.rIndirectSrc >= 0); 669b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin Value *hnd = loadTexHandle( 670b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 671b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->getIndirectR(), bld.mkImm(2)), 672b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.r); 673b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.r = 0xff; 674b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.s = 0x1f; 675b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectR(hnd); 
676b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectS(NULL); 677b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin } else if (i->tex.r == i->tex.s) { 6784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller i->tex.r += prog->driver->io.texBindBase / 4; 679e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller i->tex.s = 0; // only a single cX[] value possible here 680e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else { 6817a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *hnd = bld.getScratch(); 6827a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *rHnd = loadTexHandle(NULL, i->tex.r); 6837a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *sHnd = loadTexHandle(NULL, i->tex.s); 6847a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 6857a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, hnd, rHnd, bld.mkImm(0x1400), sHnd); 6867a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 6877a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.r = 0; // not used for indirect tex 6887a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.s = 0; 6897a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->setIndirectR(hnd); 690e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 691e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller if (i->tex.target.isArray()) { 692e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller LValue *layer = new_LValue(func, FILE_GPR); 6934da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *src = i->getSrc(lyr); 694e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller const int sat = (i->op == OP_TXF) ? 1 : 0; 695e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller DataType sTy = (i->op == OP_TXF) ? 
TYPE_U32 : TYPE_F32; 696e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; 6970532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) { 6980532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin for (int s = dim; s >= 1; --s) 6990532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(s, i->getSrc(s - 1)); 7000532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(0, layer); 7010532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin } else { 7020532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(dim, layer); 7030532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin } 704e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 705b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin // Move the indirect reference to the first place 7060532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (i->tex.rIndirectSrc >= 0 && ( 7070532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->op == OP_TXD || chipset < NVISA_GM107_CHIPSET)) { 708b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin Value *hnd = i->getIndirectR(); 709b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin 710b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectR(NULL); 711b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->moveSources(0, 1); 712b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setSrc(0, hnd); 713b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.rIndirectSrc = 0; 714b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.sIndirectSrc = -1; 715b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin } 716e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else 717e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller // (nvc0) generate and move the tsc/tic/array source to the front 71819ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 
71957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 72057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7219807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin Value *ticRel = i->getIndirectR(); 7229807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin Value *tscRel = i->getIndirectS(); 7239807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin 724af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (ticRel) { 7259807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin i->setSrc(i->tex.rIndirectSrc, NULL); 726af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (i->tex.r) 727af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 728af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin ticRel, bld.mkImm(i->tex.r)); 729af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin } 730af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (tscRel) { 7319807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin i->setSrc(i->tex.sIndirectSrc, NULL); 732af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (i->tex.s) 733af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 734af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin tscRel, bld.mkImm(i->tex.s)); 735af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin } 7369807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin 7374da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *arrayIndex = i->tex.target.isArray() ? 
i->getSrc(lyr) : NULL; 73857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = dim; s >= 1; --s) 73957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, i->getSrc(s - 1)); 74057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, arrayIndex); 74157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 742e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller if (arrayIndex) { 743e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller int sat = (i->op == OP_TXF) ? 1 : 0; 744e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 745e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat; 746e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } else { 74757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.loadImm(src, 0); 748e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } 74957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7509807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin if (ticRel) 75157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src); 7529807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin if (tscRel) 75357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src); 75457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 75557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, src); 75657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 75757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 75819ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // For nvc0, the sample id has to be in the second operand, as the offset 75919ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // does. 
Right now we don't know how to pass both in, and this case can't 76019ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // happen with OpenGL. On nve0, the sample id is part of the texture 76119ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // coordinate argument. 76219ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin assert(chipset >= NVISA_GK104_CHIPSET || 76319ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin !i->tex.useOffsets || !i->tex.target.isMS()); 76419ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin 765f782d6e792db2ed7773a2d22866dbcdb1e4062eeIlia Mirkin // offset is between lod and dc 76657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->tex.useOffsets) { 76757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int n, c; 76871c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 7698aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->op != OP_TXD || chipset < NVISA_GK104_CHIPSET) { 7708aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.target.isShadow()) 7718aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin s--; 7728aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->srcExists(s)) // move potential predicate out of the way 7738aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->moveSources(s, 1); 7748aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.useOffsets == 4 && i->srcExists(s + 1)) 7758aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->moveSources(s + 1, 1); 7768aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 777f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin if (i->op == OP_TXG) { 778f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // Either there is 1 offset, which goes into the 2 low bytes of the 779f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // first source, or there are 4 offsets, which go into 2 sources (8 780f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // values, 1 byte each). 
781f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin Value *offs[2] = {NULL, NULL}; 782f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (n = 0; n < i->tex.useOffsets; n++) { 783f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 2; ++c) { 784f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if ((n % 2) == 0 && c == 0) 785f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2] = i->offset[n][c].get(); 786f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin else 787f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkOp3(OP_INSBF, TYPE_U32, 788f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2], 789f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->offset[n][c].get(), 790f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)), 791f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2]); 792f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 793f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 794f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s, offs[0]); 795f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if (offs[1]) 796f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s + 1, offs[1]); 797f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } else { 798f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin unsigned imm = 0; 799f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin assert(i->tex.useOffsets == 1); 800f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 3; ++c) { 801f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin ImmediateValue val; 802fb1afd1ea5fd25d82c75c5c3a2aba0bcb53b6d47Ilia Mirkin if (!i->offset[0][c].getImmediate(val)) 803fb1afd1ea5fd25d82c75c5c3a2aba0bcb53b6d47Ilia Mirkin assert(!"non-immediate offset passed to non-TXG"); 804f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin imm |= (val.reg.data.u32 & 0xf) << (c * 4); 805f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 
8068aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) { 8078aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // The offset goes into the upper 16 bits of the array index. So 8088aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // create it if it's not already there, and INSBF it if it already 8098aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // is. 810b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin s = (i->tex.rIndirectSrc >= 0) ? 1 : 0; 8110532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (chipset >= NVISA_GM107_CHIPSET) 8120532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin s += dim; 8138aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.target.isArray()) { 8140532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s), 8158aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin bld.loadImm(NULL, imm), bld.mkImm(0xc10), 816b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->getSrc(s)); 8178aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 818b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->moveSources(s, 1); 819b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setSrc(s, bld.loadImm(NULL, imm << 16)); 8208aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 8218aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 8228aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->setSrc(s, bld.loadImm(NULL, imm)); 8238aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 824f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } 82557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 82657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 82719ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 82871c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 82971c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // If TEX requires more than 4 sources, the 2nd register tuple 
must be 83071c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // aligned to 4, even if it consists of just a single 4-byte register. 83171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 83271c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. 83371c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 83471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 83571c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (s > 4 && s < 7) { 83671c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (i->srcExists(s)) // move potential predicate out of the way 83771c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->moveSources(s, 7 - s); 83871c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller while (s < 7) 83971c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->setSrc(s++, bld.loadImm(NULL, 0)); 84071c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 84171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 84271c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller 84357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 84457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 84557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 84657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 84757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleManualTXD(TexInstruction *i) 84857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 84957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller static const uint8_t qOps[4][2] = 85057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 85157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 85257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, 
MOV2, ADD, ADD) }, // l1 85357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 85457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 85557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller }; 85657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[4][4]; 85757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *crd[3]; 85857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *tex; 85957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *zero = bld.loadImm(bld.getSSA(), 0); 86057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int l, c; 86157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const int dim = i->tex.target.getDim(); 862afea9bae67208cdb00b27a60c9cb013bf7d6de52Ilia Mirkin const int array = i->tex.target.isArray(); 86357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 86457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_TEX; // no need to clone dPdx/dPdy later 86557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 86657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 86757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller crd[c] = bld.getScratch(); 86857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 86957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADON, TYPE_NONE, NULL); 87057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) { 87157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // mov coordinates from lane l to all lanes 87257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 873afea9bae67208cdb00b27a60c9cb013bf7d6de52Ilia Mirkin bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero); 
87457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdx from lane l to lanes dx 87557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 87657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); 87757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdy from lane l to lanes dy 87857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 87957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); 88057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // texture 881a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez bld.insert(tex = cloneForward(func, i)); 88257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 883afea9bae67208cdb00b27a60c9cb013bf7d6de52Ilia Mirkin tex->setSrc(c + array, crd[c]); 88457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // save results 88557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 88657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mov; 88757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[c][l] = bld.getSSA(); 88857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov = bld.mkMov(def[c][l], tex->getDef(c)); 88957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->fixed = 1; 89057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->lanes = 1 << l; 89157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 89257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 89357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); 89457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 89557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 
89657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); 89757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) 89857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller u->setSrc(l, def[c][l]); 89957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 90057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 90157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->bb->remove(i); 90257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 90357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 90457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 90557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 90657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTXD(TexInstruction *txd) 90757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 90857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int dim = txd->tex.target.getDim(); 9098aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin unsigned arg = txd->tex.target.getArgCount(); 9108aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin unsigned expected_args = arg; 9118aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin const int chipset = prog->getTarget()->getChipset(); 9128aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin 9138aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 9148aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (!txd->tex.target.isArray() && txd->tex.useOffsets) 9158aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin expected_args++; 916b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin if (txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0) 917b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin expected_args++; 9188aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 9198aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (txd->tex.useOffsets) 
9208aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin expected_args++; 9217f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin if (!txd->tex.target.isArray() && ( 9227f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0)) 9237f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin expected_args++; 9248aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 9258aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin 9268aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (expected_args > 4 || 9278aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin dim > 2 || 9288aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin txd->tex.target.isShadow() || 9298aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin txd->tex.target.isCube()) 9308aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin txd->op = OP_TEX; 93157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 93257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleTEX(txd); 93338a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller while (txd->srcExists(arg)) 93457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++arg; 93557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 9369c930639d9f6d713ccfd16b390a41a9f584f348cChristoph Bumiller txd->tex.derivAll = true; 9378aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (txd->op == OP_TEX) 93857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleManualTXD(txd); 93957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 9408aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin assert(arg == expected_args); 94157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int c = 0; c < dim; ++c) { 94238a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); 94338a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); 9449362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller 
txd->dPdx[c].set(NULL); 9459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller txd->dPdy[c].set(NULL); 94657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 94757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 94857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 94957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 95057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 95130cb66cd745fc793a2349f1d17046c50cd51c558Christoph BumillerNVC0LoweringPass::handleTXQ(TexInstruction *txq) 95230cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller{ 95330cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller // TODO: indirect resource/sampler index 95430cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return true; 95530cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller} 95630cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller 95730cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumillerbool 958423f64e83ab5b1ea7de475ae80300a8408522743Ilia MirkinNVC0LoweringPass::handleTXLQ(TexInstruction *i) 959423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin{ 960423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The outputs are inverted compared to what the TGSI instruction 961423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * expects. Take that into account in the mask. 
962423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 963423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin assert((i->tex.mask & ~3) == 0); 964423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 1) 965423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 2; 966423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin else if (i->tex.mask == 2) 967423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 1; 968423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin handleTEX(i); 969423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.setPosition(i, true); 970423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 971423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The returned values are not quite what we want: 972423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (a) convert from s16/u16 to f32 973423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (b) multiply by 1/256 974423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 975423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin for (int def = 0; def < 2; ++def) { 976423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (!i->defExists(def)) 977423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin continue; 978423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin enum DataType type = TYPE_S16; 979423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 2 || def > 0) 980423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin type = TYPE_U16; 981423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), type, i->getDef(def)); 982423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), 983423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); 984423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 985423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 3) { 986423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin LValue *t = 
new_LValue(func, FILE_GPR); 987423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(t, i->getDef(0)); 988423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(0), i->getDef(1)); 989423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(1), t); 990423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 991423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return true; 992423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin} 993423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 994423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 995423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkinbool 996c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph BumillerNVC0LoweringPass::handleATOM(Instruction *atom) 997c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller{ 998c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller SVSemantic sv; 999c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1000c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller switch (atom->src(0).getFile()) { 1001c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_LOCAL: 1002c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller sv = SV_LBASE; 1003c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 1004c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_SHARED: 1005c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller sv = SV_SBASE; 1006c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 1007c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller default: 1008c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); 1009c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 1010c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller } 1011c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller Value *base = 1012c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0)); 1013c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller Value *ptr = atom->getIndirect(0, 0); 1014c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1015c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setSrc(0, cloneShallow(func, atom->getSrc(0))); 1016c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 1017c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller if (ptr) 1018c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr); 1019c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setIndirect(0, 0, base); 1020c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1021c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 1022c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller} 1023c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 102475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumillerbool 102575f1f852b00ad0d766684d01695322b93a2acd55Christoph BumillerNVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) 102675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller{ 102775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && 102875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) 102975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return false; 103075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, true); 103175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 103275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (needCctl) { 103375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0)); 103475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 
cctl->setIndirect(0, 0, cas->getIndirect(0, 0)); 103575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->fixed = 1; 103675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->subOp = NV50_IR_SUBOP_CCTL_IV; 103775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->isPredicated()) 103875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->setPredicate(cas->cc, cas->getPredicate()); 103975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 104075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 104175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { 104275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // CAS is crazy. It's 2nd source is a double reg, and the 3rd source 104375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // should be set to the high part of the double reg or bad things will 104475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // happen elsewhere in the universe. 104575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // Also, it sometimes returns the new value instead of the old one 104675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // under mysterious circumstances. 
104775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Value *dreg = bld.getSSA(8); 104875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, false); 104975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2)); 105075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->setSrc(1, dreg); 105175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 105275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 105375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return true; 105475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller} 105575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 10564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 10574506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) 10584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 10594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint8_t b = prog->driver->io.resInfoCBSlot; 10604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off += prog->driver->io.suInfoBase; 10614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 
10624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 10634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 10644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 10664506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) 10674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 10684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint8_t b = prog->driver->io.msInfoCBSlot; 10694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off += prog->driver->io.msInfoBase; 10704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 10714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 10724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 10734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller/* On nvc0, surface info is obtained via the surface binding points passed 10754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * to the SULD/SUST instructions. 10764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * On nve4, surface info is stored in c[] and is used by various special 10774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * instructions, e.g. for clamping coordinates or generating an address. 10784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * They couldn't just have added an equivalent to TIC now, could they?
10794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller */ 10804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_ADDR 0x00 10814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_FMT 0x04 10824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM_X 0x08 10834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_PITCH 0x0c 10844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM_Y 0x10 10854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_ARRAY 0x14 10864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM_Z 0x18 10874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_UNK1C 0x1c 10884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_WIDTH 0x20 10894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_HEIGHT 0x24 10904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DEPTH 0x28 10914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_TARGET 0x2c 10924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_CALL 0x30 10934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_RAW_X 0x34 10944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_MS_X 0x38 10954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_MS_Y 0x3c 10964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO__STRIDE 0x40 10984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 10994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_DIM(i) (0x08 + (i) * 8) 11004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_SIZE(i) (0x20 + (i) * 
4) 11014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller#define NVE4_SU_INFO_MS(i) (0x38 + (i) * 4) 11024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerstatic inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) 11044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 11054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (su->tex.target.getEnum()) { 11064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_BUFFER: return NV50_IR_SUBOP_SUCLAMP_PL(0, 1); 11074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_RECT: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D_ARRAY: return (c == 1) ? 11104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_PL(0, 2) : 11114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 11134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 11144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_3D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE_ARRAY: return 
NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 11194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 11204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(0); 11214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return 0; 11224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 11234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 11244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 11264506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) 11274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 11284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const uint16_t base = tex->tex.r * NVE4_SU_INFO__STRIDE; 11294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int arg = tex->tex.target.getArgCount(); 11304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS) 11324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D; 11334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 11344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY) 11354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D_ARRAY; 11364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 11374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return; 11384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *x = tex->getSrc(0); 11404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = tex->getSrc(1); 11414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *s = tex->getSrc(arg - 1); 11424506ed28de7f9d76bbc99c0758a7891b84528729Christoph 
Bumiller 11434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); 11444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0)); 11464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1)); 11474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); 11494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); 11504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_AND, TYPE_U32, ts, s, bld.loadImm(NULL, 0x7)); 11524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3)); 11534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dx = loadMsInfo32(ts, 0x0); 11554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dy = loadMsInfo32(ts, 0x4); 11564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx); 11584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy); 11594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(0, tx); 11614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(1, ty); 11624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->moveSources(arg, -1); 11634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 
11644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// Sets 64-bit "generic address", predicate and format sources for SULD/SUST. 11664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// They're computed from the coordinates using the surface info in c[] space. 11674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 11684506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) 11694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 11704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Instruction *insn; 11714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP; 11724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool raw = 11734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; 11744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int idx = su->tex.r; 11754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int dim = su->tex.target.getDim(); 11764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int arg = dim + (su->tex.target.isArray() ? 
1 : 0); 11774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const uint16_t base = idx * NVE4_SU_INFO__STRIDE; 11784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller int c; 11794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *zero = bld.mkImm(0); 11804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *p1 = NULL; 11814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *v; 11824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *src[3]; 11834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *bf, *eau, *off; 11844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *addr, *pred; 11854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off = bld.getScratch(4); 11874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = bld.getScratch(4); 11884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller addr = bld.getSSA(8); 11894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred = bld.getScratch(1, FILE_PREDICATE); 11904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.setPosition(su, false); 11924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller adjustCoordinatesMS(su); 11944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 11954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate clamped coordinates 11964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (c = 0; c < arg; ++c) { 11974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = bld.getScratch(); 11984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (c == 0 && raw) 11994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X); 
12004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 12014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c)); 12024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) 12034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = getSuClampSubOp(su, c); 12044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (; c < 3; ++c) 12064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = zero; 12074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set predicate output 12094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 12104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[0]->getInsn()->setFlagsDef(1, pred); 12114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 12124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target.isArray()) { 12134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller p1 = bld.getSSA(1, FILE_PREDICATE); 12144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[dim]->getInsn()->setFlagsDef(1, p1); 12154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate pixel offset 12184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) { 12194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target != TEX_TARGET_BUFFER) 12204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); 12214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 
12224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 3) { 12234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); 12244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) 12254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 12264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); 12284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) 12294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l 12304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 12314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 2); 12324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); 12334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) 12344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = su->tex.target.isArray() ? 
12354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 12364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate effective address part 1 12394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 12404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (raw) { 12414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = src[0]; 12424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 12434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); 12444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) 12454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); 12464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 12484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = src[1]; 12494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *z = src[2]; 12504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint16_t subOp = 0; 12514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (dim) { 12534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 1: 12544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller y = zero; 12554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = zero; 12564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 12574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 2: 12584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = off; 
12594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (!su->tex.target.isArray()) { 12604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); 12614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 12624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 12644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 12654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 12664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 3); 12674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 12684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z); 12704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->subOp = subOp; 12714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->setFlagsDef(1, pred); 12724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // part 2 12754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR); 12764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 12784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = v; 12794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 12804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); 12814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12824506ed28de7f9d76bbc99c0758a7891b84528729Christoph 
Bumiller // add array layer offset 12834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target.isArray()) { 12844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY); 12854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) 12864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) 12874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 12884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 12894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau) 12904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32 12914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // combine predicates 12924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(p1); 12934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1); 12944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 12954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 12964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom) { 12974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *lo = bf; 12984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 12994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller lo = zero; 13004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(off, bf); 13014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 13024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // bf == g[] address & 0xff 13034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // eau == g[] address >> 8 13044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, 
bf, lo, bld.loadImm(NULL, 0x6540), eau); 13054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau); 13064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 13074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) { 13084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Convert from u32 to u8 address format, which is what the library code 13094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // doing SULDP currently uses. 13104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: can SUEAU do this ? 13114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: does it matter that we don't mask high bytes in bf ? 13124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Grrr. 13134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8)); 13144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off); 13154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 13164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau); 13184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom && su->tex.target == TEX_TARGET_BUFFER) 13204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off); 13214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // let's just set it 0 for raw access and hope it works 13234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = raw ? 
13244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); 13254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // get rid of old coordinate sources, make space for fmt info and predicate 13274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->moveSources(arg, 3 - arg); 13284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set 64 bit address and 32-bit format sources 13294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(0, addr); 13304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(1, v); 13314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(2, pred); 13324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 13334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 13354506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) 13364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 13374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller processSurfaceCoordsNVE4(su); 13384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Who do we hate more ? The person who decided that nvc0's SULD doesn't 13404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // have to support conversion or the person who decided that, in OpenCL, 13414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // you don't have to specify the format here like you do in OpenGL ? 13424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SULDP) { 13444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // We don't patch shaders. Ever. 
13454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // You get an indirect call to our library blob here. 13464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // But at least it's uniform. 13474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller FlowInstruction *call; 13484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller LValue *p[3]; 13494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller LValue *r[5]; 13504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint16_t base = su->tex.r * NVE4_SU_INFO__STRIDE + NVE4_SU_INFO_CALL; 13514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 4; ++i) 13534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller (r[i] = bld.getScratch(4, FILE_GPR))->reg.data.id = i; 13544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 3; ++i) 13554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller (p[i] = bld.getScratch(1, FILE_PREDICATE))->reg.data.id = i; 13564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller (r[4] = bld.getScratch(8, FILE_GPR))->reg.data.id = 4; 13574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(p[1], bld.mkImm((su->cache == CACHE_CA) ? 1 : 0), TYPE_U8); 13594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(p[2], bld.mkImm((su->cache == CACHE_CG) ? 
1 : 0), TYPE_U8); 13604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(p[0], su->getSrc(2), TYPE_U8); 13614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(r[4], su->getSrc(0), TYPE_U64); 13624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(r[2], su->getSrc(1), TYPE_U32); 13634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call = bld.mkFlow(OP_CALL, NULL, su->cc, su->getPredicate()); 13654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->indirect = 1; 13674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->absolute = 1; 13684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST, 13694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller prog->driver->io.resInfoCBSlot, TYPE_U32, 13704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller prog->driver->io.suInfoBase + base)); 13714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(1, r[2]); 13724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(2, r[4]); 13734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 3; ++i) 13744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setSrc(3 + i, p[i]); 13754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (int i = 0; i < 4; ++i) { 13764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setDef(i, r[i]); 13774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(su->getDef(i), r[i]); 13784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 13794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller call->setDef(4, p[1]); 13804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller delete_Instruction(bld.getProgram(), su); 
13814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 13824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 13834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SUREDB || su->op == OP_SUREDP) { 138475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // FIXME: for out of bounds access, destination value will be undefined ! 13854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *pred = su->getSrc(2); 13864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller CondCode cc = CC_NOT_P; 13874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->getPredicate()) { 13884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred = bld.getScratch(1, FILE_PREDICATE); 13894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller cc = su->cc; 13904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (cc == CC_NOT_P) { 13914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); 13924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 13934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); 13944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT); 13954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 13964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 13974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Instruction *red = bld.mkOp(OP_ATOM, su->dType, su->getDef(0)); 13984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->subOp = su->subOp; 13994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (!gMemBase) 14004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller gMemBase = bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0); 14014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(0, 
gMemBase); 14024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(1, su->getSrc(3)); 14034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->subOp == NV50_IR_SUBOP_ATOM_CAS) 14044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(2, su->getSrc(4)); 14054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setIndirect(0, 0, su->getSrc(0)); 14064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setPredicate(cc, pred); 14074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller delete_Instruction(bld.getProgram(), su); 140875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(red, true); 14094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 14104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8; 14114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 14124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 14134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1414c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumillerbool 141557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleWRSV(Instruction *i) 141657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 141757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *st; 141857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym; 141957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr; 142057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 142157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // must replace, $sreg are not writeable 142257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym()); 142357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (addr >= 0x400) 142457594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller return false; 142557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); 142657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 142757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), 142857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(1)); 142957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->perPatch = i->perPatch; 143057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 143157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 143257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 143357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 143457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 143557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 143657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::readTessCoord(LValue *dst, int c) 143757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 143857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *laneid = bld.getSSA(); 143957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *x, *y; 144057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 144157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0)); 144257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 144357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 0) { 144457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = dst; 144557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = NULL; 144657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 144757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 1) { 144857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = NULL; 
144957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = dst; 145057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 145157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(c == 2); 145257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = bld.getSSA(); 145357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = bld.getSSA(); 145457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 145557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (x) 145657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid); 145757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (y) 14583fc2818f2b9e8a19e5349442e50dcee4858452c6Christoph Bumiller bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid); 145957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 146057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 2) { 146157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y); 146257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst); 146357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 146457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 146557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 146657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 146757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleRDSV(Instruction *i) 146857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 146957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym = i->getSrc(0)->asSym(); 1470ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller const SVSemantic sv = sym->reg.data.sv.sv; 147157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *vtx = NULL; 147257594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller Instruction *ld; 147357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); 147457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1475ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (addr >= 0x400) { 1476ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // mov $sreg 1477ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 1478ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID 1479ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 1480ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); 1481ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 148257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 1483ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 148457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1485ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller switch (sv) { 148657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_POSITION: 148757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_FRAGMENT); 1488acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin if (i->srcExists(1)) { 1489acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin // Pass offset through to the interpolation logic 1490acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin ld = bld.mkInterp(NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_OFFSET, 1491acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin i->getDef(0), addr, NULL); 1492acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin ld->setSrc(1, i->getSrc(1)); 1493acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin } else { 1494acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, 
NULL); 1495acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin } 149652c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller break; 149752c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller case SV_FACE: 149852c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller { 149952c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller Value *face = i->getDef(0); 150052c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL); 150152c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller if (i->dType == TYPE_F32) { 1502354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkOp2(OP_OR, TYPE_U32, face, face, bld.mkImm(0x00000001)); 1503354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkOp1(OP_NEG, TYPE_S32, face, face); 1504354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, face, TYPE_S32, face); 150552c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 150652c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 150757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 150857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_TESS_COORD: 150957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); 151057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); 151157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 1512ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NTID: 1513ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NCTAID: 1514ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_GRIDID: 1515ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise 1516ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 
1517ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 1518ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); 1519ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller return true; 1520ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 1521ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller addr += prog->driver->prop.cp.gridInfoBase; 1522ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller bld.mkLoad(TYPE_U32, i->getDef(0), 1523ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); 1524ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller break; 1525af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_INDEX: 1526af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // TODO: Properly pass source as an address in the PIX address space 1527af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // (which can be of the form [r0+offset]). But this is currently 1528af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // unnecessary. 
1529af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 1530af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 1531af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 1532af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_POS: { 1533af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin Value *off = new_LValue(func, FILE_GPR); 1534af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 1535af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 1536af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); 1537af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkLoad(TYPE_F32, 1538af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin i->getDef(0), 1539af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkSymbol( 1540af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, 1541af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin TYPE_U32, prog->driver->io.sampleInfoBase + 1542af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin 4 * sym->reg.data.sv.index), 1543af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin off); 1544af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 1545af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin } 1546b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin case SV_SAMPLE_MASK: 1547b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 1548b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK; 1549b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin break; 155057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 155157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == 
Program::TYPE_TESSELLATION_EVAL) 155257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); 155357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld = bld.mkFetch(i->getDef(0), i->dType, 155457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); 155557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld->perPatch = i->perPatch; 155657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 155757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 155857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 155957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 156057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 156157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 156257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 156357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleDIV(Instruction *i) 156457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 156557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isFloatType(i->dType)) 156657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 1567b5f2c0505fd4f66422e034b041cdf0bc3dc46e99Christoph Bumiller bld.setPosition(i, false); 1568b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1)); 156957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MUL; 157057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, rcp->getDef(0)); 157157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 157257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 157357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 157457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 
157557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleMOD(Instruction *i) 157657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 1577b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin if (!isFloatType(i->dType)) 157857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 1579b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin LValue *value = bld.getScratch(typeSizeof(i->dType)); 1580b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1)); 1581b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value); 1582b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp1(OP_TRUNC, i->dType, value, value); 1583b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value); 158457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_SUB; 158557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, value); 158657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 158757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 158857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 158957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 159057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleSQRT(Instruction *i) 159157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 1592ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin Value *pred = bld.getSSA(1, FILE_PREDICATE); 1593ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin Value *zero = bld.getSSA(); 1594ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin Instruction *rsq; 1595ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin 1596ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0)); 1597ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin if (i->dType == TYPE_F64) 
1598ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero); 1599ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero); 1600ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, pred); 1601ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin rsq = bld.mkOp1(OP_RSQ, i->dType, 1602ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin bld.getSSA(typeSizeof(i->dType)), i->getSrc(0)); 1603ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin rsq->setPredicate(CC_NOT_P, pred); 160457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MUL; 160557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, rsq->getDef(0)); 1606ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin i->setPredicate(CC_NOT_P, pred); 1607ef8f09be3375231481373a5c49a5cef3db3d2141Ilia Mirkin 160857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 160957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 161057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 161157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 161257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 161357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handlePOW(Instruction *i) 161457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 161557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *val = bld.getScratch(); 161657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 161757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); 161857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; 161957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); 
162057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 162157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_EX2; 162257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, val); 162357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 162457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 162557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 162657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 162757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 162857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 162957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleEXPORT(Instruction *i) 163057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 163157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_FRAGMENT) { 163257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int id = i->getSrc(0)->reg.data.offset / 4; 163357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 16349362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).isIndirect(0)) // TODO, ugly 163557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 163657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 163700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller i->subOp = NV50_IR_SUBOP_MOV_FINAL; 16389362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).set(i->src(1)); 163957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 164057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, new_LValue(func, FILE_GPR)); 164157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getDef(0)->reg.data.id = id; 164257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 164357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prog->maxGPR = MAX2(prog->maxGPR, id); 
164457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 164557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 164657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setIndirect(0, 1, gpEmitAddress); 164757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 164857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 164957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 165057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 165157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 165257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleOUT(Instruction *i) 165357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 16542f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin Instruction *prev = i->prev; 16552f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin ImmediateValue stream, prevStream; 16562f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin 16572f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin // Only merge if the stream ids match. Also, note that the previous 16582f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin // instruction would have already been lowered, so we take arg1 from it. 
16592f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin if (i->op == OP_RESTART && prev && prev->op == OP_EMIT && 16602f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin i->src(0).getImmediate(stream) && 16612f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin prev->src(1).getImmediate(prevStream) && 16622f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin stream.reg.data.u32 == prevStream.reg.data.u32) { 166357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART; 166457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 166557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 166657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(gpEmitAddress); 166757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, gpEmitAddress); 16682f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin i->setSrc(1, i->getSrc(0)); 166957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, gpEmitAddress); 167057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 167157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 167257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 167357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 167457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Generate a binary predicate if an instruction is predicated by 167557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// e.g. an f32 value. 
167657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 167757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::checkPredicate(Instruction *insn) 167857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 167957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = insn->getPredicate(); 168057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pdst; 168157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 168257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!pred || pred->reg.file == FILE_PREDICATE) 168357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 168457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller pdst = new_LValue(func, FILE_PREDICATE); 168557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 168657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // CAUTION: don't use pdst->getInsn, the definition might not be unique, 168757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass 168857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1689bbe3d6dc29f218e4d790e5ea359d3c6736e94226Dave Airlie bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, insn->dType, bld.mkImm(0), pred); 169057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 169157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->setPredicate(insn->cc, pdst); 169257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 169357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 169457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 169557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - add quadop dance for texturing 169657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - put FP outputs in GPRs 169757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - convert instruction sequences 169857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 
169957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 170057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Instruction *i) 170157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 1702405bd00f3c98cb78d1dda1f3bf5d74155b18cd57Christoph Bumiller bld.setPosition(i, false); 170357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 170457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->cc != CC_ALWAYS) 170557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkPredicate(i); 170657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 170757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 170857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TEX: 170957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXB: 171057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXL: 171157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXF: 171257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXG: 171357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTEX(i->asTex()); 171457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXD: 171557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTXD(i->asTex()); 1716423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin case OP_TXLQ: 1717423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return handleTXLQ(i->asTex()); 171830cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller case OP_TXQ: 171930cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return handleTXQ(i->asTex()); 172057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: 172157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); 172257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getDef(0)); 
172357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 172457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_POW: 172557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handlePOW(i); 172657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 172757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleDIV(i); 172857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 172957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleMOD(i); 173057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: 173157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleSQRT(i); 173257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EXPORT: 173357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleEXPORT(i); 173457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EMIT: 173557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RESTART: 173657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleOUT(i); 173757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RDSV: 173857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleRDSV(i); 173957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_WRSV: 174057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleWRSV(i); 174157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LOAD: 17429362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).getFile() == FILE_SHADER_INPUT) { 1743d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller if (prog->getType() == Program::TYPE_COMPUTE) { 1744d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller i->getSrc(0)->reg.file = FILE_MEMORY_CONST; 1745d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller i->getSrc(0)->reg.fileIndex = 0; 1746b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain 
} else 1747b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain if (prog->getType() == Program::TYPE_GEOMETRY && 1748b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain i->src(0).isIndirect(0)) { 1749b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain // XXX: this assumes vec4 units 1750b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 1751b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain i->getIndirect(0, 0), bld.mkImm(4)); 1752b3f82e1a63e8a58f0e7ac297fc5e94ebe76c3339Bryan Cain i->setIndirect(0, 0, ptr); 1753217301843aea0299ab245e260b20af7ad250e9d8Ilia Mirkin i->op = OP_VFETCH; 1754d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller } else { 1755d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller i->op = OP_VFETCH; 1756d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP 1757d105b3df14283a4dd80cecc1e6cab58432368ef6Christoph Bumiller } 17580ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin } else if (i->src(0).getFile() == FILE_MEMORY_CONST) { 17590ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin if (i->src(0).isIndirect(1)) { 17600ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin Value *ptr; 17610ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin if (i->src(0).isIndirect(0)) 17620ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(), 17630ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin i->getIndirect(0, 1), bld.mkImm(0x1010), 17640ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin i->getIndirect(0, 0)); 17650ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin else 17660ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 17670ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin i->getIndirect(0, 1), bld.mkImm(16)); 17680ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin i->setIndirect(0, 1, NULL); 
17690ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin i->setIndirect(0, 0, ptr); 17700ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin i->subOp = NV50_IR_SUBOP_LDC_IS; 17710ddc28b026688df79e54d3af1d7914ff04b12fedIlia Mirkin } 177257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 177357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 1774c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case OP_ATOM: 177575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller { 177675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; 1777c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller handleATOM(i); 177875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(i, cctl); 177975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 1780c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 17814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDB: 17824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDP: 17834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTB: 17844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTP: 17854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDB: 17864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDP: 17874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (targ->getChipset() >= NVISA_GK104_CHIPSET) 17884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller handleSurfaceOpNVE4(i->asTex()); 17894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 179057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 179157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 17926bca283ad5ebdd85e268c6757842b3c808c6b73dJohannes Obermayr } 179357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 
179457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 179557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 179657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 179757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerTargetNVC0::runLegalizePass(Program *prog, CGStage stage) const 179857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 179957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_PRE_SSA) { 180057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LoweringPass pass(prog); 180157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 180257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 180357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_POST_RA) { 1804e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller NVC0LegalizePostRA pass(prog); 180557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 180657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 180757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_SSA) { 180857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LegalizeSSA pass; 180957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 181057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 181157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 181257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 181357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 181457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} // namespace nv50_ir 1815