nv50_ir_lowering_nvc0.cpp revision 974ab614d3b883bf094d4cdbfdb9792df6625f55
1d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller/* 2d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Copyright 2011 Christoph Bumiller 3d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 4d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Permission is hereby granted, free of charge, to any person obtaining a 5d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * copy of this software and associated documentation files (the "Software"), 6d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * to deal in the Software without restriction, including without limitation 7d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * and/or sell copies of the Software, and to permit persons to whom the 9d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Software is furnished to do so, subject to the following conditions: 10d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 11d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * The above copyright notice and this permission notice shall be included in 12d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * all copies or substantial portions of the Software. 13d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 14d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 183d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER DEALINGS IN THE SOFTWARE. 21d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller */ 2257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 235eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir.h" 245eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_build_util.h" 2557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 265eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_target_nvc0.h" 273723ff52237194995d4f9f9fb5d66fb80110889eBen Skeggs#include "codegen/nv50_ir_lowering_nvc0.h" 2857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller#include <limits> 3000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 3157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillernamespace nv50_ir { 3257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_ADD 0 3457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUBR 1 3557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUB 2 3657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_MOV2 3 3757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 38717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller// UL UR LL LR 3957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QUADOP(q, r, s, t) \ 40717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller ((QOP_##q << 6) | (QOP_##r << 4) | \ 41717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller (QOP_##s << 2) | (QOP_##t << 0)) 4257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 4357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 4457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleDIV(Instruction *i) 4557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 4657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *call; 4757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int builtin; 4857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[2]; 4957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 5157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0); 5257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0); 5357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 5457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break; 5557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break; 5657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 5757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 5857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 5957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); 6057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]); 6157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2); 6257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0); 6357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->fixed = 1; 6557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->absolute = call->builtin = 1; 6657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->target.builtin = builtin; 6757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 6857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 6957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 7157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleRCPRSQ(Instruction *i) 7257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 73b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin assert(i->dType == TYPE_F64); 74b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // There are instructions that will compute the high 32 bits of the 64-bit 75b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // float. We will just stick 0 in the bottom 32 bits. 76b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 77b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.setPosition(i, false); 78b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 79b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 1. Take the source and it up. 80b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin Value *src[2], *dst[2], *def = i->getDef(0); 81b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkSplit(src, 4, i->getSrc(0)); 82b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 83b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. 84b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin dst[0] = bld.loadImm(NULL, 0); 85b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin dst[1] = bld.getSSA(); 86b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 87b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 3. The new version of the instruction takes the high 32 bits of the 88b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // source and outputs the high 32 bits of the destination. 89b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setSrc(0, src[1]); 90b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setDef(0, dst[1]); 91b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setType(TYPE_F32); 92b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->subOp = NV50_IR_SUBOP_RCPRSQ_64H; 93b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 94b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 4. Recombine the two dst pieces back into the original destination. 95b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.setPosition(i, true); 96b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); 9757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 9857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 996fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkinvoid 1006fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia MirkinNVC0LegalizeSSA::handleFTZ(Instruction *i) 1016fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin{ 1026fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // Only want to flush float inputs 103d1eea18a595a468dbc2267a8d14197a3b1a5a4b6Ilia Mirkin assert(i->sType == TYPE_F32); 1046fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1056fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // If we're already flushing denorms (and NaN's) to zero, no need for this. 1066fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (i->dnz) 1076fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin return; 1086fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1096fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // Only certain classes of operations can flush 1106fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin OpClass cls = prog->getTarget()->getOpClass(i->op); 1116fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE && 1126fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin cls != OPCLASS_CONVERT) 1136fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin return; 1146fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1156fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin i->ftz = true; 1166fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin} 1176fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 11857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 11957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(Function *fn) 12057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 12157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(fn->getProgram()); 12257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 12357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 12457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 12657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(BasicBlock *bb) 12757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 12857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 12957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 13057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 131d1eea18a595a468dbc2267a8d14197a3b1a5a4b6Ilia Mirkin if (i->sType == TYPE_F32) { 1326fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (prog->getType() != Program::TYPE_COMPUTE) 1336fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin handleFTZ(i); 13457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 1356fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin } 13657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 13757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 13857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 13957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleDIV(i); 14057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 14157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 14257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: 14357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType == TYPE_F64) 14457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleRCPRSQ(i); 14557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 14657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 14757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 14857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 14957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 15057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 15157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 15257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 153e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph BumillerNVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) 15474be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee : rZero(NULL), 15574be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee carry(NULL), 156ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin pOne(NULL), 15774be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee needTexBar(prog->getTarget()->getChipset() >= 0xe0) 158e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller{ 159e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller} 160e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller 16157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 16200fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insnDominatedBy(const Instruction *later, 16300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const Instruction *early) const 16400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 16500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (early->bb == later->bb) 16600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return early->serial < later->serial; 16700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return later->bb->dominatedBy(early->bb); 16800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 16900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 17000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillervoid 17140c224a573f2b763046001e622aafca90f68c693Christoph BumillerNVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, 172a2af42c1d2dc91f4c31e25ff9fff15a89a9b6eadIlia Mirkin Instruction *usei, const Instruction *texi) 17340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 17440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller bool add = true; 1751804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin bool dominated = insnDominatedBy(usei, texi); 1761804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // Uses before the tex have to all be included. Just because an earlier 1771804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // instruction dominates another instruction doesn't mean that there's no 1781804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // way to get from the tex to the later instruction. For example you could 1791804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // have nested loops, with the tex in the inner loop, and uses before it in 1801804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // both loops - even though the outer loop's instruction would dominate the 1811804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // inner's, we still want a texbar before the inner loop's instruction. 1821804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // 1831804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // However we can still use the eliding logic between uses dominated by the 1841804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // tex instruction, as that is unambiguously correct. 1851804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (dominated) { 1861804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin for (std::list<TexUse>::iterator it = uses.begin(); it != uses.end();) { 1871804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (it->after) { 1881804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (insnDominatedBy(usei, it->insn)) { 1891804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin add = false; 1901804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin break; 1911804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 1921804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (insnDominatedBy(it->insn, usei)) { 1931804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin it = uses.erase(it); 1941804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin continue; 1951804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 1961804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 19740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller ++it; 1981804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 19940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 20040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (add) 2011804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin uses.push_back(TexUse(usei, texi, dominated)); 20240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 20340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 2047752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// While it might be tempting to use the an algorithm that just looks at tex 2057752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// uses, not all texture results are guaranteed to be used on all paths. In 2067752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// the case where along some control flow path a texture result is never used, 2077752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// we might reuse that register for something else, creating a 2087752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// write-after-write hazard. So we have to manually look through all 2097752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// instructions looking for ones that reference the registers in question. 21040c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 2117752bbc44e78e982de3cd4c34862adc38a338234Ilia MirkinNVC0LegalizePostRA::findFirstUses( 2127752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin Instruction *texi, std::list<TexUse> &uses) 21340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 2147752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin int minGPR = texi->def(0).rep()->reg.data.id; 2157752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin int maxGPR = minGPR + texi->def(0).rep()->reg.size / 4 - 1; 21640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 2177752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin unordered_set<const BasicBlock *> visited; 2187752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin findFirstUsesBB(minGPR, maxGPR, texi->next, texi, uses, visited); 21940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 22040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 22140c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 2227752bbc44e78e982de3cd4c34862adc38a338234Ilia MirkinNVC0LegalizePostRA::findFirstUsesBB( 2237752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin int minGPR, int maxGPR, Instruction *start, 2247752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin const Instruction *texi, std::list<TexUse> &uses, 2257752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin unordered_set<const BasicBlock *> &visited) 22600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 2277752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin const BasicBlock *bb = start->bb; 2287752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 2297752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // We don't process the whole bb the first time around. This is correct, 2307752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // however we might be in a loop and hit this BB again, and need to process 2317752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // the full thing. So only mark a bb as visited if we processed it from the 2327752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // beginning. 2337752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (start == bb->getEntry()) { 2347752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (visited.find(bb) != visited.end()) 2357752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin return; 2367752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin visited.insert(bb); 2377752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin } 2387752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 2397752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (Instruction *insn = start; insn != bb->getExit(); insn = insn->next) { 2407752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (insn->isNop()) 2417752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin continue; 2427752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 2437752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (int d = 0; insn->defExists(d); ++d) { 24471ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin const Value *def = insn->def(d).rep(); 2457752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (insn->def(d).getFile() != FILE_GPR || 24671ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin def->reg.data.id + def->reg.size / 4 - 1 < minGPR || 24771ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin def->reg.data.id > maxGPR) 248c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin continue; 2497752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin addTexUse(uses, insn, texi); 2507752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin return; 2517752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin } 252c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin 2537752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (int s = 0; insn->srcExists(s); ++s) { 25471ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin const Value *src = insn->src(s).rep(); 2557752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (insn->src(s).getFile() != FILE_GPR || 25671ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin src->reg.data.id + src->reg.size / 4 - 1 < minGPR || 25771ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin src->reg.data.id > maxGPR) 2587752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin continue; 2597752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin addTexUse(uses, insn, texi); 2607752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin return; 26100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 26200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 2637752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 2647752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 2657752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin findFirstUsesBB(minGPR, maxGPR, BasicBlock::get(ei.getNode())->getEntry(), 2667752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin texi, uses, visited); 2677752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin } 26800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 26900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 27000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// Texture barriers: 27100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// This pass is a bit long and ugly and can probably be optimized. 27200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 27300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 1. obtain a list of TEXes and their outputs' first use(s) 27400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 2. calculate the barrier level of each first use (minimal number of TEXes, 27500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// over all paths, between the TEX and the use in question) 27600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 3. for each barrier, if all paths from the source TEX to that barrier 27700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// contain a barrier of lesser level, it can be culled 27800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 27900fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insertTextureBarriers(Function *fn) 28000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 28100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::list<TexUse> *uses; 28200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Instruction *> texes; 28300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstTex; 28400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstUse; 28500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> texCounts; 28600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<TexUse> useVec; 28700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ArrayList insns; 28800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 28900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller fn->orderInstructions(insns); 29000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 29100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts.resize(fn->allBBlocks.getSize(), 0); 29200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize()); 29300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize()); 29400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 29500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // tag BB CFG nodes by their id for later 29600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) { 29700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get()); 29800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (bb) 29900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bb->cfg.tag = bb->getId(); 30000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 30100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 30200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // gather the first uses for each TEX 30300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (int i = 0; i < insns.getSize(); ++i) { 30400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i)); 30500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(tex->op)) { 30600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes.push_back(tex); 30700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!texCounts.at(tex->bb->getId())) 30800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex[tex->bb->getId()] = texes.size() - 1; 30900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts[tex->bb->getId()]++; 31000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 31100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 31200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insns.clear(); 31300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (texes.empty()) 31400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 31500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller uses = new std::list<TexUse>[texes.size()]; 31600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!uses) 31700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 318c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin for (size_t i = 0; i < texes.size(); ++i) { 3197752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin findFirstUses(texes[i], uses[i]); 320c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin } 32100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 32200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // determine the barrier level at each use 32300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < texes.size(); ++i) { 32400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end(); 32500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++u) { 32600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *tb = texes[i]->bb; 32700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *ub = u->insn->bb; 32800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (tb == ub) { 32900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 33000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = i + 1; j < texes.size() && 33100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == tb && texes[j]->serial < u->insn->serial; 33200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 33300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 33400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 33500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = fn->cfg.findLightestPathWeight(&tb->cfg, 33600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller &ub->cfg, texCounts); 33700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (u->level < 0) { 33800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller WARN("Failed to find path TEX -> TEXBAR\n"); 33900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 34000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 34100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 34200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // this counted all TEXes in the origin block, correct that 34300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */; 34400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // and did not count the TEXes in the destination block, add those 34500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() && 34600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == ub && texes[j]->serial < u->insn->serial; 34700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 34800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 34900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 35000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller assert(u->level >= 0); 35100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec.push_back(*u); 35200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 35300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 35400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete[] uses; 35500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 35600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // insert the barriers 35700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < useVec.size(); ++i) { 35800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = useVec[i].insn->prev; 35900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (useVec[i].level < 0) 36000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 36100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR) { 36200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev->subOp > useVec[i].level) 36300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->subOp = useVec[i].level; 36400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0)); 36500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 36600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); 36700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->fixed = 1; 36800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->subOp = useVec[i].level; 36900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // make use explicit to ease latency calculation 37000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0)); 37100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec[i].insn->bb->insertBefore(useVec[i].insn, bar); 37200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 3755966903c28a13f13923de308c5f5116a0d5c8cbdIlia Mirkin if (fn->getProgram()->optLevel < 3) 37600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 37700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 37800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Limits> limitT, limitB, limitS; // entry, exit, single 37900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 38000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 38100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 38200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS.resize(fn->allBBlocks.getSize()); 38300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 38400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // cull unneeded barriers (should do that earlier, but for simplicity) 38540c224a573f2b763046001e622aafca90f68c693Christoph Bumiller IteratorRef bi = fn->cfg.iteratorCFG(); 38600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // first calculate min/max outstanding TEXes for each BB 38700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 38800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 38900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 39000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int min = 0; 39100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = std::numeric_limits<int>::max(); 39200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = i->next) { 39300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 39400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min++; 39500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (max < std::numeric_limits<int>::max()) 39600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 39700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 39800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 39900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min = MIN2(min, i->subOp); 40000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = MIN2(max, i->subOp); 40100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // limits when looking at an isolated block 40400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].min = min; 40500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].max = max; 40600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 40700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // propagate the min/max values 40800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) { 40900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 41000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 41100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 41200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int bbId = bb->getId(); 41300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) { 41400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 41500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int inId = in->getId(); 41600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min); 41700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max); 41800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 41900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // I just hope this is correct ... 42000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (limitS[bbId].max == std::numeric_limits<int>::max()) { 42100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // no barrier 42200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = limitT[bbId].min + limitS[bbId].min; 42300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = limitT[bbId].max + limitS[bbId].min; 42400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 42500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // block contained a barrier 42600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = MIN2(limitS[bbId].max, 42700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min + limitS[bbId].min); 42800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = MIN2(limitS[bbId].max, 42900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max + limitS[bbId].min); 43000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 43100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 43200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 43300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // finally delete unnecessary barriers 43400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 43500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 43600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 43700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = NULL; 43800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *next; 43900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = limitT[bb->getId()].max; 44000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = next) { 44100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller next = i->next; 44200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 44300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->subOp >= max) { 44400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, i); 4457086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco i = NULL; 44600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 44700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = i->subOp; 44800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) { 44900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, prev); 45000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = NULL; 45100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 45200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 45300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 45400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 45500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 45600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 4577086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco if (i && !i->isNop()) 45800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = i; 45900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 46000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 46100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 46200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 46300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 46400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 46557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(Function *fn) 46657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 46700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (needTexBar) 46800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insertTextureBarriers(fn); 46900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 4703433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller rZero = new_LValue(fn, FILE_GPR); 471ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin pOne = new_LValue(fn, FILE_PREDICATE); 47299e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry = new_LValue(fn, FILE_FLAGS); 4734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 4741f895caba0accc0af3e637d6193ac0b673ce98bcIlia Mirkin rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_GK20A_CHIPSET) ? 255 : 63; 47599e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry->reg.data.id = 0; 476ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin pOne->reg.data.id = 7; 4774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 47857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 47957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 48057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 48257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::replaceZero(Instruction *i) 48357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 48457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; i->srcExists(s); ++s) { 4854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (s == 2 && i->op == OP_SUCLAMP) 4864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller continue; 48757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ImmediateValue *imm = i->getSrc(s)->asImm(); 488ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (imm) { 489ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (i->op == OP_SELP && s == 2) { 490ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin i->setSrc(s, pOne); 491ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (imm->reg.data.u64 == 0) 492ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin i->src(s).mod = i->src(s).mod ^ Modifier(NV50_IR_MOD_NOT); 493ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } else if (imm->reg.data.u64 == 0) { 494ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin i->setSrc(s, rZero); 495ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 496ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 49757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 49857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 49957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 50057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// replace CONT with BRA for single unconditional continue 50157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 50257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb) 50357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 50457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT) 50557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 50657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.incident(); 50757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 50857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 50957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 51057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 51157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *contBB = BasicBlock::get(ei.getNode()); 51257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 51357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!contBB->getExit() || contBB->getExit()->op != OP_CONT || 51457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->getPredicate()) 51557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 51657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->op = OP_BRA; 51757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getEntry()); // delete PRECONT 51857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 51957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 52057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(ei.end() || ei.getType() != Graph::Edge::BACK); 52157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 52257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 52357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 52457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// replace branches to join blocks with join ops 52557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 52657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::propagateJoin(BasicBlock *bb) 52757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 52857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit) 52957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 53057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 53157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 53257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *exit = in->getExit(); 53357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!exit) { 53457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller in->insertTail(new FlowInstruction(func, OP_JOIN, bb)); 53557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // there should always be a terminator instruction 53657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller WARN("inserted missing terminator in BB:%i\n", in->getId()); 53757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 53857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (exit->op == OP_BRA) { 53957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->op = OP_JOIN; 54057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->asFlow()->limit = 1; // must-not-propagate marker 54157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 54257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 54357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getEntry()); 54457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 54557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 54657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 54757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(BasicBlock *bb) 54857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 54957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next; 55057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 55157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // remove pseudo operations and non-fixed no-ops, split 64 bit operations 55257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getFirst(); i; i = next) { 55357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 55457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_EMIT || i->op == OP_RESTART) { 55557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!i->getDef(0)->refCount()) 55657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, NULL); 5579362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).getFile() == FILE_IMMEDIATE) 5583433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller i->setSrc(0, rZero); // initial value must be 0 55907d3972b4927841bb892af16ff0389f8a241b24cBen Skeggs replaceZero(i); 56057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 56157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isNop()) { 56257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(i); 563313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin } else 564313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin if (i->op == OP_BAR && i->subOp == NV50_IR_SUBOP_BAR_SYNC && 565313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin prog->getType() != Program::TYPE_COMPUTE) { 566313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin // It seems like barriers are never required for tessellation since 567313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin // the warp size is 32, and there are always at most 32 tcs threads. 568313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin bb->remove(i); 56937b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin } else 57037b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LDC_IS) { 57137b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin int offset = i->src(0).get()->reg.data.offset; 57237b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin if (abs(offset) > 0x10000) 57337b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin i->src(0).get()->reg.fileIndex += offset >> 16; 57437b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin i->src(0).get()->reg.data.offset = (int)(short)offset; 57557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 57699e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // TODO: Move this to before register allocation for operations that 57799e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // need the $c register ! 57899e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (typeSizeof(i->dType) == 8) { 57999e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller Instruction *hi; 58099e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry); 58199e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (hi) 58299e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller next = hi; 58399e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller } 58499e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller 58557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_MOV && i->op != OP_PFETCH) 58657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller replaceZero(i); 58757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 58857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 58957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!bb->getEntry()) 59057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 59157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 59257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!tryReplaceContWithBra(bb)) 59357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller propagateJoin(bb); 59457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 59557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 59657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 59757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 59857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget()) 59957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 60057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(prog); 60157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 60257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 60357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 60457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Function *fn) 60557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 60657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 60757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!strncmp(fn->getName(), "MAIN", 4)); 60857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: when we generate actual functions pass this value along somehow 60957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false); 61057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); 6112ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller if (fn->cfgExit) { 6122ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); 6132ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.mkMovToReg(0, gpEmitAddress); 6142ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller } 61557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 61657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 61757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 61857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 61957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 62057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(BasicBlock *bb) 62157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 62257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 62357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 62457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6257a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumillerinline Value * 6267a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph BumillerNVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot) 6277a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller{ 628d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 6297a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller uint32_t off = prog->driver->io.texBindBase + slot * 4; 6309cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset 6319cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset if (ptr) 6329cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(2)); 6339cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset 6347a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller return bld. 6357a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 6367a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller} 6377a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 63857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// move array source to first slot, convert to u16, add indirections 63957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 64057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTEX(TexInstruction *i) 64157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 64237a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 64337a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int arg = i->tex.target.getArgCount(); 6444da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller const int lyr = arg - (i->tex.target.isMS() ? 2 : 1); 64519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin const int chipset = prog->getTarget()->getChipset(); 64657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6476eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin /* Only normalize in the non-explicit derivatives case. For explicit 6486eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin * derivatives, this is handled in handleManualTXD. 6496eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin */ 6506eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) { 6516eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin Value *src[3], *val; 6526eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin int c; 6536eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) 6546eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c)); 6556eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin val = bld.getScratch(); 6566eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); 6576eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); 6586eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp1(OP_RCP, TYPE_F32, val, val); 6596eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) { 6606eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), 6616eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin i->getSrc(c), val)); 6626eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } 6636eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } 6646eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin 665f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Arguments to the TEX instruction are a little insane. Even though the 666f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // encoding is identical between SM20 and SM30, the arguments mean 667f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // different things between Fermi and Kepler+. A lot of arguments are 668f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // optional based on flags passed to the instruction. This summarizes the 669f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // order of things. 670f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // 671f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Fermi: 672f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // array/indirect 673f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // coords 674f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // sample 675f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // lod bias 676f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // depth compare 677f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // offsets: 678f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg) 679f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // - other: 4 bits each, single reg 680f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // 681f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Kepler+: 682f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // indirect handle 683f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // array (+ offsets for txd in upper 16 bits) 684f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // coords 685f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // sample 686f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // lod bias 687f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // depth compare 688f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // offsets (same as fermi, except txd which takes it with array) 6890532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // 6900532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // Maxwell (tex): 6910532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // array 6920532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // coords 6930532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // indirect handle 6940532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // sample 6950532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // lod bias 6960532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // depth compare 6970532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // offsets 6980532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // 6990532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // Maxwell (txd): 7000532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // indirect handle 7010532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // coords 7020532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // array + offsets 7030532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // derivatives 704f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin 70519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 7067a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 707b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin // XXX this ignores tsc, and assumes a 1:1 mapping 708b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin assert(i->tex.rIndirectSrc >= 0); 7099cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset Value *hnd = loadTexHandle(i->getIndirectR(), i->tex.r); 710b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.r = 0xff; 711b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.s = 0x1f; 712b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectR(hnd); 713b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectS(NULL); 71463b850403c90f33c295d3ad6be4ad749d4ea6274Ilia Mirkin } else if (i->tex.r == i->tex.s || i->op == OP_TXF) { 7154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller i->tex.r += prog->driver->io.texBindBase / 4; 716e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller i->tex.s = 0; // only a single cX[] value possible here 717e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else { 7187a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *hnd = bld.getScratch(); 7197a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *rHnd = loadTexHandle(NULL, i->tex.r); 7207a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *sHnd = loadTexHandle(NULL, i->tex.s); 7217a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 7227a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, hnd, rHnd, bld.mkImm(0x1400), sHnd); 7237a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 7247a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.r = 0; // not used for indirect tex 7257a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.s = 0; 7267a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->setIndirectR(hnd); 727e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 728e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller if (i->tex.target.isArray()) { 729e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller LValue *layer = new_LValue(func, FILE_GPR); 7304da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *src = i->getSrc(lyr); 731e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller const int sat = (i->op == OP_TXF) ? 1 : 0; 732e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 733e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; 7340532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) { 7350532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin for (int s = dim; s >= 1; --s) 7360532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(s, i->getSrc(s - 1)); 7370532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(0, layer); 7380532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin } else { 7390532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(dim, layer); 7400532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin } 741e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 742b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin // Move the indirect reference to the first place 7430532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (i->tex.rIndirectSrc >= 0 && ( 7440532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->op == OP_TXD || chipset < NVISA_GM107_CHIPSET)) { 745b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin Value *hnd = i->getIndirectR(); 746b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin 747b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectR(NULL); 748b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->moveSources(0, 1); 749b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setSrc(0, hnd); 750b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.rIndirectSrc = 0; 751b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.sIndirectSrc = -1; 752b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin } 753ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin // Move the indirect reference to right after the coords 754ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin else if (i->tex.rIndirectSrc >= 0 && chipset >= NVISA_GM107_CHIPSET) { 755ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin Value *hnd = i->getIndirectR(); 756ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin 757ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->setIndirectR(NULL); 758ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->moveSources(arg, 1); 759ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->setSrc(arg, hnd); 760ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->tex.rIndirectSrc = 0; 761ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->tex.sIndirectSrc = -1; 762ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin } 763e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else 764e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller // (nvc0) generate and move the tsc/tic/array source to the front 76519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 76657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 76757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7689807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin Value *ticRel = i->getIndirectR(); 7699807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin Value *tscRel = i->getIndirectS(); 7709807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin 771af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (ticRel) { 7729807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin i->setSrc(i->tex.rIndirectSrc, NULL); 773af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (i->tex.r) 774af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 775af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin ticRel, bld.mkImm(i->tex.r)); 776af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin } 777af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (tscRel) { 7789807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin i->setSrc(i->tex.sIndirectSrc, NULL); 779af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (i->tex.s) 780af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 781af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin tscRel, bld.mkImm(i->tex.s)); 782af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin } 7839807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin 7844da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL; 7857d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin if (arrayIndex) { 7867d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin for (int s = dim; s >= 1; --s) 7877d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin i->setSrc(s, i->getSrc(s - 1)); 7887d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin i->setSrc(0, arrayIndex); 7897d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin } else { 7907d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin i->moveSources(0, 1); 7917d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin } 79257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 793e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller if (arrayIndex) { 794e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller int sat = (i->op == OP_TXF) ? 1 : 0; 795e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 796e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat; 797e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } else { 79857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.loadImm(src, 0); 799e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } 80057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8019807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin if (ticRel) 80257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src); 8039807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin if (tscRel) 80457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src); 80557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 80657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, src); 80757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 80857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 80919ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // For nvc0, the sample id has to be in the second operand, as the offset 81019ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // does. Right now we don't know how to pass both in, and this case can't 81119ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // happen with OpenGL. On nve0, the sample id is part of the texture 81219ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // coordinate argument. 81319ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin assert(chipset >= NVISA_GK104_CHIPSET || 81419ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin !i->tex.useOffsets || !i->tex.target.isMS()); 81519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin 816f782d6e792db2ed7773a2d22866dbcdb1e4062eeIlia Mirkin // offset is between lod and dc 81757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->tex.useOffsets) { 81857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int n, c; 81971c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 8208aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->op != OP_TXD || chipset < NVISA_GK104_CHIPSET) { 8218aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.target.isShadow()) 8228aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin s--; 8238aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->srcExists(s)) // move potential predicate out of the way 8248aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->moveSources(s, 1); 8258aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.useOffsets == 4 && i->srcExists(s + 1)) 8268aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->moveSources(s + 1, 1); 8278aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 828f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin if (i->op == OP_TXG) { 829f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // Either there is 1 offset, which goes into the 2 low bytes of the 830f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // first source, or there are 4 offsets, which go into 2 sources (8 831f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // values, 1 byte each). 832a48a343c299a6486a1540cdf7d083f38aa4ace55Ilia Mirkin Value *offs[2] = {NULL, NULL}; 833f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (n = 0; n < i->tex.useOffsets; n++) { 834f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 2; ++c) { 835f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if ((n % 2) == 0 && c == 0) 836a48a343c299a6486a1540cdf7d083f38aa4ace55Ilia Mirkin bld.mkMov(offs[n / 2] = bld.getScratch(), i->offset[n][c].get()); 837f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin else 838f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkOp3(OP_INSBF, TYPE_U32, 839f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2], 840f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->offset[n][c].get(), 841f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)), 842f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2]); 843f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 844f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 845f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s, offs[0]); 846f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if (offs[1]) 847f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s + 1, offs[1]); 848f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } else { 849f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin unsigned imm = 0; 850f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin assert(i->tex.useOffsets == 1); 851f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 3; ++c) { 852f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin ImmediateValue val; 853fb1afd1ea5fd25d82c75c5c3a2aba0bcb53b6d47Ilia Mirkin if (!i->offset[0][c].getImmediate(val)) 854fb1afd1ea5fd25d82c75c5c3a2aba0bcb53b6d47Ilia Mirkin assert(!"non-immediate offset passed to non-TXG"); 855f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin imm |= (val.reg.data.u32 & 0xf) << (c * 4); 856f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 8578aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) { 8588aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // The offset goes into the upper 16 bits of the array index. So 8598aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // create it if it's not already there, and INSBF it if it already 8608aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // is. 861b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin s = (i->tex.rIndirectSrc >= 0) ? 1 : 0; 8620532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (chipset >= NVISA_GM107_CHIPSET) 8630532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin s += dim; 8648aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.target.isArray()) { 8650532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s), 8668aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin bld.loadImm(NULL, imm), bld.mkImm(0xc10), 867b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->getSrc(s)); 8688aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 869b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->moveSources(s, 1); 870b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setSrc(s, bld.loadImm(NULL, imm << 16)); 8718aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 8728aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 8738aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->setSrc(s, bld.loadImm(NULL, imm)); 8748aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 875f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } 87657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 87757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 87819ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 87971c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 88071c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // If TEX requires more than 4 sources, the 2nd register tuple must be 88171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // aligned to 4, even if it consists of just a single 4-byte register. 88271c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 88371c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. 88471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 88571c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 88671c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (s > 4 && s < 7) { 88771c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (i->srcExists(s)) // move potential predicate out of the way 88871c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->moveSources(s, 7 - s); 88971c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller while (s < 7) 89071c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->setSrc(s++, bld.loadImm(NULL, 0)); 89171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 89271c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 89371c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller 89457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 89557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 89657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 89757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 89857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleManualTXD(TexInstruction *i) 89957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 90057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller static const uint8_t qOps[4][2] = 90157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 90257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 90357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 90457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 90557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 90657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller }; 90757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[4][4]; 90857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *crd[3]; 90957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *tex; 91057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *zero = bld.loadImm(bld.getSSA(), 0); 91157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int l, c; 91269e8b476d07544d6ef06414a1a78ce5c04761fdbIlia Mirkin const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 913f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin 914f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // This function is invoked after handleTEX lowering, so we have to expect 915f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // the arguments in the order that the hw wants them. For Fermi, array and 916f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // indirect are both in the leading arg, while for Kepler, array and 917f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // indirect are separate (and both precede the coordinates). Maxwell is 918f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // handled in a separate function. 919f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin unsigned array; 920f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin if (targ->getChipset() < NVISA_GK104_CHIPSET) 921f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0; 922f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin else 923f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin array = i->tex.target.isArray() + (i->tex.rIndirectSrc >= 0); 92457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 92557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_TEX; // no need to clone dPdx/dPdy later 92657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 92757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 92857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller crd[c] = bld.getScratch(); 92957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 93057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADON, TYPE_NONE, NULL); 93157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) { 9326eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin Value *src[3], *val; 93357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // mov coordinates from lane l to all lanes 93457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 935afea9bae67208cdb00b27a60c9cb013bf7d6de52Ilia Mirkin bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero); 93657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdx from lane l to lanes dx 93757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 93857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); 93957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdy from lane l to lanes dy 94057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 94157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); 9426eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin // normalize cube coordinates 9436eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin if (i->tex.target.isCube()) { 9446eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) 9456eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]); 9466eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin val = bld.getScratch(); 9476eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); 9486eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); 9496eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp1(OP_RCP, TYPE_F32, val, val); 9506eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) 9516eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val); 9526eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } else { 9536eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < dim; ++c) 9546eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = crd[c]; 9556eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } 95657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // texture 957a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez bld.insert(tex = cloneForward(func, i)); 95857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 9596eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin tex->setSrc(c + array, src[c]); 96057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // save results 96157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 96257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mov; 96357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[c][l] = bld.getSSA(); 96457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov = bld.mkMov(def[c][l], tex->getDef(c)); 96557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->fixed = 1; 96657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->lanes = 1 << l; 96757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 96857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 96957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); 97057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 97157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 97257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); 97357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) 97457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller u->setSrc(l, def[c][l]); 97557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 97657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 97757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->bb->remove(i); 97857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 97957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 98057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 98157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 98257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTXD(TexInstruction *txd) 98357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 98469e8b476d07544d6ef06414a1a78ce5c04761fdbIlia Mirkin int dim = txd->tex.target.getDim() + txd->tex.target.isCube(); 9858aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin unsigned arg = txd->tex.target.getArgCount(); 9868aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin unsigned expected_args = arg; 9878aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin const int chipset = prog->getTarget()->getChipset(); 9888aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin 9898aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 9908aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (!txd->tex.target.isArray() && txd->tex.useOffsets) 9918aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin expected_args++; 992b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin if (txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0) 993b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin expected_args++; 9948aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 9958aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (txd->tex.useOffsets) 9968aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin expected_args++; 9977f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin if (!txd->tex.target.isArray() && ( 9987f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0)) 9997f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin expected_args++; 10008aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 10018aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin 10028aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (expected_args > 4 || 10038aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin dim > 2 || 100469e8b476d07544d6ef06414a1a78ce5c04761fdbIlia Mirkin txd->tex.target.isShadow()) 10058aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin txd->op = OP_TEX; 100657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 100757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleTEX(txd); 100838a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller while (txd->srcExists(arg)) 100957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++arg; 101057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 10119c930639d9f6d713ccfd16b390a41a9f584f348cChristoph Bumiller txd->tex.derivAll = true; 10128aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (txd->op == OP_TEX) 101357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleManualTXD(txd); 101457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 10158aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin assert(arg == expected_args); 101657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int c = 0; c < dim; ++c) { 101738a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); 101838a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); 10199362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller txd->dPdx[c].set(NULL); 10209362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller txd->dPdy[c].set(NULL); 102157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 10222ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin 10232ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin // In this case we have fewer than 4 "real" arguments, which means that 10242ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin // handleTEX didn't apply any padding. However we have to make sure that 10252ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin // the second "group" of arguments still gets padded up to 4. 10262ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 10272ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin int s = arg + 2 * dim; 10282ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin if (s >= 4 && s < 7) { 10292ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin if (txd->srcExists(s)) // move potential predicate out of the way 10302ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin txd->moveSources(s, 7 - s); 10312ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin while (s < 7) 10322ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin txd->setSrc(s++, bld.loadImm(NULL, 0)); 10332ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin } 10342ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin } 10352ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin 103657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 103757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 103857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 103957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 104030cb66cd745fc793a2349f1d17046c50cd51c558Christoph BumillerNVC0LoweringPass::handleTXQ(TexInstruction *txq) 104130cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller{ 10425877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin const int chipset = prog->getTarget()->getChipset(); 10435877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin if (chipset >= NVISA_GK104_CHIPSET && txq->tex.rIndirectSrc < 0) 10445877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin txq->tex.r += prog->driver->io.texBindBase / 4; 10455877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin 1046346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin if (txq->tex.rIndirectSrc < 0) 104720e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin return true; 104820e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 104920e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin Value *ticRel = txq->getIndirectR(); 105020e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 105120e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->setIndirectS(NULL); 105220e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->tex.sIndirectSrc = -1; 105320e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1054346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin assert(ticRel); 1055346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin 105620e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin if (chipset < NVISA_GK104_CHIPSET) { 105720e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 105820e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1059346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin txq->setSrc(txq->tex.rIndirectSrc, NULL); 1060346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin if (txq->tex.r) 1061346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 1062346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin ticRel, bld.mkImm(txq->tex.r)); 106320e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1064346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, src, ticRel, bld.mkImm(0x17)); 106520e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 106620e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->moveSources(0, 1); 106720e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->setSrc(0, src); 106820e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin } else { 10699cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset Value *hnd = loadTexHandle(txq->getIndirectR(), txq->tex.r); 107020e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->tex.r = 0xff; 107120e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->tex.s = 0x1f; 107220e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1073b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->setIndirectR(NULL); 1074b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->moveSources(0, 1); 1075b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->setSrc(0, hnd); 1076b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->tex.rIndirectSrc = 0; 107720e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin } 107820e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 107930cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return true; 108030cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller} 108130cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller 108230cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumillerbool 1083423f64e83ab5b1ea7de475ae80300a8408522743Ilia MirkinNVC0LoweringPass::handleTXLQ(TexInstruction *i) 1084423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin{ 1085423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The outputs are inverted compared to what the TGSI instruction 1086423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * expects. Take that into account in the mask. 1087423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 1088423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin assert((i->tex.mask & ~3) == 0); 1089423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 1) 1090423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 2; 1091423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin else if (i->tex.mask == 2) 1092423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 1; 1093423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin handleTEX(i); 1094423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.setPosition(i, true); 1095423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 1096423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The returned values are not quite what we want: 1097423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (a) convert from s16/u16 to f32 1098423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (b) multiply by 1/256 1099423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 1100423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin for (int def = 0; def < 2; ++def) { 1101423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (!i->defExists(def)) 1102423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin continue; 1103423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin enum DataType type = TYPE_S16; 1104423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 2 || def > 0) 1105423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin type = TYPE_U16; 1106423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), type, i->getDef(def)); 1107423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), 1108423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); 1109423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 1110423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 3) { 1111423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin LValue *t = new_LValue(func, FILE_GPR); 1112423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(t, i->getDef(0)); 1113423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(0), i->getDef(1)); 1114423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(1), t); 1115423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 1116423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return true; 1117423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin} 1118423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 1119423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkinbool 11207c47db359e193f21be796df3a7b5d037dd42b28fSamuel PitoisetNVC0LoweringPass::handleBUFQ(Instruction *bufq) 11212c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin{ 11227c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->op = OP_MOV; 11237c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->setSrc(0, loadBufLength32(bufq->getIndirect(0, 1), 11247c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->getSrc(0)->reg.fileIndex * 16)); 11257c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->setIndirect(0, 0, NULL); 11267c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->setIndirect(0, 1, NULL); 11272c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin return true; 11282c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin} 11292c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin 113014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoisetvoid 1131543fb95473e404b7212eea3f00a23dd0d23758d5Samuel PitoisetNVC0LoweringPass::handleSharedATOMNVE4(Instruction *atom) 1132543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset{ 1133543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); 1134543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1135543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *currBB = atom->bb; 1136543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *tryLockBB = atom->bb->splitBefore(atom, false); 1137543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *joinBB = atom->bb->splitAfter(atom); 1138543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *setAndUnlockBB = new BasicBlock(func); 1139543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *failLockBB = new BasicBlock(func); 1140543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1141543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(currBB, true); 1142543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset assert(!currBB->joinAt); 1143543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); 1144543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1145543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset CmpInstruction *pred = 1146543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 1147543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset TYPE_U32, bld.mkImm(0), bld.mkImm(1)); 1148543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1149543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockBB, CC_ALWAYS, NULL); 1150543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset currBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::TREE); 1151543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1152543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(tryLockBB, true); 1153543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1154543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset Instruction *ld = 1155dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkLoad(TYPE_U32, atom->getDef(0), atom->getSrc(0)->asSym(), 1156dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0)); 1157543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); 1158543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; 1159543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1160543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, setAndUnlockBB, CC_P, ld->getDef(1)); 1161543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL); 1162543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset tryLockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::CROSS); 1163543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset tryLockBB->cfg.attach(&setAndUnlockBB->cfg, Graph::Edge::TREE); 1164543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1165543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset tryLockBB->cfg.detach(&joinBB->cfg); 1166543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.remove(atom); 1167543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1168543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(setAndUnlockBB, true); 1169543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset Value *stVal; 1170543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { 1171543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset // Read the old value, and write the new one. 1172543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset stVal = atom->getSrc(1); 1173543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { 1174543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset CmpInstruction *set = 1175543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(), 1176543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset TYPE_U32, ld->getDef(0), atom->getSrc(1)); 1177543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1178543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkCmp(OP_SLCT, CC_NE, TYPE_U32, (stVal = bld.getSSA()), 1179543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset TYPE_U32, atom->getSrc(2), ld->getDef(0), set->getDef(0)); 1180543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } else { 1181543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset operation op; 1182543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1183543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset switch (atom->subOp) { 1184543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_ADD: 1185543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_ADD; 1186543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1187543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_AND: 1188543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_AND; 1189543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1190543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_OR: 1191543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_OR; 1192543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1193543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_XOR: 1194543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_XOR; 1195543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1196543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MIN: 1197543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_MIN; 1198543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1199543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MAX: 1200543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_MAX; 1201543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1202543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset default: 1203543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset assert(0); 1204543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset return; 1205543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } 1206543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1207543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset stVal = bld.mkOp2v(op, atom->dType, bld.getSSA(), ld->getDef(0), 1208543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset atom->getSrc(1)); 1209543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } 1210543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1211543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset Instruction *st = 1212dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkStore(OP_STORE, TYPE_U32, atom->getSrc(0)->asSym(), 1213dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0), stVal); 1214543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset st->setDef(0, pred->getDef(0)); 1215543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; 1216543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1217543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL); 1218543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset setAndUnlockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::TREE); 1219543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1220543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset // Lock until the store has not been performed. 1221543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(failLockBB, true); 1222543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockBB, CC_NOT_P, pred->getDef(0)); 1223543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL); 1224543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset failLockBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::BACK); 1225543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset failLockBB->cfg.attach(&joinBB->cfg, Graph::Edge::TREE); 1226543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1227543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(joinBB, false); 1228543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; 1229543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset} 1230543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1231543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoisetvoid 123214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel PitoisetNVC0LoweringPass::handleSharedATOM(Instruction *atom) 123314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset{ 123414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); 123514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 123614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset BasicBlock *currBB = atom->bb; 123714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false); 123814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset BasicBlock *joinBB = atom->bb->splitAfter(atom); 123914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 124014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.setPosition(currBB, true); 124114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset assert(!currBB->joinAt); 124214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); 124314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 124414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL); 124514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE); 124614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 124714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.setPosition(tryLockAndSetBB, true); 124814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 124914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Instruction *ld = 1250dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkLoad(TYPE_U32, atom->getDef(0), atom->getSrc(0)->asSym(), 1251dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0)); 125214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); 125314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; 125414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 125514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Value *stVal; 125614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { 125714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset // Read the old value, and write the new one. 125814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset stVal = atom->getSrc(1); 125914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { 126014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset CmpInstruction *set = 126114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 126214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset TYPE_U32, ld->getDef(0), atom->getSrc(1)); 126314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset set->setPredicate(CC_P, ld->getDef(1)); 126414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 12656526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset Instruction *selp = 12666526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0), 12676526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset atom->getSrc(2), set->getDef(0)); 12686526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset selp->src(2).mod = Modifier(NV50_IR_MOD_NOT); 126914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset selp->setPredicate(CC_P, ld->getDef(1)); 127014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 127114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset stVal = selp->getDef(0); 127214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } else { 127314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset operation op; 127414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 127514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset switch (atom->subOp) { 127614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_ADD: 127714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_ADD; 127814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 127914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_AND: 128014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_AND; 128114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 128214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_OR: 128314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_OR; 128414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 128514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_XOR: 128614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_XOR; 128714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 128814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MIN: 128914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_MIN; 129014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 129114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MAX: 129214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_MAX; 129314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 129414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset default: 129514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset assert(0); 1296e05492fd7f0e1a9454482a9174f5870b8cb5a41eSamuel Pitoiset return; 129714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } 129814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 129914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Instruction *i = 130014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0), 130114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset atom->getSrc(1)); 130214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset i->setPredicate(CC_P, ld->getDef(1)); 130314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 130414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset stVal = i->getDef(0); 130514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } 130614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 130714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Instruction *st = 1308dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkStore(OP_STORE, TYPE_U32, atom->getSrc(0)->asSym(), 1309dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0), stVal); 131014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset st->setPredicate(CC_P, ld->getDef(1)); 131114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; 131214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 131314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset // Loop until the lock is acquired. 131414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1)); 131514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK); 131614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS); 131714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL); 131814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 131914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.remove(atom); 132014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 132114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.setPosition(joinBB, false); 132214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; 132314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset} 132414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 13252c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkinbool 1326c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph BumillerNVC0LoweringPass::handleATOM(Instruction *atom) 1327c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller{ 1328c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller SVSemantic sv; 13297b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base; 1330c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1331c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller switch (atom->src(0).getFile()) { 1332c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_LOCAL: 1333c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller sv = SV_LBASE; 1334c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 1335c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_SHARED: 1336839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset // For Fermi/Kepler, we have to use ld lock/st unlock to perform atomic 1337839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset // operations on shared memory. For Maxwell, ATOMS is enough. 1338839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset if (targ->getChipset() < NVISA_GK104_CHIPSET) 1339543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset handleSharedATOM(atom); 1340839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset else if (targ->getChipset() < NVISA_GM107_CHIPSET) 1341839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset handleSharedATOMNVE4(atom); 134214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset return true; 1343c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller default: 134461d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); 1345b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); 1346c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin assert(base->reg.size == 8); 1347c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin if (ptr) 1348c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); 1349c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin assert(base->reg.size == 8); 1350c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin atom->setIndirect(0, 0, base); 135161d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 1352b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 1353b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin // Harden against out-of-bounds accesses 1354b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *offset = bld.loadImm(NULL, atom->getSrc(0)->reg.data.offset + typeSizeof(atom->sType)); 1355b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *length = loadBufLength32(ind, atom->getSrc(0)->reg.fileIndex * 16); 1356b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *pred = new_LValue(func, FILE_PREDICATE); 1357b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin if (ptr) 1358b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, ptr); 1359b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); 1360b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin atom->setPredicate(CC_NOT_P, pred); 1361b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin if (atom->defExists(0)) { 1362b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *zero, *dst = atom->getDef(0); 1363b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin atom->setDef(0, bld.getSSA()); 1364b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 1365b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.setPosition(atom, true); 1366b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkMov((zero = bld.getSSA()), bld.mkImm(0)) 1367b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin ->setPredicate(CC_P, pred); 1368b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkOp2(OP_UNION, TYPE_U32, dst, atom->getDef(0), zero); 1369b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin } 1370b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 1371c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 1372c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller } 1373c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin base = 1374c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0)); 1375c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1376c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setSrc(0, cloneShallow(func, atom->getSrc(0))); 1377c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 1378c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller if (ptr) 1379c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr); 13807b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin atom->setIndirect(0, 1, NULL); 1381c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setIndirect(0, 0, base); 1382c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1383c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 1384c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller} 1385c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 138675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumillerbool 138775f1f852b00ad0d766684d01695322b93a2acd55Christoph BumillerNVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) 138875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller{ 1389839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset if (targ->getChipset() < NVISA_GM107_CHIPSET) { 1390839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset if (cas->src(0).getFile() == FILE_MEMORY_SHARED) { 1391839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM(). 1392839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset return false; 1393839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset } 139414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } 139514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 139675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && 139775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) 139875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return false; 139975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, true); 140075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 140175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (needCctl) { 140275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0)); 140375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->setIndirect(0, 0, cas->getIndirect(0, 0)); 140475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->fixed = 1; 140575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->subOp = NV50_IR_SUBOP_CCTL_IV; 140675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->isPredicated()) 140775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->setPredicate(cas->cc, cas->getPredicate()); 140875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 140975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 1410df043f076464d817a9d88c4c43757e65b6eae3f9Ilia Mirkin if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { 141175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // CAS is crazy. It's 2nd source is a double reg, and the 3rd source 141275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // should be set to the high part of the double reg or bad things will 141375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // happen elsewhere in the universe. 141475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // Also, it sometimes returns the new value instead of the old one 141575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // under mysterious circumstances. 141675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Value *dreg = bld.getSSA(8); 141775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, false); 141875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2)); 141975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->setSrc(1, dreg); 1420df043f076464d817a9d88c4c43757e65b6eae3f9Ilia Mirkin cas->setSrc(2, dreg); 142175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 142275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 142375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return true; 142475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller} 142575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 14264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 1427b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off, uint16_t base) 14284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 1429d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 1430b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset off += base; 1431b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset 14324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 14334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 14344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 14354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 14364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 1437b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off, uint16_t base) 1438c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin{ 1439d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 1440b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset off += base; 1441c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 14427b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin if (ptr) 14437b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); 14447b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin 1445c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin return bld. 1446c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr); 1447c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin} 1448c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 1449c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkininline Value * 1450b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off, uint16_t base) 1451c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin{ 1452d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 1453b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset off += base; 1454c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 14557b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin if (ptr) 14567b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); 14577b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin 1458c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin return bld. 1459c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr); 1460c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin} 1461c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 1462c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkininline Value * 1463b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadBufInfo64(Value *ptr, uint32_t off) 1464b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset{ 1465b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset return loadResInfo64(ptr, off, prog->driver->io.bufInfoBase); 1466b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset} 1467b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset 1468b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoisetinline Value * 1469b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadBufLength32(Value *ptr, uint32_t off) 1470b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset{ 1471b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset return loadResLength32(ptr, off, prog->driver->io.bufInfoBase); 1472b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset} 1473b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset 1474b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoisetinline Value * 14754f58b78c309db372d408912ca87e88d319b895daSamuel PitoisetNVC0LoweringPass::loadUboInfo64(Value *ptr, uint32_t off) 14764f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset{ 14774f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset return loadResInfo64(ptr, off, prog->driver->io.uboInfoBase); 14784f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset} 14794f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset 14804f58b78c309db372d408912ca87e88d319b895daSamuel Pitoisetinline Value * 14814f58b78c309db372d408912ca87e88d319b895daSamuel PitoisetNVC0LoweringPass::loadUboLength32(Value *ptr, uint32_t off) 14824f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset{ 14834f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset return loadResLength32(ptr, off, prog->driver->io.uboInfoBase); 14844f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset} 14854f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset 14864f58b78c309db372d408912ca87e88d319b895daSamuel Pitoisetinline Value * 14874506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) 14884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 14894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint8_t b = prog->driver->io.msInfoCBSlot; 14904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off += prog->driver->io.msInfoBase; 14914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 14924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 14934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 14944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 14954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller/* On nvc0, surface info is obtained via the surface binding points passed 14964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * to the SULD/SUST instructions. 14974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * On nve4, surface info is stored in c[] and is used by various special 1498c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset * instructions, e.g. for clamping coordinates or generating an address. 14994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * They couldn't just have added an equivalent to TIC now, couldn't they ? 15004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller */ 1501c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_ADDR 0x00 1502c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_FMT 0x04 1503c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM_X 0x08 1504c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_PITCH 0x0c 1505c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM_Y 0x10 1506c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_ARRAY 0x14 1507c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM_Z 0x18 1508c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_UNK1C 0x1c 1509c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_WIDTH 0x20 1510c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_HEIGHT 0x24 1511c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DEPTH 0x28 1512c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_TARGET 0x2c 1513c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_BSIZE 0x30 1514c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_RAW_X 0x34 1515c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_MS_X 0x38 1516c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_MS_Y 0x3c 1517c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset 1518c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO__STRIDE 0x40 1519c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset 1520c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8) 1521c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4) 1522c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) 15234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1524ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoisetinline Value * 1525ed3a284382b194292460a99c0dfe7fd3feccca40Samuel PitoisetNVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off) 1526ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset{ 1527ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset uint32_t base = slot * NVC0_SU_INFO__STRIDE; 1528ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset 1529ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset if (ptr) { 1530ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); 1531ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); 1532ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6)); 1533ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset base = 0; 1534ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset } 1535ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset off += base; 1536ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset 1537ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset return loadResInfo32(ptr, off, prog->driver->io.suInfoBase); 1538ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset} 1539ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset 15404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerstatic inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) 15414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 15424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (su->tex.target.getEnum()) { 15434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_BUFFER: return NV50_IR_SUBOP_SUCLAMP_PL(0, 1); 15444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_RECT: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D_ARRAY: return (c == 1) ? 15474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_PL(0, 2) : 15484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 15504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 15514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_3D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 15574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(0); 15584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return 0; 15594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 15604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 15614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 15627c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoisetbool 1563d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel PitoisetNVC0LoweringPass::handleSUQ(TexInstruction *suq) 15647c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset{ 1565b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin int mask = suq->tex.mask; 1566d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset int dim = suq->tex.target.getDim(); 1567d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset int arg = dim + (suq->tex.target.isArray() || suq->tex.target.isCube()); 1568d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset Value *ind = suq->getIndirectR(); 1569ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset int slot = suq->tex.r; 1570b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin int c, d; 1571d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1572b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin for (c = 0, d = 0; c < 3; ++c, mask >>= 1) { 1573b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (c >= arg || !(mask & 1)) 1574b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin continue; 1575d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1576b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin int offset; 1577d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1578b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (c == 1 && suq->tex.target == TEX_TARGET_1D_ARRAY) { 1579c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset offset = NVC0_SU_INFO_SIZE(2); 1580b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin } else { 1581c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset offset = NVC0_SU_INFO_SIZE(c); 1582d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset } 1583ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset)); 1584b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (c == 2 && suq->tex.target.isCube()) 1585b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1), 1586b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.loadImm(NULL, 6)); 1587d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset } 1588d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1589b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (mask & 1) { 1590b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (suq->tex.target.isMS()) { 1591ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); 1592ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); 1593b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y); 1594b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms); 1595b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin } else { 1596b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.mkMov(suq->getDef(d++), bld.loadImm(NULL, 1)); 1597b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin } 1598d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset } 1599d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1600d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset bld.remove(suq); 16017c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset return true; 16027c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset} 16037c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset 16044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 16054506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) 16064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 16074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int arg = tex->tex.target.getArgCount(); 1608ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset int slot = tex->tex.r; 16094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS) 16114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D; 16124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 16134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY) 16144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D_ARRAY; 16154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 16164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return; 16174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *x = tex->getSrc(0); 16194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = tex->getSrc(1); 16204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *s = tex->getSrc(arg - 1); 16214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); 1623b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin Value *ind = tex->getIndirectR(); 16244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1625ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); 1626ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); 16274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); 16294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); 16304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_AND, TYPE_U32, ts, s, bld.loadImm(NULL, 0x7)); 16324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3)); 16334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dx = loadMsInfo32(ts, 0x0); 16354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dy = loadMsInfo32(ts, 0x4); 16364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx); 16384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy); 16394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(0, tx); 16414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(1, ty); 16424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->moveSources(arg, -1); 16434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 16444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// Sets 64-bit "generic address", predicate and format sources for SULD/SUST. 16464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// They're computed from the coordinates using the surface info in c[] space. 16474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 16484506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) 16494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 16504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Instruction *insn; 16514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP; 16524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool raw = 16534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; 1654ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset const int slot = su->tex.r; 16554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int dim = su->tex.target.getDim(); 1656e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 16574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller int c; 16584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *zero = bld.mkImm(0); 16594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *p1 = NULL; 16604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *v; 16614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *src[3]; 16624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *bf, *eau, *off; 16634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *addr, *pred; 1664ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ind = su->getIndirectR(); 16654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off = bld.getScratch(4); 16674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = bld.getScratch(4); 16684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller addr = bld.getSSA(8); 16694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred = bld.getScratch(1, FILE_PREDICATE); 16704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.setPosition(su, false); 16724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller adjustCoordinatesMS(su); 16744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate clamped coordinates 16764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (c = 0; c < arg; ++c) { 16778b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset int dimc = c; 16788b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset 16798b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset if (c == 1 && su->tex.target == TEX_TARGET_1D_ARRAY) { 16808b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset // The array index is stored in the Z component for 1D arrays. 16818b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset dimc = 2; 16828b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset } 16838b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset 16844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = bld.getScratch(); 16854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (c == 0 && raw) 1686ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X); 16874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 1688ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc)); 16894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) 16908b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset ->subOp = getSuClampSubOp(su, dimc); 16914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 16924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (; c < 3; ++c) 16934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = zero; 16944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set predicate output 16964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 16974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[0]->getInsn()->setFlagsDef(1, pred); 16984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 1699e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset if (su->tex.target.isArray() || su->tex.target.isCube()) { 17004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller p1 = bld.getSSA(1, FILE_PREDICATE); 17014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[dim]->getInsn()->setFlagsDef(1, p1); 17024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate pixel offset 17054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) { 17064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target != TEX_TARGET_BUFFER) 17074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); 17084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 17094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 3) { 1710ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); 17114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) 17124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 17134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1714ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); 17154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) 17164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l 17174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 17184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 2); 1719ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); 17204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) 1721e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ? 17224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 17234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate effective address part 1 17264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 17274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (raw) { 17284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = src[0]; 17294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 1730ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); 17314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) 17324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); 17334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 17354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = src[1]; 17364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *z = src[2]; 17374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint16_t subOp = 0; 17384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (dim) { 17404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 1: 17414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller y = zero; 17424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = zero; 17434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 17444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 2: 17454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = off; 1746e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset if (!su->tex.target.isArray() && !su->tex.target.isCube()) { 1747ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); 17484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 17494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 17514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 17524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 17534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 3); 17544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 17554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z); 17574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->subOp = subOp; 17584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->setFlagsDef(1, pred); 17594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // part 2 1762ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR); 17634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 17654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = v; 17664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 17674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); 17684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // add array layer offset 1770e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset if (su->tex.target.isArray() || su->tex.target.isCube()) { 1771ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); 17724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) 17734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) 17744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 17754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 17764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau) 17774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32 17784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // combine predicates 17794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(p1); 17804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1); 17814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom) { 17844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *lo = bf; 17854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 17864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller lo = zero; 17874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(off, bf); 17884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // bf == g[] address & 0xff 17904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // eau == g[] address >> 8 17914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, bf, lo, bld.loadImm(NULL, 0x6540), eau); 17924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau); 17934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 17944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) { 17954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Convert from u32 to u8 address format, which is what the library code 17964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // doing SULDP currently uses. 17974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: can SUEAU do this ? 17984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: does it matter that we don't mask high bytes in bf ? 17994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Grrr. 18004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8)); 18014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off); 18024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 18034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau); 18054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom && su->tex.target == TEX_TARGET_BUFFER) 18074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off); 18084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // let's just set it 0 for raw access and hope it works 18104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = raw ? 1811ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); 18124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // get rid of old coordinate sources, make space for fmt info and predicate 18144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->moveSources(arg, 3 - arg); 18154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set 64 bit address and 32-bit format sources 18164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(0, addr); 18174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(1, v); 18184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(2, pred); 1819fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 1820fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset // prevent read fault when the image is not actually bound 1821fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset CmpInstruction *pred1 = 1822fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 1823fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset TYPE_U32, bld.mkImm(0), 1824ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); 18256fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset 1826e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset if (su->op != OP_SUSTP && su->tex.format) { 18276fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset const TexInstruction::ImgFormatDesc *format = su->tex.format; 18286fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset int blockwidth = format->bits[0] + format->bits[1] + 18296fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset format->bits[2] + format->bits[3]; 18306fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset 1831e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset // make sure that the format doesn't mismatch 1832e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset assert(format->components != 0); 1833e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0), 1834e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset TYPE_U32, bld.loadImm(NULL, blockwidth / 8), 1835ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), 1836e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset pred1->getDef(0)); 18376fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset } 1838fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset su->setPredicate(CC_NOT_P, pred1->getDef(0)); 18396fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset 18406fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // TODO: initialize def values to 0 when the surface operation is not 18416fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // performed (not needed for stores). Also, fix the "address bounds test" 18426fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // subtests from arb_shader_image_load_store-invalid for buffers, because it 18436fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // seems like that the predicate is not correctly set by suclamp. 18444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 18454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18463ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkinstatic DataType 18473ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkingetSrcType(const TexInstruction::ImgFormatDesc *t, int c) 18483ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin{ 18493ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin switch (t->type) { 18503ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case FLOAT: return t->bits[c] == 16 ? TYPE_F16 : TYPE_F32; 18513ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UNORM: return t->bits[c] == 8 ? TYPE_U8 : TYPE_U16; 18523ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SNORM: return t->bits[c] == 8 ? TYPE_S8 : TYPE_S16; 18533ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UINT: 18543ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return (t->bits[c] == 8 ? TYPE_U8 : 18553ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin (t->bits[c] == 16 ? TYPE_U16 : TYPE_U32)); 18563ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SINT: 18573ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return (t->bits[c] == 8 ? TYPE_S8 : 18583ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin (t->bits[c] == 16 ? TYPE_S16 : TYPE_S32)); 18593ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 18603ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_NONE; 18613ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin} 18623ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 18633ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkinstatic DataType 18643ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkingetDestType(const ImgType type) { 18653ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin switch (type) { 18663ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case FLOAT: 18673ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UNORM: 18683ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SNORM: 18693ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_F32; 18703ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UINT: 18713ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_U32; 18723ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SINT: 18733ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_S32; 18743ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin default: 18753ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin assert(!"Impossible type"); 18763ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_NONE; 18773ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 18783ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin} 18793ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 18804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 18813ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkinNVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) 18824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 18833ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin const TexInstruction::ImgFormatDesc *format = su->tex.format; 18843ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin int width = format->bits[0] + format->bits[1] + 18853ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin format->bits[2] + format->bits[3]; 18863ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin Value *untypedDst[4] = {}; 18873ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin Value *typedDst[4] = {}; 18883ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 18893ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // We must convert this to a generic load. 18903ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->op = OP_SULDB; 18913ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 18923ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->dType = typeOfSize(width / 8); 18933ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->sType = TYPE_U8; 18944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18953ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < width / 32; i++) 18963ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin untypedDst[i] = bld.getSSA(); 18973ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (width < 32) 18983ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin untypedDst[0] = bld.getSSA(); 18993ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19003ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < 4; i++) { 19013ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin typedDst[i] = su->getDef(i); 19023ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19033ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19043ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Set the untyped dsts as the su's destinations 19053ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < 4; i++) 19063ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->setDef(i, untypedDst[i]); 19073ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19083ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.setPosition(su, true); 19093ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19103ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Unpack each component into the typed dsts 19113ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin int bits = 0; 19123ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < 4; bits += format->bits[i], i++) { 19133ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (!typedDst[i]) 19143ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin continue; 19153ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (i >= format->components) { 19163ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->type == FLOAT || 19173ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin format->type == UNORM || 19183ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin format->type == SNORM) 19193ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.loadImm(typedDst[i], i == 3 ? 1.0f : 0.0f); 19203ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else 19213ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.loadImm(typedDst[i], i == 3 ? 1 : 0); 19223ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin continue; 19233ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19243ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19253ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Get just that component's data into the relevant place 19263ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->bits[i] == 32) 19273ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkMov(typedDst[i], untypedDst[i]); 19283ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->bits[i] == 16) 19293ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, getDestType(format->type), typedDst[i], 19303ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin getSrcType(format, i), untypedDst[i / 2]) 19313ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin ->subOp = (i & 1) << (format->type == FLOAT ? 0 : 1); 19323ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->bits[i] == 8) 19333ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, getDestType(format->type), typedDst[i], 19343ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin getSrcType(format, i), untypedDst[0])->subOp = i; 19353ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else { 19363ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_EXTBF, TYPE_U32, typedDst[i], untypedDst[bits / 32], 19373ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkImm((bits % 32) | (format->bits[i] << 8))); 19383ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->type == UNORM || format->type == SNORM) 19393ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], getSrcType(format, i), typedDst[i]); 19403ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19413ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19423ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Normalize / convert as necessary 19433ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->type == UNORM) 19443ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, typedDst[i], typedDst[i], bld.loadImm(NULL, 1.0f / ((1 << format->bits[i]) - 1))); 19453ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->type == SNORM) 19463ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, typedDst[i], typedDst[i], bld.loadImm(NULL, 1.0f / ((1 << (format->bits[i] - 1)) - 1))); 19473ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->type == FLOAT && format->bits[i] < 16) { 19483ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, typedDst[i], typedDst[i], bld.loadImm(NULL, 15 - format->bits[i])); 19493ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]); 19504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 19514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 19528e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin 19538e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin if (format->bgra) { 19548e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin std::swap(typedDst[0], typedDst[2]); 19558e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin } 19563ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin} 19573ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19583ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkinvoid 19593ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkinNVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) 19603ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin{ 19613ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin processSurfaceCoordsNVE4(su); 19623ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19633ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (su->op == OP_SULDP) 19643ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin convertSurfaceFormat(su); 19654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 19664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SUREDB || su->op == OP_SUREDP) { 19674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *pred = su->getSrc(2); 19684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller CondCode cc = CC_NOT_P; 19694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->getPredicate()) { 19704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred = bld.getScratch(1, FILE_PREDICATE); 19714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller cc = su->cc; 19724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (cc == CC_NOT_P) { 19734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); 19744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 19754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); 19764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT); 19774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 19784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 1979fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset Instruction *red = bld.mkOp(OP_ATOM, su->dType, bld.getSSA()); 19804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->subOp = su->subOp; 1981974ab614d3b883bf094d4cdbfdb9792df6625f55Samuel Pitoiset red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0)); 19824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(1, su->getSrc(3)); 19834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->subOp == NV50_IR_SUBOP_ATOM_CAS) 19844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(2, su->getSrc(4)); 19854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setIndirect(0, 0, su->getSrc(0)); 1986fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 1987fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset // make sure to initialize dst value when the atomic operation is not 1988fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset // performed 1989fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); 1990fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 1991fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset assert(cc == CC_NOT_P); 19924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setPredicate(cc, pred); 1993fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset mov->setPredicate(CC_P, pred); 1994fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 1995fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset bld.mkOp2(OP_UNION, TYPE_U32, su->getDef(0), 1996fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset red->getDef(0), mov->getDef(0)); 1997fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 19984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller delete_Instruction(bld.getProgram(), su); 199975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(red, true); 20004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 20013ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 20023ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (su->op == OP_SUSTB || su->op == OP_SUSTP) 20033ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8; 20044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 20054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 2006362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoisetvoid 2007362e17a7123e55d22a188943886a7ead00def6b6Samuel PitoisetNVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) 2008362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset{ 2009ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset const int slot = su->tex.r; 2010362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int dim = su->tex.target.getDim(); 2011362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 2012362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset int c; 2013362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *zero = bld.mkImm(0); 2014362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *src[3]; 2015362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *v; 2016ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ind = su->getIndirectR(); 2017362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2018b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.setPosition(su, false); 2019b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin 2020b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin adjustCoordinatesMS(su); 2021b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin 202219d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset if (ind) { 202319d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset Value *ptr; 202419d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r)); 202519d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); 202619d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset su->setIndirectR(ptr); 202719d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset } 202819d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset 2029362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // get surface coordinates 2030362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset for (c = 0; c < arg; ++c) 2031362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset src[c] = su->getSrc(c); 2032362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset for (; c < 3; ++c) 2033362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset src[c] = zero; 2034362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2035362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // calculate pixel offset 2036362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op == OP_SULDP || su->op == OP_SUREDP) { 2037ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE); 2038362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v)); 2039362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2040362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2041362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // add array layer offset 2042362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->tex.target.isArray() || su->tex.target.isCube()) { 2043ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); 2044362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset assert(dim > 1); 2045362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)); 2046362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2047362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2048362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // prevent read fault when the image is not actually bound 2049362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset CmpInstruction *pred = 2050362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 2051362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset TYPE_U32, bld.mkImm(0), 2052ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); 2053362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op != OP_SUSTP && su->tex.format) { 2054362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const TexInstruction::ImgFormatDesc *format = su->tex.format; 2055362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset int blockwidth = format->bits[0] + format->bits[1] + 2056362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset format->bits[2] + format->bits[3]; 2057362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2058362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset assert(format->components != 0); 2059362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // make sure that the format doesn't mismatch when it's not FMT_NONE 2060362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), 2061362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset TYPE_U32, bld.loadImm(NULL, blockwidth / 8), 2062ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), 2063362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset pred->getDef(0)); 2064362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2065362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setPredicate(CC_NOT_P, pred->getDef(0)); 2066362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset} 2067362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2068362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoisetvoid 2069362e17a7123e55d22a188943886a7ead00def6b6Samuel PitoisetNVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) 2070362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset{ 2071362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->tex.target == TEX_TARGET_1D_ARRAY) { 2072362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset /* As 1d arrays also need 3 coordinates, switching to TEX_TARGET_2D_ARRAY 2073362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset * will simplify the lowering pass and the texture constraints. */ 2074362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->moveSources(1, 1); 2075362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setSrc(1, bld.loadImm(NULL, 0)); 2076362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->tex.target = TEX_TARGET_2D_ARRAY; 2077362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2078362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2079362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset processSurfaceCoordsNVC0(su); 2080362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2081362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op == OP_SULDP) 2082362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset convertSurfaceFormat(su); 2083362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2084362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op == OP_SUREDB || su->op == OP_SUREDP) { 2085362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int dim = su->tex.target.getDim(); 2086362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 2087362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset LValue *addr = bld.getSSA(8); 2088362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *def = su->getDef(0); 2089362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2090362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->op = OP_SULEA; 2091362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2092362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // Set the destination to the address 2093362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->dType = TYPE_U64; 2094362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setDef(0, addr); 2095362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setDef(1, su->getPredicate()); 2096362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2097362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.setPosition(su, true); 2098362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2099362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // Perform the atomic op 2100362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Instruction *red = bld.mkOp(OP_ATOM, su->sType, bld.getSSA()); 2101362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->subOp = su->subOp; 2102362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0)); 2103362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setSrc(1, su->getSrc(arg)); 2104362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (red->subOp == NV50_IR_SUBOP_ATOM_CAS) 2105362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setSrc(2, su->getSrc(arg + 1)); 2106362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setIndirect(0, 0, addr); 2107362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2108362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // make sure to initialize dst value when the atomic operation is not 2109362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // performed 2110362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); 2111362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2112362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset assert(su->cc == CC_NOT_P); 2113362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setPredicate(su->cc, su->getPredicate()); 2114362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset mov->setPredicate(CC_P, su->getPredicate()); 2115362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2116362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.mkOp2(OP_UNION, TYPE_U32, def, red->getDef(0), mov->getDef(0)); 2117362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2118362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset handleCasExch(red, false); 2119362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2120362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset} 2121362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2122c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoisetvoid 2123c68989b2c8de2ad961774cb7402aa206517d88c5Samuel PitoisetNVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) 2124c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset{ 2125c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const int slot = su->tex.r; 2126c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const int dim = su->tex.target.getDim(); 2127c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 2128c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset Value *ind = su->getIndirectR(); 2129c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset int pos = 0; 2130c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2131c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.setPosition(su, false); 2132c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2133c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // add texture handle 2134c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset switch (su->op) { 2135c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset case OP_SUSTP: 2136c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset pos = 4; 2137c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset break; 2138c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset case OP_SUREDP: 2139c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset pos = (su->subOp == NV50_IR_SUBOP_ATOM_CAS) ? 2 : 1; 2140c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset break; 2141c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset default: 2142c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset assert(pos == 0); 2143c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset break; 2144c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset } 2145c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->setSrc(arg + pos, loadTexHandle(ind, slot + 32)); 2146c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2147c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // prevent read fault when the image is not actually bound 2148c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset CmpInstruction *pred = 2149c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 2150c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset TYPE_U32, bld.mkImm(0), 2151c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); 2152c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (su->op != OP_SUSTP && su->tex.format) { 2153c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const TexInstruction::ImgFormatDesc *format = su->tex.format; 2154c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset int blockwidth = format->bits[0] + format->bits[1] + 2155c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset format->bits[2] + format->bits[3]; 2156c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2157c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset assert(format->components != 0); 2158c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // make sure that the format doesn't mismatch when it's not FMT_NONE 2159c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), 2160c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset TYPE_U32, bld.loadImm(NULL, blockwidth / 8), 2161c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), 2162c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset pred->getDef(0)); 2163c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset } 2164c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->setPredicate(CC_NOT_P, pred->getDef(0)); 2165c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset} 2166c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2167c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoisetvoid 2168c68989b2c8de2ad961774cb7402aa206517d88c5Samuel PitoisetNVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su) 2169c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset{ 2170c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset processSurfaceCoordsGM107(su); 2171c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2172c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (su->op == OP_SULDP) 2173c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset convertSurfaceFormat(su); 2174c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2175c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (su->op == OP_SUREDP) { 2176c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset Value *def = su->getDef(0); 2177c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2178c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->op = OP_SUREDB; 2179c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->setDef(0, bld.getSSA()); 2180c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2181c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.setPosition(su, true); 2182c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2183c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // make sure to initialize dst value when the atomic operation is not 2184c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // performed 2185c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); 2186c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2187c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset assert(su->cc == CC_NOT_P); 2188c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset mov->setPredicate(CC_P, su->getPredicate()); 2189c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2190c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0)); 2191c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset } 2192c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset} 2193c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2194c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumillerbool 219557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleWRSV(Instruction *i) 219657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 219757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *st; 219857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym; 219957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr; 220057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 220157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // must replace, $sreg are not writeable 220257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym()); 220357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (addr >= 0x400) 220457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 220557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); 220657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 220757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), 220857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(1)); 220957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->perPatch = i->perPatch; 221057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 221157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 221257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 221357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 221457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 221557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 22169d57c84994abe45133382cf72ae617570bfe89daSamuel PitoisetNVC0LoweringPass::handleLDST(Instruction *i) 22179d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset{ 22189d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->src(0).getFile() == FILE_SHADER_INPUT) { 22199d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (prog->getType() == Program::TYPE_COMPUTE) { 22209d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getSrc(0)->reg.file = FILE_MEMORY_CONST; 22219d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getSrc(0)->reg.fileIndex = 0; 22229d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else 22239d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (prog->getType() == Program::TYPE_GEOMETRY && 22249d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->src(0).isIndirect(0)) { 22259d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // XXX: this assumes vec4 units 22269d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 22279d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 0), bld.mkImm(4)); 22289d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 0, ptr); 22299d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->op = OP_VFETCH; 22309d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else { 22319d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->op = OP_VFETCH; 22329d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP 22339d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 22349d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else if (i->src(0).getFile() == FILE_MEMORY_CONST) { 22354f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset if (targ->getChipset() >= NVISA_GK104_CHIPSET && 22364f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset prog->getType() == Program::TYPE_COMPUTE) { 22374f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // The launch descriptor only allows to set up 8 CBs, but OpenGL 22384f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // requires at least 12 UBOs. To bypass this limitation, we store the 22394f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // addrs into the driver constbuf and we directly load from the global 22404f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // memory. 22414f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1; 22424f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset Value *ind = i->getIndirect(0, 1); 22434f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset 22447f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset if (ind) { 22457f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset // Clamp the UBO index when an indirect access is used to avoid 22467f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset // loading information from the wrong place in the driver cb. 22477f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset ind = bld.mkOp2v(OP_MIN, TYPE_U32, ind, 22487f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), 22497f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset ind, bld.loadImm(NULL, fileIndex)), 22507f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset bld.loadImm(NULL, 12)); 22517f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset } 22527f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset 22534f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset if (i->src(0).isIndirect(1)) { 22544f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType)); 225504ecad97ff7b44bd9afd1bff9108dea199723829Ilia Mirkin Value *ptr = loadUboInfo64(ind, fileIndex * 16); 22564f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset Value *length = loadUboLength32(ind, fileIndex * 16); 22574f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset Value *pred = new_LValue(func, FILE_PREDICATE); 22584f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset if (i->src(0).isIndirect(0)) { 22594f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); 22604f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0)); 22614f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } 22624f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 22634f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset i->setIndirect(0, 1, NULL); 22644f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset i->setIndirect(0, 0, ptr); 22654f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); 22664f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset i->setPredicate(CC_NOT_P, pred); 22674f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset if (i->defExists(0)) { 22684f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset bld.mkMov(i->getDef(0), bld.mkImm(0)); 22694f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } 22704f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } else if (fileIndex >= 0) { 227104ecad97ff7b44bd9afd1bff9108dea199723829Ilia Mirkin Value *ptr = loadUboInfo64(ind, fileIndex * 16); 22724f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset if (i->src(0).isIndirect(0)) { 22734f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); 22744f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } 22754f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 22764f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset i->setIndirect(0, 1, NULL); 22774f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset i->setIndirect(0, 0, ptr); 22784f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } 22794f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } else if (i->src(0).isIndirect(1)) { 22809d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ptr; 22819d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->src(0).isIndirect(0)) 22829d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(), 22839d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 1), bld.mkImm(0x1010), 22849d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 0)); 22859d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset else 22869d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 22879d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 1), bld.mkImm(16)); 22889d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 1, NULL); 22899d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 0, ptr); 22909d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->subOp = NV50_IR_SUBOP_LDC_IS; 22919d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 22929d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { 22939d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); 22949d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->op = OP_VFETCH; 229561d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { 22969d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ind = i->getIndirect(0, 1); 22979d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ptr = loadBufInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); 22989d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // XXX come up with a way not to do this for EVERY little access but 22999d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // rather to batch these up somehow. Unfortunately we've lost the 23009d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // information about the field width by the time we get here. 23019d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType)); 23029d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *length = loadBufLength32(ind, i->getSrc(0)->reg.fileIndex * 16); 23039d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *pred = new_LValue(func, FILE_PREDICATE); 23049d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->src(0).isIndirect(0)) { 23059d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); 23069d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0)); 23079d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 23089d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 1, NULL); 23099d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 0, ptr); 231061d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 23119d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); 23129d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setPredicate(CC_NOT_P, pred); 23139d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->defExists(0)) { 2314b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *zero, *dst = i->getDef(0); 2315b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin i->setDef(0, bld.getSSA()); 2316b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 2317b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.setPosition(i, true); 2318b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkMov((zero = bld.getSSA()), bld.mkImm(0)) 2319b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin ->setPredicate(CC_P, pred); 2320b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkOp2(OP_UNION, TYPE_U32, dst, i->getDef(0), zero); 23219d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 23229d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 23239d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset} 23249d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset 23259d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoisetvoid 232657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::readTessCoord(LValue *dst, int c) 232757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 232857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *laneid = bld.getSSA(); 232957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *x, *y; 233057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 233157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0)); 233257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 233357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 0) { 233457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = dst; 233557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = NULL; 233657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 233757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 1) { 233857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = NULL; 233957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = dst; 234057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 234157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(c == 2); 2342035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin if (prog->driver->prop.tp.domain != PIPE_PRIM_TRIANGLES) { 2343035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin bld.mkMov(dst, bld.loadImm(NULL, 0)); 2344035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin return; 2345035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin } 234657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = bld.getSSA(); 234757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = bld.getSSA(); 234857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 234957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (x) 235057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid); 235157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (y) 23523fc2818f2b9e8a19e5349442e50dcee4858452c6Christoph Bumiller bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid); 235357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 235457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 2) { 235557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y); 235657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst); 235757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 235857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 235957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 236057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 236157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleRDSV(Instruction *i) 236257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 236357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym = i->getSrc(0)->asSym(); 2364ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller const SVSemantic sv = sym->reg.data.sv.sv; 236557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *vtx = NULL; 236657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld; 236757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); 236857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2369ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (addr >= 0x400) { 2370ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // mov $sreg 2371ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 2372ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID 2373ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 2374ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); 2375ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 23767cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin if (sv == SV_VERTEX_COUNT) { 23777cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin bld.setPosition(i, true); 23787cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), bld.mkImm(0x808)); 23797cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin } 238057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 2381ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 238257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2383ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller switch (sv) { 238457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_POSITION: 238557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_FRAGMENT); 2386acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin if (i->srcExists(1)) { 2387acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin // Pass offset through to the interpolation logic 2388acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin ld = bld.mkInterp(NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_OFFSET, 2389acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin i->getDef(0), addr, NULL); 2390acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin ld->setSrc(1, i->getSrc(1)); 2391acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin } else { 2392acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); 2393acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin } 239452c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller break; 239552c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller case SV_FACE: 239652c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller { 239752c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller Value *face = i->getDef(0); 239852c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL); 239952c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller if (i->dType == TYPE_F32) { 2400354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkOp2(OP_OR, TYPE_U32, face, face, bld.mkImm(0x00000001)); 2401354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkOp1(OP_NEG, TYPE_S32, face, face); 2402354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, face, TYPE_S32, face); 240352c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 240452c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 240557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 240657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_TESS_COORD: 240757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); 240857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); 240957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 2410ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NTID: 2411ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NCTAID: 2412ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_GRIDID: 2413ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise 2414ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 2415ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 2416ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); 2417ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller return true; 2418ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 24192aa1197eee442ab960f6ad6b84d4cf58511d6cb7Hans de Goede // Fallthrough 24202aa1197eee442ab960f6ad6b84d4cf58511d6cb7Hans de Goede case SV_WORK_DIM: 2421ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller addr += prog->driver->prop.cp.gridInfoBase; 2422ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller bld.mkLoad(TYPE_U32, i->getDef(0), 242326cc411db87f924003f227874d7a047dd8b5e5a4Samuel Pitoiset bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, 242426cc411db87f924003f227874d7a047dd8b5e5a4Samuel Pitoiset TYPE_U32, addr), NULL); 2425ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller break; 2426af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_INDEX: 2427af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // TODO: Properly pass source as an address in the PIX address space 2428af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // (which can be of the form [r0+offset]). But this is currently 2429af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // unnecessary. 2430af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 2431af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 2432af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 2433af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_POS: { 2434af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin Value *off = new_LValue(func, FILE_GPR); 2435af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 2436af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 2437af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); 2438af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkLoad(TYPE_F32, 2439af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin i->getDef(0), 2440af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkSymbol( 2441d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, 2442af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin TYPE_U32, prog->driver->io.sampleInfoBase + 2443af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin 4 * sym->reg.data.sv.index), 2444af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin off); 2445af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 2446af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin } 2447ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin case SV_SAMPLE_MASK: { 2448b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 2449b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK; 2450ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin Instruction *sampleid = 2451ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp1(OP_PIXLD, TYPE_U32, bld.getSSA(), bld.mkImm(0)); 2452ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin sampleid->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 2453ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin Value *masked = 2454ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ld->getDef(0), 2455ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 2456ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.loadImm(NULL, 1), sampleid->getDef(0))); 2457ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (prog->driver->prop.fp.persampleInvocation) { 2458ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkMov(i->getDef(0), masked); 2459ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } else { 2460ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), ld->getDef(0), masked, 2461ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkImm(0)) 2462ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin ->subOp = 1; 2463ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 2464b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin break; 2465ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 2466517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin case SV_BASEVERTEX: 2467517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin case SV_BASEINSTANCE: 2468517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin case SV_DRAWID: 2469517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin ld = bld.mkLoad(TYPE_U32, i->getDef(0), 2470517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin bld.mkSymbol(FILE_MEMORY_CONST, 2471517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin prog->driver->io.auxCBSlot, 2472517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin TYPE_U32, 2473517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin prog->driver->io.drawInfoBase + 2474517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin 4 * (sv - SV_BASEVERTEX)), 2475517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin NULL); 2476517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin break; 247757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 24787e0036a49258326cc2d875f2960d18c6b3665036Ilia Mirkin if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch) 247957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); 248057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld = bld.mkFetch(i->getDef(0), i->dType, 248157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); 248257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld->perPatch = i->perPatch; 248357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 248457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 248557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 248657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 248757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 248857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 248957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 249057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleDIV(Instruction *i) 249157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 249257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isFloatType(i->dType)) 249357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 2494b5f2c0505fd4f66422e034b041cdf0bc3dc46e99Christoph Bumiller bld.setPosition(i, false); 2495b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1)); 249657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MUL; 249757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, rcp->getDef(0)); 249857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 249957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 250057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 250157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 250257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleMOD(Instruction *i) 250357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 2504b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin if (!isFloatType(i->dType)) 250557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 2506b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin LValue *value = bld.getScratch(typeSizeof(i->dType)); 2507b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1)); 2508b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value); 2509b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp1(OP_TRUNC, i->dType, value, value); 2510b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value); 251157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_SUB; 251257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, value); 251357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 251457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 251557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 251657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 251757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleSQRT(Instruction *i) 251857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 2519c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin if (i->dType == TYPE_F64) { 2520c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin Value *pred = bld.getSSA(1, FILE_PREDICATE); 25219184d9a0bbe8a8b88d676a20f95d66ceee9eaf21Pierre Moreau Value *zero = bld.loadImm(NULL, 0.0); 2522c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin Value *dst = bld.getSSA(8); 2523c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0)); 2524c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero); 2525c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkOp3(OP_SELP, TYPE_U64, dst, zero, dst, pred); 2526c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin i->op = OP_MUL; 2527c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin i->setSrc(1, dst); 2528c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin // TODO: Handle this properly with a library function 2529c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin } else { 2530c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.setPosition(i, true); 2531c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin i->op = OP_RSQ; 2532c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0)); 2533c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin } 253457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 253557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 253657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 253757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 253857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 253957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handlePOW(Instruction *i) 254057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 254157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *val = bld.getScratch(); 254257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 254357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); 254457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; 254557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); 254657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 254757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_EX2; 254857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, val); 254957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 255057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 255157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 255257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 255357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 255457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 255557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleEXPORT(Instruction *i) 255657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 255757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_FRAGMENT) { 255857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int id = i->getSrc(0)->reg.data.offset / 4; 255957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 25609362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).isIndirect(0)) // TODO, ugly 256157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 256257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 256300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller i->subOp = NV50_IR_SUBOP_MOV_FINAL; 25649362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).set(i->src(1)); 256557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 256657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, new_LValue(func, FILE_GPR)); 256757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getDef(0)->reg.data.id = id; 256857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 256957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prog->maxGPR = MAX2(prog->maxGPR, id); 257057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 257157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 257257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setIndirect(0, 1, gpEmitAddress); 257357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 257457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 257557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 257657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 257757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 257857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleOUT(Instruction *i) 257957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 25802f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin Instruction *prev = i->prev; 25812f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin ImmediateValue stream, prevStream; 25822f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin 25832f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin // Only merge if the stream ids match. Also, note that the previous 25842f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin // instruction would have already been lowered, so we take arg1 from it. 25852f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin if (i->op == OP_RESTART && prev && prev->op == OP_EMIT && 25862f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin i->src(0).getImmediate(stream) && 25872f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin prev->src(1).getImmediate(prevStream) && 25882f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin stream.reg.data.u32 == prevStream.reg.data.u32) { 258957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART; 259057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 259157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 259257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(gpEmitAddress); 259357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, gpEmitAddress); 25942f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin i->setSrc(1, i->getSrc(0)); 259557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, gpEmitAddress); 259657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 259757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 259857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 259957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 260057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Generate a binary predicate if an instruction is predicated by 260157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// e.g. an f32 value. 260257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 260357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::checkPredicate(Instruction *insn) 260457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 260557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = insn->getPredicate(); 260657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pdst; 260757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 260857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!pred || pred->reg.file == FILE_PREDICATE) 260957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 261057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller pdst = new_LValue(func, FILE_PREDICATE); 261157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 261257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // CAUTION: don't use pdst->getInsn, the definition might not be unique, 261357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass 261457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2615bbe3d6dc29f218e4d790e5ea359d3c6736e94226Dave Airlie bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, insn->dType, bld.mkImm(0), pred); 261657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 261757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->setPredicate(insn->cc, pdst); 261857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 261957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 262057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 262157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - add quadop dance for texturing 262257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - put FP outputs in GPRs 262357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - convert instruction sequences 262457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 262557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 262657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Instruction *i) 262757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 26289d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin bool ret = true; 2629405bd00f3c98cb78d1dda1f3bf5d74155b18cd57Christoph Bumiller bld.setPosition(i, false); 263057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 263157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->cc != CC_ALWAYS) 263257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkPredicate(i); 263357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 263457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 263557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TEX: 263657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXB: 263757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXL: 263857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXF: 263957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXG: 264057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTEX(i->asTex()); 264157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXD: 264257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTXD(i->asTex()); 2643423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin case OP_TXLQ: 2644423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return handleTXLQ(i->asTex()); 264530cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller case OP_TXQ: 264630cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return handleTXQ(i->asTex()); 264757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: 264857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); 264957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getDef(0)); 265057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 265157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_POW: 265257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handlePOW(i); 265357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 265457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleDIV(i); 265557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 265657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleMOD(i); 265757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: 265857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleSQRT(i); 265957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EXPORT: 26609d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin ret = handleEXPORT(i); 26619d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin break; 266257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EMIT: 266357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RESTART: 266457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleOUT(i); 266557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RDSV: 266657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleRDSV(i); 266757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_WRSV: 266857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleWRSV(i); 2669c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin case OP_STORE: 267057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LOAD: 26719d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset handleLDST(i); 267257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 2673c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case OP_ATOM: 267475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller { 267561d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; 2676c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller handleATOM(i); 267775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(i, cctl); 267875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 2679c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 26804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDB: 26814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDP: 26824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTB: 26834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTP: 26844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDB: 26854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDP: 2686c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (targ->getChipset() >= NVISA_GM107_CHIPSET) 2687c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset handleSurfaceOpGM107(i->asTex()); 2688c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset else if (targ->getChipset() >= NVISA_GK104_CHIPSET) 26894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller handleSurfaceOpNVE4(i->asTex()); 2690362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset else 2691362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset handleSurfaceOpNVC0(i->asTex()); 26924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 26932c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin case OP_SUQ: 2694d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset handleSUQ(i->asTex()); 26952c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin break; 26967c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset case OP_BUFQ: 26977c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset handleBUFQ(i); 26987c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset break; 269957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 270057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 27016bca283ad5ebdd85e268c6757842b3c808c6b73dJohannes Obermayr } 27029d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin 27039d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin /* Kepler+ has a special opcode to compute a new base address to be used 27049d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin * for indirect loads. 2705a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin * 2706a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin * Maxwell+ has an additional similar requirement for indirect 2707a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin * interpolation ops in frag shaders. 27089d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin */ 2709a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin bool doAfetch = false; 2710a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin if (targ->getChipset() >= NVISA_GK104_CHIPSET && 2711a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin !i->perPatch && 2712a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin (i->op == OP_VFETCH || i->op == OP_EXPORT) && 2713a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->src(0).isIndirect(0)) { 2714a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin doAfetch = true; 2715a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin } 2716a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin if (targ->getChipset() >= NVISA_GM107_CHIPSET && 2717a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin (i->op == OP_LINTERP || i->op == OP_PINTERP) && 2718a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->src(0).isIndirect(0)) { 2719a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin doAfetch = true; 2720a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin } 2721a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin 2722a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin if (doAfetch) { 2723a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin Value *addr = cloneShallow(func, i->getSrc(0)); 27249d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin Instruction *afetch = bld.mkOp1(OP_AFETCH, TYPE_U32, bld.getSSA(), 2725a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->getSrc(0)); 27269d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin afetch->setIndirect(0, 0, i->getIndirect(0, 0)); 2727a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin addr->reg.data.offset = 0; 2728a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->setSrc(0, addr); 27299d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin i->setIndirect(0, 0, afetch->getDef(0)); 27309d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin } 27319d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin 27329d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin return ret; 273357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 273457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 273557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 273657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerTargetNVC0::runLegalizePass(Program *prog, CGStage stage) const 273757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 273857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_PRE_SSA) { 273957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LoweringPass pass(prog); 274057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 274157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 274257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_POST_RA) { 2743e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller NVC0LegalizePostRA pass(prog); 274457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 274557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 274657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_SSA) { 274757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LegalizeSSA pass; 274857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 274957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 275057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 275157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 275257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 275357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} // namespace nv50_ir 2754