1d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller/* 2d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Copyright 2011 Christoph Bumiller 3d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 4d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Permission is hereby granted, free of charge, to any person obtaining a 5d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * copy of this software and associated documentation files (the "Software"), 6d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * to deal in the Software without restriction, including without limitation 7d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * and/or sell copies of the Software, and to permit persons to whom the 9d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Software is furnished to do so, subject to the following conditions: 10d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 11d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * The above copyright notice and this permission notice shall be included in 12d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * all copies or substantial portions of the Software. 13d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 14d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 183d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203d8d5b298a268b119d840bc9bae0ee9e0c9244a9Kenneth Graunke * OTHER DEALINGS IN THE SOFTWARE. 21d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller */ 2257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 235eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir.h" 245eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_build_util.h" 2557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 265eb7ff1175a644ffe3b0f1a75cb235400355f9fbJohannes Obermayr#include "codegen/nv50_ir_target_nvc0.h" 273723ff52237194995d4f9f9fb5d66fb80110889eBen Skeggs#include "codegen/nv50_ir_lowering_nvc0.h" 2857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller#include <limits> 3000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 3157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillernamespace nv50_ir { 3257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_ADD 0 3457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUBR 1 3557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_SUB 2 3657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QOP_MOV2 3 3757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 38717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller// UL UR LL LR 3957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define QUADOP(q, r, s, t) \ 40717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller ((QOP_##q << 6) | (QOP_##r << 4) | \ 41717f55d79d9709a31e0f85a87f076ac13446701dChristoph Bumiller (QOP_##s << 2) | (QOP_##t << 0)) 4257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 4357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 4457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleDIV(Instruction *i) 4557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 4657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *call; 4757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int builtin; 4857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[2]; 4957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 5157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0); 5257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0); 5357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 5457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break; 5557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break; 5657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 5757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 5857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 5957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); 6057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]); 6157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2); 6257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0); 6357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->fixed = 1; 6557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->absolute = call->builtin = 1; 6657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller call->target.builtin = builtin; 6757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 6857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 6957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 7157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::handleRCPRSQ(Instruction *i) 7257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 73b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin assert(i->dType == TYPE_F64); 74b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // There are instructions that will compute the high 32 bits of the 64-bit 75b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // float. We will just stick 0 in the bottom 32 bits. 76b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 77b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.setPosition(i, false); 78b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 79b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 1. Take the source and it up. 80b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin Value *src[2], *dst[2], *def = i->getDef(0); 81b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkSplit(src, 4, i->getSrc(0)); 82b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 83b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. 84b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin dst[0] = bld.loadImm(NULL, 0); 85b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin dst[1] = bld.getSSA(); 86b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 87b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 3. The new version of the instruction takes the high 32 bits of the 88b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // source and outputs the high 32 bits of the destination. 89b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setSrc(0, src[1]); 90b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setDef(0, dst[1]); 91b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->setType(TYPE_F32); 92b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin i->subOp = NV50_IR_SUBOP_RCPRSQ_64H; 93b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin 94b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin // 4. Recombine the two dst pieces back into the original destination. 95b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.setPosition(i, true); 96b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); 9757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 9857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 996fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkinvoid 1006fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia MirkinNVC0LegalizeSSA::handleFTZ(Instruction *i) 1016fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin{ 1026fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // Only want to flush float inputs 103d1eea18a595a468dbc2267a8d14197a3b1a5a4b6Ilia Mirkin assert(i->sType == TYPE_F32); 1046fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1056fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // If we're already flushing denorms (and NaN's) to zero, no need for this. 1066fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (i->dnz) 1076fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin return; 1086fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1096fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin // Only certain classes of operations can flush 1106fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin OpClass cls = prog->getTarget()->getOpClass(i->op); 1116fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE && 1126fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin cls != OPCLASS_CONVERT) 1136fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin return; 1146fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1156fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin i->ftz = true; 1166fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin} 1176fe0d4f0354418c6e68dd352996e9891ddd4dfd6Ilia Mirkin 1189145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkinvoid 1199145873b152e8d0e361399f6b05347c0ec6c361dIlia MirkinNVC0LegalizeSSA::handleTEXLOD(TexInstruction *i) 1209145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin{ 121f897036978b42619ce27ea4f41886cc0002f33efIlia Mirkin if (i->tex.levelZero) 1229145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin return; 1239145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 1249145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin ImmediateValue lod; 1259145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 1269145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin // The LOD argument comes right after the coordinates (before depth bias, 1279145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin // offsets, etc). 1289145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin int arg = i->tex.target.getArgCount(); 1299145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 1309145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin // SM30+ stores the indirect handle as a separate arg, which comes before 1319145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin // the LOD. 1329145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET && 1339145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin i->tex.rIndirectSrc >= 0) 1349145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin arg++; 1359145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin // SM20 stores indirect handle combined with array coordinate 1369145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin if (prog->getTarget()->getChipset() < NVISA_GK104_CHIPSET && 1379145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin !i->tex.target.isArray() && 1389145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin i->tex.rIndirectSrc >= 0) 1399145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin arg++; 1409145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 1419145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin if (!i->src(arg).getImmediate(lod) || !lod.isInteger(0)) 1429145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin return; 1439145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 1449145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin if (i->op == OP_TXL) 1459145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin i->op = OP_TEX; 1469145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin i->tex.levelZero = true; 1479145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin i->moveSources(arg + 1, -1); 1489145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin} 1499145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 15057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 15157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(Function *fn) 15257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 15357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(fn->getProgram()); 15457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 15557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 15657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 15757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 15857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizeSSA::visit(BasicBlock *bb) 15957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 16057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 16157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 16257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 1639145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 1649145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin if (i->sType == TYPE_F32 && prog->getType() != Program::TYPE_COMPUTE) 1659145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin handleFTZ(i); 1669145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin 16757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 16857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 16957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 1709145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin if (i->sType != TYPE_F32) 1719145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin handleDIV(i); 17257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 17357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 17457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: 17557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType == TYPE_F64) 17657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleRCPRSQ(i); 17757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 1789145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin case OP_TXL: 1799145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin case OP_TXF: 1809145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin handleTEXLOD(i->asTex()); 1819145873b152e8d0e361399f6b05347c0ec6c361dIlia Mirkin break; 18257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 18357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 18457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 18557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 18657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 18757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 18857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 189e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph BumillerNVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) 19074be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee : rZero(NULL), 19174be77a99e1196d07ebd941aee24313f7aa123c9Vinson Lee carry(NULL), 192ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin pOne(NULL), 1931b3b4196f08bf825d031cdf6bfcbc7dd3ccf3172Samuel Pitoiset needTexBar(prog->getTarget()->getChipset() >= 0xe0 && 1941b3b4196f08bf825d031cdf6bfcbc7dd3ccf3172Samuel Pitoiset prog->getTarget()->getChipset() < 0x110) 195e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller{ 196e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller} 197e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller 19857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 19900fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insnDominatedBy(const Instruction *later, 20000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const Instruction *early) const 20100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 20200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (early->bb == later->bb) 20300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return early->serial < later->serial; 20400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return later->bb->dominatedBy(early->bb); 20500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 20600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 20700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillervoid 20840c224a573f2b763046001e622aafca90f68c693Christoph BumillerNVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, 209a2af42c1d2dc91f4c31e25ff9fff15a89a9b6eadIlia Mirkin Instruction *usei, const Instruction *texi) 21040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 21140c224a573f2b763046001e622aafca90f68c693Christoph Bumiller bool add = true; 2121804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin bool dominated = insnDominatedBy(usei, texi); 2131804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // Uses before the tex have to all be included. Just because an earlier 2141804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // instruction dominates another instruction doesn't mean that there's no 2151804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // way to get from the tex to the later instruction. For example you could 2161804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // have nested loops, with the tex in the inner loop, and uses before it in 2171804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // both loops - even though the outer loop's instruction would dominate the 2181804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // inner's, we still want a texbar before the inner loop's instruction. 2191804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // 2201804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // However we can still use the eliding logic between uses dominated by the 2211804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin // tex instruction, as that is unambiguously correct. 2221804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (dominated) { 2231804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin for (std::list<TexUse>::iterator it = uses.begin(); it != uses.end();) { 2241804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (it->after) { 2251804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (insnDominatedBy(usei, it->insn)) { 2261804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin add = false; 2271804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin break; 2281804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 2291804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin if (insnDominatedBy(it->insn, usei)) { 2301804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin it = uses.erase(it); 2311804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin continue; 2321804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 2331804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 23440c224a573f2b763046001e622aafca90f68c693Christoph Bumiller ++it; 2351804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin } 23640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller } 23740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller if (add) 2381804aa0b80cf5b1ee5d97bc33a12808c78673a12Ilia Mirkin uses.push_back(TexUse(usei, texi, dominated)); 23940c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 24040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 2417752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// While it might be tempting to use the an algorithm that just looks at tex 2427752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// uses, not all texture results are guaranteed to be used on all paths. In 2437752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// the case where along some control flow path a texture result is never used, 2447752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// we might reuse that register for something else, creating a 2457752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// write-after-write hazard. So we have to manually look through all 2467752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin// instructions looking for ones that reference the registers in question. 24740c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 2487752bbc44e78e982de3cd4c34862adc38a338234Ilia MirkinNVC0LegalizePostRA::findFirstUses( 2497752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin Instruction *texi, std::list<TexUse> &uses) 25040c224a573f2b763046001e622aafca90f68c693Christoph Bumiller{ 2517752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin int minGPR = texi->def(0).rep()->reg.data.id; 2527752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin int maxGPR = minGPR + texi->def(0).rep()->reg.size / 4 - 1; 25340c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 2547752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin unordered_set<const BasicBlock *> visited; 2557752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin findFirstUsesBB(minGPR, maxGPR, texi->next, texi, uses, visited); 25640c224a573f2b763046001e622aafca90f68c693Christoph Bumiller} 25740c224a573f2b763046001e622aafca90f68c693Christoph Bumiller 25840c224a573f2b763046001e622aafca90f68c693Christoph Bumillervoid 2597752bbc44e78e982de3cd4c34862adc38a338234Ilia MirkinNVC0LegalizePostRA::findFirstUsesBB( 2607752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin int minGPR, int maxGPR, Instruction *start, 2617752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin const Instruction *texi, std::list<TexUse> &uses, 2627752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin unordered_set<const BasicBlock *> &visited) 26300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 2647752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin const BasicBlock *bb = start->bb; 2657752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 2667752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // We don't process the whole bb the first time around. This is correct, 2677752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // however we might be in a loop and hit this BB again, and need to process 2687752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // the full thing. So only mark a bb as visited if we processed it from the 2697752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin // beginning. 2707752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (start == bb->getEntry()) { 2717752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (visited.find(bb) != visited.end()) 2727752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin return; 2737752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin visited.insert(bb); 2747752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin } 2757752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 2767752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (Instruction *insn = start; insn != bb->getExit(); insn = insn->next) { 2777752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (insn->isNop()) 2787752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin continue; 2797752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 2807752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (int d = 0; insn->defExists(d); ++d) { 28171ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin const Value *def = insn->def(d).rep(); 2827752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (insn->def(d).getFile() != FILE_GPR || 28371ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin def->reg.data.id + def->reg.size / 4 - 1 < minGPR || 28471ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin def->reg.data.id > maxGPR) 285c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin continue; 2867752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin addTexUse(uses, insn, texi); 2877752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin return; 2887752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin } 289c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin 2907752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (int s = 0; insn->srcExists(s); ++s) { 29171ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin const Value *src = insn->src(s).rep(); 2927752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin if (insn->src(s).getFile() != FILE_GPR || 29371ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin src->reg.data.id + src->reg.size / 4 - 1 < minGPR || 29471ad8a173f5c64d6384c13f04361455571c42ffeIlia Mirkin src->reg.data.id > maxGPR) 2957752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin continue; 2967752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin addTexUse(uses, insn, texi); 2977752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin return; 29800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 29900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 3007752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin 3017752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 3027752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin findFirstUsesBB(minGPR, maxGPR, BasicBlock::get(ei.getNode())->getEntry(), 3037752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin texi, uses, visited); 3047752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin } 30500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 30600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 30700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// Texture barriers: 30800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// This pass is a bit long and ugly and can probably be optimized. 30900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 31000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 1. obtain a list of TEXes and their outputs' first use(s) 31100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 2. calculate the barrier level of each first use (minimal number of TEXes, 31200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// over all paths, between the TEX and the use in question) 31300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// 3. for each barrier, if all paths from the source TEX to that barrier 31400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller// contain a barrier of lesser level, it can be culled 31500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 31600fe442253744c4c4e7e68da44d6983da053968bChristoph BumillerNVC0LegalizePostRA::insertTextureBarriers(Function *fn) 31700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller{ 31800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::list<TexUse> *uses; 31900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Instruction *> texes; 32000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstTex; 32100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> bbFirstUse; 32200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<int> texCounts; 32300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<TexUse> useVec; 32400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ArrayList insns; 32500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 32600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller fn->orderInstructions(insns); 32700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 32800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts.resize(fn->allBBlocks.getSize(), 0); 32900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize()); 33000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize()); 33100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 33200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // tag BB CFG nodes by their id for later 33300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) { 33400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get()); 33500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (bb) 33600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bb->cfg.tag = bb->getId(); 33700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 33800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 33900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // gather the first uses for each TEX 34000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (int i = 0; i < insns.getSize(); ++i) { 34100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i)); 34200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(tex->op)) { 34300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes.push_back(tex); 34400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!texCounts.at(tex->bb->getId())) 34500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bbFirstTex[tex->bb->getId()] = texes.size() - 1; 34600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texCounts[tex->bb->getId()]++; 34700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 34800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 34900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insns.clear(); 35000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (texes.empty()) 35100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 35200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller uses = new std::list<TexUse>[texes.size()]; 35300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (!uses) 35400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return false; 355c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin for (size_t i = 0; i < texes.size(); ++i) { 3567752bbc44e78e982de3cd4c34862adc38a338234Ilia Mirkin findFirstUses(texes[i], uses[i]); 357c4bb436f7660c951cd27e52660cf825da68793e5Ilia Mirkin } 35800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 35900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // determine the barrier level at each use 36000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < texes.size(); ++i) { 36100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end(); 36200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++u) { 36300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *tb = texes[i]->bb; 36400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *ub = u->insn->bb; 36500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (tb == ub) { 36600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 36700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = i + 1; j < texes.size() && 36800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == tb && texes[j]->serial < u->insn->serial; 36900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 37000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 37100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 37200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = fn->cfg.findLightestPathWeight(&tb->cfg, 37300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller &ub->cfg, texCounts); 37400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (u->level < 0) { 37500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller WARN("Failed to find path TEX -> TEXBAR\n"); 37600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level = 0; 37700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 37800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 37900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // this counted all TEXes in the origin block, correct that 38000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */; 38100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // and did not count the TEXes in the destination block, add those 38200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() && 38300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller texes[j]->bb == ub && texes[j]->serial < u->insn->serial; 38400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller ++j) 38500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller u->level++; 38600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 38700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller assert(u->level >= 0); 38800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec.push_back(*u); 38900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 39000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 39100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete[] uses; 39200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 39300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // insert the barriers 39400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (size_t i = 0; i < useVec.size(); ++i) { 39500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = useVec[i].insn->prev; 39600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (useVec[i].level < 0) 39700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller continue; 39800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR) { 39900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev->subOp > useVec[i].level) 40000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->subOp = useVec[i].level; 40100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0)); 40200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 40300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); 40400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->fixed = 1; 40500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->subOp = useVec[i].level; 40600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // make use explicit to ease latency calculation 40700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0)); 40800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller useVec[i].insn->bb->insertBefore(useVec[i].insn, bar); 40900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 41000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 41100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 4125966903c28a13f13923de308c5f5116a0d5c8cbdIlia Mirkin if (fn->getProgram()->optLevel < 3) 41300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 41400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 41500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller std::vector<Limits> limitT, limitB, limitS; // entry, exit, single 41600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 41700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 41800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 41900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS.resize(fn->allBBlocks.getSize()); 42000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 42100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // cull unneeded barriers (should do that earlier, but for simplicity) 42240c224a573f2b763046001e622aafca90f68c693Christoph Bumiller IteratorRef bi = fn->cfg.iteratorCFG(); 42300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // first calculate min/max outstanding TEXes for each BB 42400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 42500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 42600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 42700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int min = 0; 42800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = std::numeric_limits<int>::max(); 42900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = i->next) { 43000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 43100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min++; 43200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (max < std::numeric_limits<int>::max()) 43300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 43400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 43500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 43600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller min = MIN2(min, i->subOp); 43700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = MIN2(max, i->subOp); 43800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 43900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 44000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // limits when looking at an isolated block 44100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].min = min; 44200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitS[bb->getId()].max = max; 44300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 44400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // propagate the min/max values 44500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) { 44600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 44700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 44800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 44900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int bbId = bb->getId(); 45000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) { 45100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 45200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller const int inId = in->getId(); 45300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min); 45400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max); 45500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 45600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // I just hope this is correct ... 45700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (limitS[bbId].max == std::numeric_limits<int>::max()) { 45800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // no barrier 45900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = limitT[bbId].min + limitS[bbId].min; 46000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = limitT[bbId].max + limitS[bbId].min; 46100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 46200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // block contained a barrier 46300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].min = MIN2(limitS[bbId].max, 46400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].min + limitS[bbId].min); 46500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitB[bbId].max = MIN2(limitS[bbId].max, 46600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller limitT[bbId].max + limitS[bbId].min); 46700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 46800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 46900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 47000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller // finally delete unnecessary barriers 47100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (bi->reset(); !bi->end(); bi->next()) { 47200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 47300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller BasicBlock *bb = BasicBlock::get(n); 47400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *prev = NULL; 47500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller Instruction *next; 47600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller int max = limitT[bb->getId()].max; 47700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = next) { 47800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller next = i->next; 47900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->op == OP_TEXBAR) { 48000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (i->subOp >= max) { 48100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, i); 4827086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco i = NULL; 48300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else { 48400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max = i->subOp; 48500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) { 48600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller delete_Instruction(prog, prev); 48700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = NULL; 48800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 48900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 49000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } else 49100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (isTextureOp(i->op)) { 49200fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller max++; 49300fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 4947086636358b611a2bb124253e1fe870107e1cecbTiziano Bacocco if (i && !i->isNop()) 49500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller prev = i; 49600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 49700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller } 49800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller return true; 49900fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller} 50000fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 50100fe442253744c4c4e7e68da44d6983da053968bChristoph Bumillerbool 50257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(Function *fn) 50357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 50400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (needTexBar) 50500fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insertTextureBarriers(fn); 50600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller 5073433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller rZero = new_LValue(fn, FILE_GPR); 508ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin pOne = new_LValue(fn, FILE_PREDICATE); 50999e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry = new_LValue(fn, FILE_FLAGS); 5104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 5111f895caba0accc0af3e637d6193ac0b673ce98bcIlia Mirkin rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_GK20A_CHIPSET) ? 255 : 63; 51299e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller carry->reg.data.id = 0; 513ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin pOne->reg.data.id = 7; 5144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 51557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 51657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 51757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 51857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 51957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::replaceZero(Instruction *i) 52057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 52157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; i->srcExists(s); ++s) { 5224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (s == 2 && i->op == OP_SUCLAMP) 5234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller continue; 52457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ImmediateValue *imm = i->getSrc(s)->asImm(); 525ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (imm) { 526ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (i->op == OP_SELP && s == 2) { 527ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin i->setSrc(s, pOne); 528ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (imm->reg.data.u64 == 0) 529ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin i->src(s).mod = i->src(s).mod ^ Modifier(NV50_IR_MOD_NOT); 530ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } else if (imm->reg.data.u64 == 0) { 531ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin i->setSrc(s, rZero); 532ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 533ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 53457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 53557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 53657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 53757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// replace CONT with BRA for single unconditional continue 53857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 53957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb) 54057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 54157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT) 54257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 54357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.incident(); 54457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 54557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 54657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.getType() != Graph::Edge::BACK) 54757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 54857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *contBB = BasicBlock::get(ei.getNode()); 54957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 55057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!contBB->getExit() || contBB->getExit()->op != OP_CONT || 55157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->getPredicate()) 55257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 55357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller contBB->getExit()->op = OP_BRA; 55457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getEntry()); // delete PRECONT 55557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 55657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 55757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(ei.end() || ei.getType() != Graph::Edge::BACK); 55857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 55957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 56057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 56157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// replace branches to join blocks with join ops 56257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 56357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::propagateJoin(BasicBlock *bb) 56457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 56557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit) 56657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 56757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 56857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *in = BasicBlock::get(ei.getNode()); 56957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *exit = in->getExit(); 57057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!exit) { 57157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller in->insertTail(new FlowInstruction(func, OP_JOIN, bb)); 57257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // there should always be a terminator instruction 57357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller WARN("inserted missing terminator in BB:%i\n", in->getId()); 57457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 57557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (exit->op == OP_BRA) { 57657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->op = OP_JOIN; 57757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller exit->asFlow()->limit = 1; // must-not-propagate marker 57857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 57957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 58057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getEntry()); 58157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 58257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 58357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 58457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LegalizePostRA::visit(BasicBlock *bb) 58557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 58657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next; 58757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 58857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // remove pseudo operations and non-fixed no-ops, split 64 bit operations 58957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getFirst(); i; i = next) { 59057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 59157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_EMIT || i->op == OP_RESTART) { 59257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!i->getDef(0)->refCount()) 59357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, NULL); 5949362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).getFile() == FILE_IMMEDIATE) 5953433471e8b46dd9dd042a00f88ef9ad011a94aacChristoph Bumiller i->setSrc(0, rZero); // initial value must be 0 59607d3972b4927841bb892af16ff0389f8a241b24cBen Skeggs replaceZero(i); 59757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 59857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isNop()) { 59957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(i); 600313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin } else 601313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin if (i->op == OP_BAR && i->subOp == NV50_IR_SUBOP_BAR_SYNC && 602313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin prog->getType() != Program::TYPE_COMPUTE) { 603313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin // It seems like barriers are never required for tessellation since 604313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin // the warp size is 32, and there are always at most 32 tcs threads. 605313940b03cf7c857143b9e3ec0ab969ce4472c83Ilia Mirkin bb->remove(i); 60637b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin } else 60737b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LDC_IS) { 60837b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin int offset = i->src(0).get()->reg.data.offset; 60937b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin if (abs(offset) > 0x10000) 61037b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin i->src(0).get()->reg.fileIndex += offset >> 16; 61137b67db6ae34fb6586d640a7a1b6232f091dd812Ilia Mirkin i->src(0).get()->reg.data.offset = (int)(short)offset; 61257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 61399e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // TODO: Move this to before register allocation for operations that 61499e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller // need the $c register ! 61599e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (typeSizeof(i->dType) == 8) { 61699e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller Instruction *hi; 61799e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry); 61899e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller if (hi) 61999e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller next = hi; 62099e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller } 62199e4eba669f13a0dc80880f4f91e2338377c1667Christoph Bumiller 62257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_MOV && i->op != OP_PFETCH) 62357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller replaceZero(i); 62457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 62557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 62657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!bb->getEntry()) 62757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 62857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 62957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!tryReplaceContWithBra(bb)) 63057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller propagateJoin(bb); 63157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 63257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 63357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 63457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 63557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget()) 63657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 63757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setProgram(prog); 63857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 63957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 64057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 64157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Function *fn) 64257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 64357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 64457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!strncmp(fn->getName(), "MAIN", 4)); 64557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: when we generate actual functions pass this value along somehow 64657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false); 64757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); 6482ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller if (fn->cfgExit) { 6492ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); 6502ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller bld.mkMovToReg(0, gpEmitAddress); 6512ec5c8feb331af29548e98b0e78e810bbbc7009eChristoph Bumiller } 65257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 65357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 65457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 65557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 65657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 65757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(BasicBlock *bb) 65857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 65957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 66057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 66157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6627a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumillerinline Value * 6637a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph BumillerNVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot) 6647a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller{ 665d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 6667a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller uint32_t off = prog->driver->io.texBindBase + slot * 4; 6679cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset 6689cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset if (ptr) 6699cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(2)); 6709cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset 6717a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller return bld. 6727a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 6737a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller} 6747a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 67557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// move array source to first slot, convert to u16, add indirections 67657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 67757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTEX(TexInstruction *i) 67857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 67937a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 68037a08ddce54d28f90dc8db8e10792d0759938590Christoph Bumiller const int arg = i->tex.target.getArgCount(); 6814da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller const int lyr = arg - (i->tex.target.isMS() ? 2 : 1); 68219ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin const int chipset = prog->getTarget()->getChipset(); 68357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6846eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin /* Only normalize in the non-explicit derivatives case. For explicit 6856eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin * derivatives, this is handled in handleManualTXD. 6866eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin */ 6876eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) { 6886eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin Value *src[3], *val; 6896eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin int c; 6906eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) 6916eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c)); 6926eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin val = bld.getScratch(); 6936eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); 6946eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); 6956eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp1(OP_RCP, TYPE_F32, val, val); 6966eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) { 6976eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), 6986eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin i->getSrc(c), val)); 6996eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } 7006eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } 7016eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin 702f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Arguments to the TEX instruction are a little insane. Even though the 703f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // encoding is identical between SM20 and SM30, the arguments mean 704f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // different things between Fermi and Kepler+. A lot of arguments are 705f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // optional based on flags passed to the instruction. This summarizes the 706f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // order of things. 707f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // 708f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Fermi: 709f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // array/indirect 710f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // coords 711f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // sample 712f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // lod bias 713f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // depth compare 714f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // offsets: 715f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg) 716f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // - other: 4 bits each, single reg 717f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // 718f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // Kepler+: 719f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // indirect handle 720f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // array (+ offsets for txd in upper 16 bits) 721f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // coords 722f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // sample 723f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // lod bias 724f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // depth compare 725f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin // offsets (same as fermi, except txd which takes it with array) 7260532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // 7270532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // Maxwell (tex): 7280532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // array 7290532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // coords 7300532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // indirect handle 7310532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // sample 7320532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // lod bias 7330532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // depth compare 7340532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // offsets 7350532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // 7360532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // Maxwell (txd): 7370532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // indirect handle 7380532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // coords 7390532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // array + offsets 7400532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin // derivatives 741f525bd01d1430a5e33f57805f50fe4e89aa86ae8Ilia Mirkin 74219ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 7437a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 744b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin // XXX this ignores tsc, and assumes a 1:1 mapping 745b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin assert(i->tex.rIndirectSrc >= 0); 7469cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset Value *hnd = loadTexHandle(i->getIndirectR(), i->tex.r); 747b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.r = 0xff; 748b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.s = 0x1f; 749b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectR(hnd); 750b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectS(NULL); 75163b850403c90f33c295d3ad6be4ad749d4ea6274Ilia Mirkin } else if (i->tex.r == i->tex.s || i->op == OP_TXF) { 7525ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin if (i->tex.r == 0xffff) 7535ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin i->tex.r = prog->driver->io.fbtexBindBase / 4; 7545ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin else 7555ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin i->tex.r += prog->driver->io.texBindBase / 4; 756e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller i->tex.s = 0; // only a single cX[] value possible here 757e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else { 7587a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *hnd = bld.getScratch(); 7597a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *rHnd = loadTexHandle(NULL, i->tex.r); 7607a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller Value *sHnd = loadTexHandle(NULL, i->tex.s); 7617a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 7627a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, hnd, rHnd, bld.mkImm(0x1400), sHnd); 7637a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller 7647a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.r = 0; // not used for indirect tex 7657a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->tex.s = 0; 7667a91d3a2a4c4e7851fdb46465224213ce1874c9bChristoph Bumiller i->setIndirectR(hnd); 767e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 768e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller if (i->tex.target.isArray()) { 769e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller LValue *layer = new_LValue(func, FILE_GPR); 7704da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *src = i->getSrc(lyr); 771e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller const int sat = (i->op == OP_TXF) ? 1 : 0; 772e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 773e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; 7740532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) { 7750532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin for (int s = dim; s >= 1; --s) 7760532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(s, i->getSrc(s - 1)); 7770532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(0, layer); 7780532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin } else { 7790532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->setSrc(dim, layer); 7800532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin } 781e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } 782b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin // Move the indirect reference to the first place 7830532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (i->tex.rIndirectSrc >= 0 && ( 7840532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin i->op == OP_TXD || chipset < NVISA_GM107_CHIPSET)) { 785b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin Value *hnd = i->getIndirectR(); 786b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin 787b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setIndirectR(NULL); 788b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->moveSources(0, 1); 789b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setSrc(0, hnd); 790b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.rIndirectSrc = 0; 791b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->tex.sIndirectSrc = -1; 792b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin } 793ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin // Move the indirect reference to right after the coords 794ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin else if (i->tex.rIndirectSrc >= 0 && chipset >= NVISA_GM107_CHIPSET) { 795ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin Value *hnd = i->getIndirectR(); 796ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin 797ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->setIndirectR(NULL); 798ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->moveSources(arg, 1); 799ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->setSrc(arg, hnd); 800ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->tex.rIndirectSrc = 0; 801ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin i->tex.sIndirectSrc = -1; 802ecea2f69ef8c07bd7b08f659b214a83f64ea2daaIlia Mirkin } 803e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller } else 804e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller // (nvc0) generate and move the tsc/tic/array source to the front 80519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 80657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 80757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8089807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin Value *ticRel = i->getIndirectR(); 8099807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin Value *tscRel = i->getIndirectS(); 8109807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin 8115ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin if (i->tex.r == 0xffff) { 8125ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin i->tex.r = 0x20; 8135ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin i->tex.s = 0x10; 8145ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin } 8155ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin 816af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (ticRel) { 8179807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin i->setSrc(i->tex.rIndirectSrc, NULL); 818af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (i->tex.r) 819af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 820af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin ticRel, bld.mkImm(i->tex.r)); 821af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin } 822af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (tscRel) { 8239807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin i->setSrc(i->tex.sIndirectSrc, NULL); 824af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin if (i->tex.s) 825af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 826af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin tscRel, bld.mkImm(i->tex.s)); 827af3619e88043ce85560b8220dc16244f8898a926Ilia Mirkin } 8289807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin 8294da54c91d24da891c56957f29274e7821c8254f6Christoph Bumiller Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL; 8307d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin if (arrayIndex) { 8317d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin for (int s = dim; s >= 1; --s) 8327d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin i->setSrc(s, i->getSrc(s - 1)); 8337d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin i->setSrc(0, arrayIndex); 8347d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin } else { 8357d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin i->moveSources(0, 1); 8367d98bfedd73d632041d27ff12ccf7c7be74a2dddIlia Mirkin } 83757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 838e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller if (arrayIndex) { 839e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller int sat = (i->op == OP_TXF) ? 1 : 0; 840e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 841e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat; 842e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } else { 84357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.loadImm(src, 0); 844e4210a42bcfdb19336faa2ad4b807818c71a2982Christoph Bumiller } 84557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8469807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin if (ticRel) 84757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src); 8489807a8ddaf3d0b6d8bf8b3e7c0b01cc4c7db4f30Ilia Mirkin if (tscRel) 84957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src); 85057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 85157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, src); 85257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 85357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 85419ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // For nvc0, the sample id has to be in the second operand, as the offset 85519ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // does. Right now we don't know how to pass both in, and this case can't 85619ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // happen with OpenGL. On nve0, the sample id is part of the texture 85719ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin // coordinate argument. 85819ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin assert(chipset >= NVISA_GK104_CHIPSET || 85919ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin !i->tex.useOffsets || !i->tex.target.isMS()); 86019ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin 861f782d6e792db2ed7773a2d22866dbcdb1e4062eeIlia Mirkin // offset is between lod and dc 86257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->tex.useOffsets) { 86357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int n, c; 86471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 8658aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->op != OP_TXD || chipset < NVISA_GK104_CHIPSET) { 8668aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.target.isShadow()) 8678aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin s--; 8688aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->srcExists(s)) // move potential predicate out of the way 8698aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->moveSources(s, 1); 8708aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.useOffsets == 4 && i->srcExists(s + 1)) 8718aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->moveSources(s + 1, 1); 8728aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 873f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin if (i->op == OP_TXG) { 874f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // Either there is 1 offset, which goes into the 2 low bytes of the 875f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // first source, or there are 4 offsets, which go into 2 sources (8 876f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin // values, 1 byte each). 877a48a343c299a6486a1540cdf7d083f38aa4ace55Ilia Mirkin Value *offs[2] = {NULL, NULL}; 878f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (n = 0; n < i->tex.useOffsets; n++) { 879f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 2; ++c) { 880f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if ((n % 2) == 0 && c == 0) 881a48a343c299a6486a1540cdf7d083f38aa4ace55Ilia Mirkin bld.mkMov(offs[n / 2] = bld.getScratch(), i->offset[n][c].get()); 882f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin else 883f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkOp3(OP_INSBF, TYPE_U32, 884f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2], 885f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->offset[n][c].get(), 886f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)), 887f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin offs[n / 2]); 888f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 889f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 890f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s, offs[0]); 891f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin if (offs[1]) 892f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin i->setSrc(s + 1, offs[1]); 893f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } else { 894f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin unsigned imm = 0; 895f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin assert(i->tex.useOffsets == 1); 896f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin for (c = 0; c < 3; ++c) { 897f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin ImmediateValue val; 898fb1afd1ea5fd25d82c75c5c3a2aba0bcb53b6d47Ilia Mirkin if (!i->offset[0][c].getImmediate(val)) 899fb1afd1ea5fd25d82c75c5c3a2aba0bcb53b6d47Ilia Mirkin assert(!"non-immediate offset passed to non-TXG"); 900f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin imm |= (val.reg.data.u32 & 0xf) << (c * 4); 901f3aa999383074d666d6e3f3506e66b0c937904caIlia Mirkin } 9028aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) { 9038aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // The offset goes into the upper 16 bits of the array index. So 9048aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // create it if it's not already there, and INSBF it if it already 9058aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin // is. 906b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin s = (i->tex.rIndirectSrc >= 0) ? 1 : 0; 9070532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin if (chipset >= NVISA_GM107_CHIPSET) 9080532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin s += dim; 9098aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (i->tex.target.isArray()) { 9100532a5fd00cdddda0fd1727fb519cb4312f47e83Ilia Mirkin bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s), 9118aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin bld.loadImm(NULL, imm), bld.mkImm(0xc10), 912b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->getSrc(s)); 9138aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 914b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->moveSources(s, 1); 915b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin i->setSrc(s, bld.loadImm(NULL, imm << 16)); 9168aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 9178aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 9188aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin i->setSrc(s, bld.loadImm(NULL, imm)); 9198aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 920f6579e4b17a6010fadb464b5179dea5779c74968Ilia Mirkin } 92157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 92257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 92319ba573a57ff6125a26ff9ae94cf43c36129645fIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 92471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 92571c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // If TEX requires more than 4 sources, the 2nd register tuple must be 92671c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // aligned to 4, even if it consists of just a single 4-byte register. 92771c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 92871c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. 92971c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller // 93071c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller int s = i->srcCount(0xff, true); 93171c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (s > 4 && s < 7) { 93271c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller if (i->srcExists(s)) // move potential predicate out of the way 93371c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->moveSources(s, 7 - s); 93471c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller while (s < 7) 93571c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller i->setSrc(s++, bld.loadImm(NULL, 0)); 93671c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 93771c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller } 93871c1c8a9b89ca1ecca1857c53cd8c648c9c9a871Christoph Bumiller 93957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 94057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 94157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 94257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 94357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleManualTXD(TexInstruction *i) 94457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 94557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller static const uint8_t qOps[4][2] = 94657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 94757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 94857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 94957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 95057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 95157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller }; 95257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def[4][4]; 95357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *crd[3]; 95457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *tex; 95557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *zero = bld.loadImm(bld.getSSA(), 0); 95657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int l, c; 95769e8b476d07544d6ef06414a1a78ce5c04761fdbIlia Mirkin const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 958f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin 959f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // This function is invoked after handleTEX lowering, so we have to expect 960f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // the arguments in the order that the hw wants them. For Fermi, array and 961f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // indirect are both in the leading arg, while for Kepler, array and 962f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // indirect are separate (and both precede the coordinates). Maxwell is 963f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin // handled in a separate function. 964f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin unsigned array; 965f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin if (targ->getChipset() < NVISA_GK104_CHIPSET) 966f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0; 967f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin else 968f667d15561820ee9dd8e836d43cce3ee52a4780eIlia Mirkin array = i->tex.target.isArray() + (i->tex.rIndirectSrc >= 0); 96957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 97057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_TEX; // no need to clone dPdx/dPdy later 97157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 97257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 97357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller crd[c] = bld.getScratch(); 97457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 97557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADON, TYPE_NONE, NULL); 97657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) { 9776eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin Value *src[3], *val; 97857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // mov coordinates from lane l to all lanes 97957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 980afea9bae67208cdb00b27a60c9cb013bf7d6de52Ilia Mirkin bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero); 98157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdx from lane l to lanes dx 98257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 98357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); 98457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // add dPdy from lane l to lanes dy 98557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 98657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); 9876eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin // normalize cube coordinates 9886eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin if (i->tex.target.isCube()) { 9896eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) 9906eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]); 9916eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin val = bld.getScratch(); 9926eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); 9936eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); 9946eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin bld.mkOp1(OP_RCP, TYPE_F32, val, val); 9956eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < 3; ++c) 9966eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val); 9976eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } else { 9986eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin for (c = 0; c < dim; ++c) 9996eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin src[c] = crd[c]; 10006eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin } 100157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // texture 1002a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez bld.insert(tex = cloneForward(func, i)); 100357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; c < dim; ++c) 10046eeb284e4f74a2fe5ae6cba90f97f219935e24dfIlia Mirkin tex->setSrc(c + array, src[c]); 100557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // save results 100657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 100757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mov; 100857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def[c][l] = bld.getSSA(); 100957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov = bld.mkMov(def[c][l], tex->getDef(c)); 101057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->fixed = 1; 101157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mov->lanes = 1 << l; 101257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 101357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 101457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); 101557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 101657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (c = 0; i->defExists(c); ++c) { 101757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); 101857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (l = 0; l < 4; ++l) 101957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller u->setSrc(l, def[c][l]); 102057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 102157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 102257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->bb->remove(i); 102357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 102457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 102557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 102657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 102757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleTXD(TexInstruction *txd) 102857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 102969e8b476d07544d6ef06414a1a78ce5c04761fdbIlia Mirkin int dim = txd->tex.target.getDim() + txd->tex.target.isCube(); 10308aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin unsigned arg = txd->tex.target.getArgCount(); 10318aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin unsigned expected_args = arg; 10328aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin const int chipset = prog->getTarget()->getChipset(); 10338aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin 10348aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 10358aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (!txd->tex.target.isArray() && txd->tex.useOffsets) 10368aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin expected_args++; 1037b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin if (txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0) 1038b3cbd862242e0ff75584fef706f2b2a3da8e49f2Ilia Mirkin expected_args++; 10398aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } else { 10408aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (txd->tex.useOffsets) 10418aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin expected_args++; 10427f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin if (!txd->tex.target.isArray() && ( 10437f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0)) 10447f937875c0289c2ffc2dc8306add72d5de7951efIlia Mirkin expected_args++; 10458aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin } 10468aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin 10478aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (expected_args > 4 || 10488aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin dim > 2 || 104969e8b476d07544d6ef06414a1a78ce5c04761fdbIlia Mirkin txd->tex.target.isShadow()) 10508aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin txd->op = OP_TEX; 105157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 105257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleTEX(txd); 105338a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller while (txd->srcExists(arg)) 105457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++arg; 105557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 10569c930639d9f6d713ccfd16b390a41a9f584f348cChristoph Bumiller txd->tex.derivAll = true; 10578aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin if (txd->op == OP_TEX) 105857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleManualTXD(txd); 105957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 10608aa34dc9cb1f4b1b17e49da98e54066832afc98eIlia Mirkin assert(arg == expected_args); 106157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int c = 0; c < dim; ++c) { 106238a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); 106338a20281fcc2ed244aea0aaa268035533f48a183Christoph Bumiller txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); 10649362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller txd->dPdx[c].set(NULL); 10659362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller txd->dPdy[c].set(NULL); 106657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 10672ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin 10682ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin // In this case we have fewer than 4 "real" arguments, which means that 10692ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin // handleTEX didn't apply any padding. However we have to make sure that 10702ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin // the second "group" of arguments still gets padded up to 4. 10712ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin if (chipset >= NVISA_GK104_CHIPSET) { 10722ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin int s = arg + 2 * dim; 10732ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin if (s >= 4 && s < 7) { 10742ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin if (txd->srcExists(s)) // move potential predicate out of the way 10752ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin txd->moveSources(s, 7 - s); 10762ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin while (s < 7) 10772ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin txd->setSrc(s++, bld.loadImm(NULL, 0)); 10782ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin } 10792ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin } 10802ef3cdb07e42d985103b5a3e930b2bba676e920bIlia Mirkin 108157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 108257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 108357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 108457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 108530cb66cd745fc793a2349f1d17046c50cd51c558Christoph BumillerNVC0LoweringPass::handleTXQ(TexInstruction *txq) 108630cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller{ 10875877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin const int chipset = prog->getTarget()->getChipset(); 10885877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin if (chipset >= NVISA_GK104_CHIPSET && txq->tex.rIndirectSrc < 0) 10895877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin txq->tex.r += prog->driver->io.texBindBase / 4; 10905877a594d54fdd2b3aa329f4d35b3491a7ee8a33Ilia Mirkin 1091346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin if (txq->tex.rIndirectSrc < 0) 109220e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin return true; 109320e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 109420e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin Value *ticRel = txq->getIndirectR(); 109520e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 109620e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->setIndirectS(NULL); 109720e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->tex.sIndirectSrc = -1; 109820e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1099346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin assert(ticRel); 1100346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin 110120e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin if (chipset < NVISA_GK104_CHIPSET) { 110220e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 110320e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1104346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin txq->setSrc(txq->tex.rIndirectSrc, NULL); 1105346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin if (txq->tex.r) 1106346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), 1107346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin ticRel, bld.mkImm(txq->tex.r)); 110820e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1109346ce0b98832e33d5411200002571b3edea9e2bbIlia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, src, ticRel, bld.mkImm(0x17)); 111020e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 111120e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->moveSources(0, 1); 111220e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->setSrc(0, src); 111320e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin } else { 11149cdbe80745948ed429b62ac382cfd5ddcba02af1Samuel Pitoiset Value *hnd = loadTexHandle(txq->getIndirectR(), txq->tex.r); 111520e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->tex.r = 0xff; 111620e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin txq->tex.s = 0x1f; 111720e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 1118b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->setIndirectR(NULL); 1119b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->moveSources(0, 1); 1120b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->setSrc(0, hnd); 1121b346a84e270a50f0a8f1a6e474a51da04dd72f0eIlia Mirkin txq->tex.rIndirectSrc = 0; 112220e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin } 112320e484afa4874e87cd18daffd66286bb893cf3fbIlia Mirkin 112430cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return true; 112530cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller} 112630cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller 112730cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumillerbool 1128423f64e83ab5b1ea7de475ae80300a8408522743Ilia MirkinNVC0LoweringPass::handleTXLQ(TexInstruction *i) 1129423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin{ 1130423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The outputs are inverted compared to what the TGSI instruction 1131423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * expects. Take that into account in the mask. 1132423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 1133423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin assert((i->tex.mask & ~3) == 0); 1134423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 1) 1135423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 2; 1136423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin else if (i->tex.mask == 2) 1137423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->tex.mask = 1; 1138423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin handleTEX(i); 1139423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.setPosition(i, true); 1140423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 1141423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin /* The returned values are not quite what we want: 1142423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (a) convert from s16/u16 to f32 1143423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin * (b) multiply by 1/256 1144423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin */ 1145423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin for (int def = 0; def < 2; ++def) { 1146423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (!i->defExists(def)) 1147423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin continue; 1148423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin enum DataType type = TYPE_S16; 1149423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 2 || def > 0) 1150423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin type = TYPE_U16; 1151423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), type, i->getDef(def)); 1152423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), 1153423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); 1154423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 1155423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin if (i->tex.mask == 3) { 1156423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin LValue *t = new_LValue(func, FILE_GPR); 1157423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(t, i->getDef(0)); 1158423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(0), i->getDef(1)); 1159423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin bld.mkMov(i->getDef(1), t); 1160423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin } 1161423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return true; 1162423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin} 1163423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin 1164423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkinbool 11657c47db359e193f21be796df3a7b5d037dd42b28fSamuel PitoisetNVC0LoweringPass::handleBUFQ(Instruction *bufq) 11662c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin{ 11677c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->op = OP_MOV; 11687c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->setSrc(0, loadBufLength32(bufq->getIndirect(0, 1), 11697c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->getSrc(0)->reg.fileIndex * 16)); 11707c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->setIndirect(0, 0, NULL); 11717c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset bufq->setIndirect(0, 1, NULL); 11722c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin return true; 11732c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin} 11742c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin 117514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoisetvoid 1176543fb95473e404b7212eea3f00a23dd0d23758d5Samuel PitoisetNVC0LoweringPass::handleSharedATOMNVE4(Instruction *atom) 1177543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset{ 1178543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); 1179543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1180543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *currBB = atom->bb; 1181543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *tryLockBB = atom->bb->splitBefore(atom, false); 1182543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *joinBB = atom->bb->splitAfter(atom); 1183543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *setAndUnlockBB = new BasicBlock(func); 1184543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset BasicBlock *failLockBB = new BasicBlock(func); 1185543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1186543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(currBB, true); 1187543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset assert(!currBB->joinAt); 1188543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); 1189543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1190543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset CmpInstruction *pred = 1191543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 1192543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset TYPE_U32, bld.mkImm(0), bld.mkImm(1)); 1193543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1194543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockBB, CC_ALWAYS, NULL); 1195543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset currBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::TREE); 1196543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1197543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(tryLockBB, true); 1198543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1199543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset Instruction *ld = 1200dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkLoad(TYPE_U32, atom->getDef(0), atom->getSrc(0)->asSym(), 1201dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0)); 1202543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); 1203543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; 1204543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1205543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, setAndUnlockBB, CC_P, ld->getDef(1)); 1206543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL); 1207543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset tryLockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::CROSS); 1208543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset tryLockBB->cfg.attach(&setAndUnlockBB->cfg, Graph::Edge::TREE); 1209543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1210543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset tryLockBB->cfg.detach(&joinBB->cfg); 1211543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.remove(atom); 1212543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1213543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(setAndUnlockBB, true); 1214543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset Value *stVal; 1215543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { 1216543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset // Read the old value, and write the new one. 1217543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset stVal = atom->getSrc(1); 1218543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { 1219543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset CmpInstruction *set = 1220543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(), 1221543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset TYPE_U32, ld->getDef(0), atom->getSrc(1)); 1222543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1223543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkCmp(OP_SLCT, CC_NE, TYPE_U32, (stVal = bld.getSSA()), 1224543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset TYPE_U32, atom->getSrc(2), ld->getDef(0), set->getDef(0)); 1225543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } else { 1226543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset operation op; 1227543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1228543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset switch (atom->subOp) { 1229543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_ADD: 1230543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_ADD; 1231543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1232543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_AND: 1233543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_AND; 1234543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1235543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_OR: 1236543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_OR; 1237543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1238543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_XOR: 1239543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_XOR; 1240543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1241543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MIN: 1242543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_MIN; 1243543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1244543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MAX: 1245543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset op = OP_MAX; 1246543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset break; 1247543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset default: 1248543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset assert(0); 1249543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset return; 1250543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } 1251543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1252543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset stVal = bld.mkOp2v(op, atom->dType, bld.getSSA(), ld->getDef(0), 1253543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset atom->getSrc(1)); 1254543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset } 1255543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1256543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset Instruction *st = 1257dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkStore(OP_STORE, TYPE_U32, atom->getSrc(0)->asSym(), 1258dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0), stVal); 1259543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset st->setDef(0, pred->getDef(0)); 1260543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; 1261543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1262543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, failLockBB, CC_ALWAYS, NULL); 1263543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset setAndUnlockBB->cfg.attach(&failLockBB->cfg, Graph::Edge::TREE); 1264543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1265543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset // Lock until the store has not been performed. 1266543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(failLockBB, true); 1267543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockBB, CC_NOT_P, pred->getDef(0)); 1268543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL); 1269543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset failLockBB->cfg.attach(&tryLockBB->cfg, Graph::Edge::BACK); 1270543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset failLockBB->cfg.attach(&joinBB->cfg, Graph::Edge::TREE); 1271543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1272543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.setPosition(joinBB, false); 1273543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; 1274543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset} 1275543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset 1276543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoisetvoid 127714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel PitoisetNVC0LoweringPass::handleSharedATOM(Instruction *atom) 127814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset{ 127914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); 128014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 128114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset BasicBlock *currBB = atom->bb; 128214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false); 128314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset BasicBlock *joinBB = atom->bb->splitAfter(atom); 128414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 128514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.setPosition(currBB, true); 128614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset assert(!currBB->joinAt); 128714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); 128814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 128914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL); 129014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE); 129114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 129214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.setPosition(tryLockAndSetBB, true); 129314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 129414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Instruction *ld = 1295dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkLoad(TYPE_U32, atom->getDef(0), atom->getSrc(0)->asSym(), 1296dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0)); 129714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); 129814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; 129914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 130014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Value *stVal; 130114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { 130214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset // Read the old value, and write the new one. 130314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset stVal = atom->getSrc(1); 130414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { 130514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset CmpInstruction *set = 130614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 130714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset TYPE_U32, ld->getDef(0), atom->getSrc(1)); 130814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset set->setPredicate(CC_P, ld->getDef(1)); 130914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 13106526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset Instruction *selp = 13116526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0), 13126526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset atom->getSrc(2), set->getDef(0)); 13136526225f888a08b301e8c39ec70b4e739081e490Samuel Pitoiset selp->src(2).mod = Modifier(NV50_IR_MOD_NOT); 131414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset selp->setPredicate(CC_P, ld->getDef(1)); 131514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 131614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset stVal = selp->getDef(0); 131714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } else { 131814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset operation op; 131914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 132014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset switch (atom->subOp) { 132114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_ADD: 132214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_ADD; 132314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 132414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_AND: 132514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_AND; 132614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 132714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_OR: 132814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_OR; 132914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 133014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_XOR: 133114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_XOR; 133214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 133314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MIN: 133414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_MIN; 133514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 133614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset case NV50_IR_SUBOP_ATOM_MAX: 133714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset op = OP_MAX; 133814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset break; 133914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset default: 134014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset assert(0); 1341e05492fd7f0e1a9454482a9174f5870b8cb5a41eSamuel Pitoiset return; 134214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } 134314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 134414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Instruction *i = 134514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0), 134614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset atom->getSrc(1)); 134714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset i->setPredicate(CC_P, ld->getDef(1)); 134814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 134914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset stVal = i->getDef(0); 135014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } 135114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 135214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset Instruction *st = 1353dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin bld.mkStore(OP_STORE, TYPE_U32, atom->getSrc(0)->asSym(), 1354dd4b44efc04413453e4cbf78434b29392eb148a9Ilia Mirkin atom->getIndirect(0, 0), stVal); 135514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset st->setPredicate(CC_P, ld->getDef(1)); 135614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; 135714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 135814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset // Loop until the lock is acquired. 135914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1)); 136014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK); 136114a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS); 136214a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL); 136314a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 136414a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.remove(atom); 136514a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 136614a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.setPosition(joinBB, false); 136714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; 136814a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset} 136914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 13702c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkinbool 1371c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph BumillerNVC0LoweringPass::handleATOM(Instruction *atom) 1372c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller{ 1373c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller SVSemantic sv; 13747b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base; 1375c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1376c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller switch (atom->src(0).getFile()) { 1377c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_LOCAL: 1378c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller sv = SV_LBASE; 1379c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 1380c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case FILE_MEMORY_SHARED: 1381839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset // For Fermi/Kepler, we have to use ld lock/st unlock to perform atomic 1382839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset // operations on shared memory. For Maxwell, ATOMS is enough. 1383839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset if (targ->getChipset() < NVISA_GK104_CHIPSET) 1384543fb95473e404b7212eea3f00a23dd0d23758d5Samuel Pitoiset handleSharedATOM(atom); 1385839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset else if (targ->getChipset() < NVISA_GM107_CHIPSET) 1386839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset handleSharedATOMNVE4(atom); 138714a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset return true; 1388c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller default: 138961d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); 1390b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); 1391c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin assert(base->reg.size == 8); 1392c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin if (ptr) 1393c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); 1394c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin assert(base->reg.size == 8); 1395c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin atom->setIndirect(0, 0, base); 139661d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 1397b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 1398b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin // Harden against out-of-bounds accesses 1399b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *offset = bld.loadImm(NULL, atom->getSrc(0)->reg.data.offset + typeSizeof(atom->sType)); 1400b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *length = loadBufLength32(ind, atom->getSrc(0)->reg.fileIndex * 16); 1401b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *pred = new_LValue(func, FILE_PREDICATE); 1402b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin if (ptr) 1403b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, ptr); 1404b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); 1405b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin atom->setPredicate(CC_NOT_P, pred); 1406b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin if (atom->defExists(0)) { 1407b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *zero, *dst = atom->getDef(0); 1408b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin atom->setDef(0, bld.getSSA()); 1409b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 1410b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.setPosition(atom, true); 1411b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkMov((zero = bld.getSSA()), bld.mkImm(0)) 1412b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin ->setPredicate(CC_P, pred); 1413b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkOp2(OP_UNION, TYPE_U32, dst, atom->getDef(0), zero); 1414b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin } 1415b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 1416c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 1417c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller } 1418c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin base = 1419c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0)); 1420c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1421c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setSrc(0, cloneShallow(func, atom->getSrc(0))); 1422c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 1423c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller if (ptr) 1424c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr); 14257b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin atom->setIndirect(0, 1, NULL); 1426c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller atom->setIndirect(0, 0, base); 1427c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 1428c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller return true; 1429c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller} 1430c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller 143175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumillerbool 143275f1f852b00ad0d766684d01695322b93a2acd55Christoph BumillerNVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) 143375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller{ 1434839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset if (targ->getChipset() < NVISA_GM107_CHIPSET) { 1435839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset if (cas->src(0).getFile() == FILE_MEMORY_SHARED) { 1436839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM(). 1437839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset return false; 1438839a469166b9c0b9959620eda85a6481f9efa15fSamuel Pitoiset } 143914a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset } 144014a810e9d0dbf52e547ae6e16a68487d7cb92004Samuel Pitoiset 144175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && 144275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) 144375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return false; 144475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, true); 144575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 144675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (needCctl) { 144775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0)); 144875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->setIndirect(0, 0, cas->getIndirect(0, 0)); 144975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->fixed = 1; 145075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->subOp = NV50_IR_SUBOP_CCTL_IV; 145175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller if (cas->isPredicated()) 145275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cctl->setPredicate(cas->cc, cas->getPredicate()); 145375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 145475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 1455df043f076464d817a9d88c4c43757e65b6eae3f9Ilia Mirkin if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { 145675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // CAS is crazy. It's 2nd source is a double reg, and the 3rd source 145775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // should be set to the high part of the double reg or bad things will 145875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // happen elsewhere in the universe. 145975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // Also, it sometimes returns the new value instead of the old one 146075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller // under mysterious circumstances. 146175f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller Value *dreg = bld.getSSA(8); 146275f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.setPosition(cas, false); 146375f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2)); 146475f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller cas->setSrc(1, dreg); 1465df043f076464d817a9d88c4c43757e65b6eae3f9Ilia Mirkin cas->setSrc(2, dreg); 146675f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 146775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 146875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller return true; 146975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller} 147075f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller 14714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 1472b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off, uint16_t base) 14734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 1474d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 1475b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset off += base; 1476b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset 14774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 14784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 14794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 14804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 14814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerinline Value * 1482b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off, uint16_t base) 1483c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin{ 1484d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 1485b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset off += base; 1486c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 14877b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin if (ptr) 14887b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); 14897b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin 1490c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin return bld. 1491c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin mkLoadv(TYPE_U64, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off), ptr); 1492c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin} 1493c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 1494c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkininline Value * 1495b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off, uint16_t base) 1496c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin{ 1497d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset uint8_t b = prog->driver->io.auxCBSlot; 1498b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset off += base; 1499c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 15007b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin if (ptr) 15017b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4)); 15027b9a77b905bda3003dc57efb99879499ebc4ba41Ilia Mirkin 1503c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin return bld. 1504c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U64, off + 8), ptr); 1505c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin} 1506c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin 1507c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkininline Value * 1508b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadBufInfo64(Value *ptr, uint32_t off) 1509b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset{ 1510b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset return loadResInfo64(ptr, off, prog->driver->io.bufInfoBase); 1511b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset} 1512b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset 1513b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoisetinline Value * 1514b8b3af2932039c6105d61f6922157a250ed8b79aSamuel PitoisetNVC0LoweringPass::loadBufLength32(Value *ptr, uint32_t off) 1515b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset{ 1516b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset return loadResLength32(ptr, off, prog->driver->io.bufInfoBase); 1517b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset} 1518b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoiset 1519b8b3af2932039c6105d61f6922157a250ed8b79aSamuel Pitoisetinline Value * 15204f58b78c309db372d408912ca87e88d319b895daSamuel PitoisetNVC0LoweringPass::loadUboInfo64(Value *ptr, uint32_t off) 15214f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset{ 15224f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset return loadResInfo64(ptr, off, prog->driver->io.uboInfoBase); 15234f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset} 15244f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset 15254f58b78c309db372d408912ca87e88d319b895daSamuel Pitoisetinline Value * 15264f58b78c309db372d408912ca87e88d319b895daSamuel PitoisetNVC0LoweringPass::loadUboLength32(Value *ptr, uint32_t off) 15274f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset{ 15284f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset return loadResLength32(ptr, off, prog->driver->io.uboInfoBase); 15294f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset} 15304f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset 15314f58b78c309db372d408912ca87e88d319b895daSamuel Pitoisetinline Value * 15324506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) 15334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 15344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint8_t b = prog->driver->io.msInfoCBSlot; 15354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off += prog->driver->io.msInfoBase; 15364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return bld. 15374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); 15384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 15394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 15404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller/* On nvc0, surface info is obtained via the surface binding points passed 15414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * to the SULD/SUST instructions. 15424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * On nve4, surface info is stored in c[] and is used by various special 1543c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset * instructions, e.g. for clamping coordinates or generating an address. 15444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller * They couldn't just have added an equivalent to TIC now, couldn't they ? 15454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller */ 1546c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_ADDR 0x00 1547c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_FMT 0x04 1548c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM_X 0x08 1549c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_PITCH 0x0c 1550c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM_Y 0x10 1551c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_ARRAY 0x14 1552c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM_Z 0x18 1553c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_UNK1C 0x1c 1554c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_WIDTH 0x20 1555c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_HEIGHT 0x24 1556c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DEPTH 0x28 1557c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_TARGET 0x2c 1558c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_BSIZE 0x30 1559c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_RAW_X 0x34 1560c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_MS_X 0x38 1561c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_MS_Y 0x3c 1562c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset 1563c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO__STRIDE 0x40 1564c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset 1565c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8) 1566c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4) 1567c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) 15684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1569ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoisetinline Value * 1570ed3a284382b194292460a99c0dfe7fd3feccca40Samuel PitoisetNVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off) 1571ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset{ 1572ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset uint32_t base = slot * NVC0_SU_INFO__STRIDE; 1573ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset 1574ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset if (ptr) { 1575ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); 1576ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); 1577ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6)); 1578ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset base = 0; 1579ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset } 1580ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset off += base; 1581ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset 1582ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset return loadResInfo32(ptr, off, prog->driver->io.suInfoBase); 1583ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset} 1584ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset 15854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillerstatic inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) 15864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 15874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (su->tex.target.getEnum()) { 15884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_BUFFER: return NV50_IR_SUBOP_SUCLAMP_PL(0, 1); 15894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_RECT: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_1D_ARRAY: return (c == 1) ? 15924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_PL(0, 2) : 15934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 15954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS: return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); 15964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_2D_MS_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_3D: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 15994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 16004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case TEX_TARGET_CUBE_ARRAY: return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); 16014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 16024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(0); 16034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return 0; 16044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 16054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 16064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16077c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoisetbool 1608d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel PitoisetNVC0LoweringPass::handleSUQ(TexInstruction *suq) 16097c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset{ 1610b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin int mask = suq->tex.mask; 1611d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset int dim = suq->tex.target.getDim(); 1612d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset int arg = dim + (suq->tex.target.isArray() || suq->tex.target.isCube()); 1613d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset Value *ind = suq->getIndirectR(); 1614ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset int slot = suq->tex.r; 1615b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin int c, d; 1616d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1617b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin for (c = 0, d = 0; c < 3; ++c, mask >>= 1) { 1618b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (c >= arg || !(mask & 1)) 1619b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin continue; 1620d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1621b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin int offset; 1622d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1623b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (c == 1 && suq->tex.target == TEX_TARGET_1D_ARRAY) { 1624c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset offset = NVC0_SU_INFO_SIZE(2); 1625b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin } else { 1626c1fb3290a6049498989f5144e6aa54e35a5e4552Samuel Pitoiset offset = NVC0_SU_INFO_SIZE(c); 1627d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset } 1628ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset)); 1629b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (c == 2 && suq->tex.target.isCube()) 1630b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1), 1631b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.loadImm(NULL, 6)); 1632d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset } 1633d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1634b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (mask & 1) { 1635b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin if (suq->tex.target.isMS()) { 1636ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); 1637ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); 1638b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y); 1639b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms); 1640b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin } else { 1641b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.mkMov(suq->getDef(d++), bld.loadImm(NULL, 1)); 1642b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin } 1643d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset } 1644d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset 1645d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset bld.remove(suq); 16467c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset return true; 16477c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset} 16487c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset 16494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 16504506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) 16514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 16524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int arg = tex->tex.target.getArgCount(); 1653ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset int slot = tex->tex.r; 16544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS) 16564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D; 16574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 16584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY) 16594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->tex.target = TEX_TARGET_2D_ARRAY; 16604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 16614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller return; 16624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *x = tex->getSrc(0); 16644506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = tex->getSrc(1); 16654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *s = tex->getSrc(arg - 1); 16664506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); 1668b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin Value *ind = tex->getIndirectR(); 16694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1670ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); 1671ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); 16724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); 16744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); 16754506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_AND, TYPE_U32, ts, s, bld.loadImm(NULL, 0x7)); 16774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller s = bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3)); 16784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dx = loadMsInfo32(ts, 0x0); 16804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *dy = loadMsInfo32(ts, 0x4); 16814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx); 16834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy); 16844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(0, tx); 16864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->setSrc(1, ty); 16874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller tex->moveSources(arg, -1); 16884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 16894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 16904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// Sets 64-bit "generic address", predicate and format sources for SULD/SUST. 16914506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller// They're computed from the coordinates using the surface info in c[] space. 16924506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 16934506ed28de7f9d76bbc99c0758a7891b84528729Christoph BumillerNVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) 16944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 16954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Instruction *insn; 16964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP; 16974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const bool raw = 16984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; 1699ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset const int slot = su->tex.r; 17004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller const int dim = su->tex.target.getDim(); 1701e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 17024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller int c; 17034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *zero = bld.mkImm(0); 17044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *p1 = NULL; 17054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *v; 17064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *src[3]; 17074506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *bf, *eau, *off; 17084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *addr, *pred; 1709ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ind = su->getIndirectR(); 17104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller off = bld.getScratch(4); 17124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = bld.getScratch(4); 17134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller addr = bld.getSSA(8); 17144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller pred = bld.getScratch(1, FILE_PREDICATE); 17154506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17164506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.setPosition(su, false); 17174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller adjustCoordinatesMS(su); 17194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate clamped coordinates 17214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (c = 0; c < arg; ++c) { 17228b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset int dimc = c; 17238b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset 17248b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset if (c == 1 && su->tex.target == TEX_TARGET_1D_ARRAY) { 17258b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset // The array index is stored in the Z component for 1D arrays. 17268b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset dimc = 2; 17278b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset } 17288b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset 17294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = bld.getScratch(); 17304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (c == 0 && raw) 1731ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X); 17324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 1733ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc)); 17344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) 17358b540db44cd9adadd98d98d184ad9fe5afc4f1acSamuel Pitoiset ->subOp = getSuClampSubOp(su, dimc); 17364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller for (; c < 3; ++c) 17384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[c] = zero; 17394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set predicate output 17414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 17424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[0]->getInsn()->setFlagsDef(1, pred); 17434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 1744e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset if (su->tex.target.isArray() || su->tex.target.isCube()) { 17454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller p1 = bld.getSSA(1, FILE_PREDICATE); 17464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller src[dim]->getInsn()->setFlagsDef(1, p1); 17474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate pixel offset 17504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) { 17514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target != TEX_TARGET_BUFFER) 17524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); 17534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 17544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 3) { 1755ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); 17564506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) 17574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 17584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 1759ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); 17604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) 17614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l 17624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 17634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 2); 1764ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); 17654506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) 1766e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ? 17674506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l 17684506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17694506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17704506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // calculate effective address part 1 17714506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 17724506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (raw) { 17734506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bf = src[0]; 17744506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 1775ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); 17764506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) 17774506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); 17784506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17794506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 17804506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *y = src[1]; 17814506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *z = src[2]; 17824506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller uint16_t subOp = 0; 17834506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 17844506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller switch (dim) { 17854506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 1: 17864506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller y = zero; 17874506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = zero; 17884506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 17894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case 2: 17904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller z = off; 1791e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset if (!su->tex.target.isArray() && !su->tex.target.isCube()) { 1792ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); 17934506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 17944506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 17954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 17964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller default: 17974506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller subOp = NV50_IR_SUBOP_SUBFM_3D; 17984506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(dim == 3); 17994506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 18004506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 18014506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z); 18024506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->subOp = subOp; 18034506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller insn->setFlagsDef(1, pred); 18044506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 18054506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18064506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // part 2 1807ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR); 18084506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18094506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 18104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = v; 18114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else { 18124506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); 18134506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 18144506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // add array layer offset 1815e478156ed7b57724fd189efd5c4ac42e769f6d04Samuel Pitoiset if (su->tex.target.isArray() || su->tex.target.isCube()) { 1816ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); 18174506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (dim == 1) 18184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) 18194506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 18204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller else 18214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau) 18224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller ->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32 18234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // combine predicates 18244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller assert(p1); 18254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1); 18264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 18274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18284506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom) { 18294506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller Value *lo = bf; 18304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->tex.target == TEX_TARGET_BUFFER) { 18314506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller lo = zero; 18324506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkMov(off, bf); 18334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 18344506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // bf == g[] address & 0xff 18354506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // eau == g[] address >> 8 18364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, bf, lo, bld.loadImm(NULL, 0x6540), eau); 18374506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau); 18384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } else 18394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) { 18404506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Convert from u32 to u8 address format, which is what the library code 18414506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // doing SULDP currently uses. 18424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: can SUEAU do this ? 18434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // XXX: does it matter that we don't mask high bytes in bf ? 18444506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // Grrr. 18454506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8)); 18464506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off); 18474506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 18484506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18494506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau); 18504506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18514506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (atom && su->tex.target == TEX_TARGET_BUFFER) 18524506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off); 18534506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18544506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // let's just set it 0 for raw access and hope it works 18554506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller v = raw ? 1856ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); 18574506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18584506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // get rid of old coordinate sources, make space for fmt info and predicate 18594506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->moveSources(arg, 3 - arg); 18604506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller // set 64 bit address and 32-bit format sources 18614506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(0, addr); 18624506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(1, v); 18634506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller su->setSrc(2, pred); 1864fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 1865fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset // prevent read fault when the image is not actually bound 1866fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset CmpInstruction *pred1 = 1867fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 1868fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset TYPE_U32, bld.mkImm(0), 1869ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); 18706fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset 1871e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset if (su->op != OP_SUSTP && su->tex.format) { 18726fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset const TexInstruction::ImgFormatDesc *format = su->tex.format; 18736fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset int blockwidth = format->bits[0] + format->bits[1] + 18746fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset format->bits[2] + format->bits[3]; 18756fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset 1876e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset // make sure that the format doesn't mismatch 1877e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset assert(format->components != 0); 1878e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0), 1879e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset TYPE_U32, bld.loadImm(NULL, blockwidth / 8), 1880ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), 1881e7d2ef42a5a93789990922b4624096d7ce537cb7Samuel Pitoiset pred1->getDef(0)); 18826fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset } 1883fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset su->setPredicate(CC_NOT_P, pred1->getDef(0)); 18846fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset 18856fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // TODO: initialize def values to 0 when the surface operation is not 18866fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // performed (not needed for stores). Also, fix the "address bounds test" 18876fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // subtests from arb_shader_image_load_store-invalid for buffers, because it 18886fc6d548ed66caf4395190718163d964d9c0fe25Samuel Pitoiset // seems like that the predicate is not correctly set by suclamp. 18894506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 18904506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 18913ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkinstatic DataType 18923ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkingetSrcType(const TexInstruction::ImgFormatDesc *t, int c) 18933ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin{ 18943ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin switch (t->type) { 18953ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case FLOAT: return t->bits[c] == 16 ? TYPE_F16 : TYPE_F32; 18963ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UNORM: return t->bits[c] == 8 ? TYPE_U8 : TYPE_U16; 18973ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SNORM: return t->bits[c] == 8 ? TYPE_S8 : TYPE_S16; 18983ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UINT: 18993ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return (t->bits[c] == 8 ? TYPE_U8 : 19003ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin (t->bits[c] == 16 ? TYPE_U16 : TYPE_U32)); 19013ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SINT: 19023ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return (t->bits[c] == 8 ? TYPE_S8 : 19033ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin (t->bits[c] == 16 ? TYPE_S16 : TYPE_S32)); 19043ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19053ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_NONE; 19063ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin} 19073ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19083ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkinstatic DataType 19093ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkingetDestType(const ImgType type) { 19103ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin switch (type) { 19113ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case FLOAT: 19123ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UNORM: 19133ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SNORM: 19143ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_F32; 19153ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case UINT: 19163ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_U32; 19173ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin case SINT: 19183ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_S32; 19193ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin default: 19203ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin assert(!"Impossible type"); 19213ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin return TYPE_NONE; 19223ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19233ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin} 19243ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumillervoid 19263ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkinNVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) 19274506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller{ 19283ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin const TexInstruction::ImgFormatDesc *format = su->tex.format; 19293ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin int width = format->bits[0] + format->bits[1] + 19303ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin format->bits[2] + format->bits[3]; 19313ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin Value *untypedDst[4] = {}; 19323ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin Value *typedDst[4] = {}; 19333ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19343ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // We must convert this to a generic load. 19353ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->op = OP_SULDB; 19363ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19373ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->dType = typeOfSize(width / 8); 19383ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->sType = TYPE_U8; 19394506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 19403ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < width / 32; i++) 19413ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin untypedDst[i] = bld.getSSA(); 19423ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (width < 32) 19433ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin untypedDst[0] = bld.getSSA(); 19443ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19453ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < 4; i++) { 19463ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin typedDst[i] = su->getDef(i); 19473ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19483ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19493ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Set the untyped dsts as the su's destinations 19503ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < 4; i++) 19513ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->setDef(i, untypedDst[i]); 19523ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19533ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.setPosition(su, true); 19543ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19553ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Unpack each component into the typed dsts 19563ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin int bits = 0; 19573ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin for (int i = 0; i < 4; bits += format->bits[i], i++) { 19583ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (!typedDst[i]) 19593ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin continue; 19603ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (i >= format->components) { 19613ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->type == FLOAT || 19623ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin format->type == UNORM || 19633ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin format->type == SNORM) 19643ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.loadImm(typedDst[i], i == 3 ? 1.0f : 0.0f); 19653ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else 19663ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.loadImm(typedDst[i], i == 3 ? 1 : 0); 19673ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin continue; 19683ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19693ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19703ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Get just that component's data into the relevant place 19713ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->bits[i] == 32) 19723ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkMov(typedDst[i], untypedDst[i]); 19733ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->bits[i] == 16) 19743ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, getDestType(format->type), typedDst[i], 19753ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin getSrcType(format, i), untypedDst[i / 2]) 19763ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin ->subOp = (i & 1) << (format->type == FLOAT ? 0 : 1); 19773ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->bits[i] == 8) 19783ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, getDestType(format->type), typedDst[i], 19793ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin getSrcType(format, i), untypedDst[0])->subOp = i; 19803ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else { 19813ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_EXTBF, TYPE_U32, typedDst[i], untypedDst[bits / 32], 19823ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkImm((bits % 32) | (format->bits[i] << 8))); 19833ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->type == UNORM || format->type == SNORM) 19843ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], getSrcType(format, i), typedDst[i]); 19853ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin } 19863ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 19873ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin // Normalize / convert as necessary 19883ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (format->type == UNORM) 19893ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, typedDst[i], typedDst[i], bld.loadImm(NULL, 1.0f / ((1 << format->bits[i]) - 1))); 19903ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->type == SNORM) 19913ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_MUL, TYPE_F32, typedDst[i], typedDst[i], bld.loadImm(NULL, 1.0f / ((1 << (format->bits[i] - 1)) - 1))); 19923ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin else if (format->type == FLOAT && format->bits[i] < 16) { 19933ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, typedDst[i], typedDst[i], bld.loadImm(NULL, 15 - format->bits[i])); 19943ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]); 19954506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 19964506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 19978e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin 19988e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin if (format->bgra) { 19998e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin std::swap(typedDst[0], typedDst[2]); 20008e7893eb53213254997a1a3beb0575be11821f83Ilia Mirkin } 20013ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin} 20023ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 20033ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkinvoid 20043ce80f924d07648040ab08a9cd30588621fb47a1Ilia MirkinNVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) 20053ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin{ 20063ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin processSurfaceCoordsNVE4(su); 20073ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 20083ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (su->op == OP_SULDP) 20093ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin convertSurfaceFormat(su); 20104506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 20114506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->op == OP_SUREDB || su->op == OP_SUREDP) { 20122b6e04e91f6b157dd00872f9a0965c0655e19ad9Samuel Pitoiset assert(su->getPredicate()); 20132b6e04e91f6b157dd00872f9a0965c0655e19ad9Samuel Pitoiset Value *pred = 20142b6e04e91f6b157dd00872f9a0965c0655e19ad9Samuel Pitoiset bld.mkOp2v(OP_OR, TYPE_U8, bld.getScratch(1, FILE_PREDICATE), 20152b6e04e91f6b157dd00872f9a0965c0655e19ad9Samuel Pitoiset su->getPredicate(), su->getSrc(2)); 20162b6e04e91f6b157dd00872f9a0965c0655e19ad9Samuel Pitoiset 2017fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset Instruction *red = bld.mkOp(OP_ATOM, su->dType, bld.getSSA()); 20184506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->subOp = su->subOp; 2019974ab614d3b883bf094d4cdbfdb9792df6625f55Samuel Pitoiset red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0)); 20204506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(1, su->getSrc(3)); 20214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller if (su->subOp == NV50_IR_SUBOP_ATOM_CAS) 20224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setSrc(2, su->getSrc(4)); 20234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller red->setIndirect(0, 0, su->getSrc(0)); 2024fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 2025fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset // make sure to initialize dst value when the atomic operation is not 2026fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset // performed 2027fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); 2028fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 20292b6e04e91f6b157dd00872f9a0965c0655e19ad9Samuel Pitoiset assert(su->cc == CC_NOT_P); 20302b6e04e91f6b157dd00872f9a0965c0655e19ad9Samuel Pitoiset red->setPredicate(su->cc, pred); 2031fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset mov->setPredicate(CC_P, pred); 2032fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 2033fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset bld.mkOp2(OP_UNION, TYPE_U32, su->getDef(0), 2034fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset red->getDef(0), mov->getDef(0)); 2035fbeb69757c769f594d5f9340adf067a300eddc1dSamuel Pitoiset 20364506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller delete_Instruction(bld.getProgram(), su); 203775f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(red, true); 20384506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller } 20393ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin 20403ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin if (su->op == OP_SUSTB || su->op == OP_SUSTP) 20413ce80f924d07648040ab08a9cd30588621fb47a1Ilia Mirkin su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8; 20424506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller} 20434506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller 2044362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoisetvoid 2045362e17a7123e55d22a188943886a7ead00def6b6Samuel PitoisetNVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) 2046362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset{ 2047ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset const int slot = su->tex.r; 2048362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int dim = su->tex.target.getDim(); 2049362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 2050362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset int c; 2051362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *zero = bld.mkImm(0); 2052362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *src[3]; 2053362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *v; 2054ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset Value *ind = su->getIndirectR(); 2055362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2056b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin bld.setPosition(su, false); 2057b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin 2058b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin adjustCoordinatesMS(su); 2059b2b5075e04bbe6c6462fd01711524abd80380f45Ilia Mirkin 206019d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset if (ind) { 206119d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset Value *ptr; 206219d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r)); 206319d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); 206419d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset su->setIndirectR(ptr); 206519d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset } 206619d0450b2758ec9681747e9229b46f9eb637378bSamuel Pitoiset 2067362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // get surface coordinates 2068362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset for (c = 0; c < arg; ++c) 2069362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset src[c] = su->getSrc(c); 2070362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset for (; c < 3; ++c) 2071362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset src[c] = zero; 2072362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2073362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // calculate pixel offset 2074362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op == OP_SULDP || su->op == OP_SUREDP) { 2075ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE); 2076362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v)); 2077362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2078362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2079362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // add array layer offset 2080362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->tex.target.isArray() || su->tex.target.isCube()) { 2081ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); 2082362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset assert(dim > 1); 2083362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)); 2084362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2085362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2086362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // prevent read fault when the image is not actually bound 2087362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset CmpInstruction *pred = 2088362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 2089362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset TYPE_U32, bld.mkImm(0), 2090ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); 2091362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op != OP_SUSTP && su->tex.format) { 2092362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const TexInstruction::ImgFormatDesc *format = su->tex.format; 2093362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset int blockwidth = format->bits[0] + format->bits[1] + 2094362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset format->bits[2] + format->bits[3]; 2095362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2096362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset assert(format->components != 0); 2097362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // make sure that the format doesn't mismatch when it's not FMT_NONE 2098362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), 2099362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset TYPE_U32, bld.loadImm(NULL, blockwidth / 8), 2100ed3a284382b194292460a99c0dfe7fd3feccca40Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), 2101362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset pred->getDef(0)); 2102362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2103362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setPredicate(CC_NOT_P, pred->getDef(0)); 2104362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset} 2105362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2106362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoisetvoid 2107362e17a7123e55d22a188943886a7ead00def6b6Samuel PitoisetNVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) 2108362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset{ 2109362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->tex.target == TEX_TARGET_1D_ARRAY) { 2110362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset /* As 1d arrays also need 3 coordinates, switching to TEX_TARGET_2D_ARRAY 2111362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset * will simplify the lowering pass and the texture constraints. */ 2112362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->moveSources(1, 1); 2113362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setSrc(1, bld.loadImm(NULL, 0)); 2114362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->tex.target = TEX_TARGET_2D_ARRAY; 2115362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2116362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2117362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset processSurfaceCoordsNVC0(su); 2118362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2119362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op == OP_SULDP) 2120362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset convertSurfaceFormat(su); 2121362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2122362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (su->op == OP_SUREDB || su->op == OP_SUREDP) { 2123362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int dim = su->tex.target.getDim(); 2124362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 2125362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset LValue *addr = bld.getSSA(8); 2126362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Value *def = su->getDef(0); 2127362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2128362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->op = OP_SULEA; 2129362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2130362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // Set the destination to the address 2131362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->dType = TYPE_U64; 2132362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setDef(0, addr); 2133362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset su->setDef(1, su->getPredicate()); 2134362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2135362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.setPosition(su, true); 2136362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2137362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // Perform the atomic op 2138362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Instruction *red = bld.mkOp(OP_ATOM, su->sType, bld.getSSA()); 2139362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->subOp = su->subOp; 2140362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0)); 2141362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setSrc(1, su->getSrc(arg)); 2142362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset if (red->subOp == NV50_IR_SUBOP_ATOM_CAS) 2143362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setSrc(2, su->getSrc(arg + 1)); 2144362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setIndirect(0, 0, addr); 2145362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2146362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // make sure to initialize dst value when the atomic operation is not 2147362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset // performed 2148362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); 2149362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2150362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset assert(su->cc == CC_NOT_P); 2151362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset red->setPredicate(su->cc, su->getPredicate()); 2152362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset mov->setPredicate(CC_P, su->getPredicate()); 2153362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2154362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset bld.mkOp2(OP_UNION, TYPE_U32, def, red->getDef(0), mov->getDef(0)); 2155362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2156362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset handleCasExch(red, false); 2157362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset } 2158362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset} 2159362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset 2160c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoisetvoid 2161c68989b2c8de2ad961774cb7402aa206517d88c5Samuel PitoisetNVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) 2162c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset{ 2163c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const int slot = su->tex.r; 2164c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const int dim = su->tex.target.getDim(); 2165c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); 2166c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset Value *ind = su->getIndirectR(); 2167c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset int pos = 0; 2168c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2169c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.setPosition(su, false); 2170c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2171c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // add texture handle 2172c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset switch (su->op) { 2173c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset case OP_SUSTP: 2174c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset pos = 4; 2175c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset break; 2176c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset case OP_SUREDP: 2177c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset pos = (su->subOp == NV50_IR_SUBOP_ATOM_CAS) ? 2 : 1; 2178c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset break; 2179c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset default: 2180c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset assert(pos == 0); 2181c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset break; 2182c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset } 2183c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->setSrc(arg + pos, loadTexHandle(ind, slot + 32)); 2184c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2185c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // prevent read fault when the image is not actually bound 2186c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset CmpInstruction *pred = 2187c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), 2188c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset TYPE_U32, bld.mkImm(0), 2189c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); 2190c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (su->op != OP_SUSTP && su->tex.format) { 2191c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset const TexInstruction::ImgFormatDesc *format = su->tex.format; 2192c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset int blockwidth = format->bits[0] + format->bits[1] + 2193c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset format->bits[2] + format->bits[3]; 2194c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2195c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset assert(format->components != 0); 2196c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // make sure that the format doesn't mismatch when it's not FMT_NONE 2197c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), 2198c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset TYPE_U32, bld.loadImm(NULL, blockwidth / 8), 2199c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), 2200c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset pred->getDef(0)); 2201c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset } 2202c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->setPredicate(CC_NOT_P, pred->getDef(0)); 2203c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset} 2204c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2205c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoisetvoid 2206c68989b2c8de2ad961774cb7402aa206517d88c5Samuel PitoisetNVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su) 2207c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset{ 2208c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset processSurfaceCoordsGM107(su); 2209c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2210c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (su->op == OP_SULDP) 2211c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset convertSurfaceFormat(su); 2212c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2213c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (su->op == OP_SUREDP) { 2214c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset Value *def = su->getDef(0); 2215c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2216c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->op = OP_SUREDB; 2217c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset su->setDef(0, bld.getSSA()); 2218c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2219c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.setPosition(su, true); 2220c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2221c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // make sure to initialize dst value when the atomic operation is not 2222c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset // performed 2223c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); 2224c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2225c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset assert(su->cc == CC_NOT_P); 2226c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset mov->setPredicate(CC_P, su->getPredicate()); 2227c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2228c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0)); 2229c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset } 2230c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset} 2231c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset 2232c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumillerbool 223357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleWRSV(Instruction *i) 223457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 223557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *st; 223657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym; 223757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr; 223857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 223957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // must replace, $sreg are not writeable 224057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym()); 224157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (addr >= 0x400) 224257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 224357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); 224457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 224557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), 224657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(1)); 224757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->perPatch = i->perPatch; 224857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 224957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 225057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 225157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 225257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 225357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 22549d57c84994abe45133382cf72ae617570bfe89daSamuel PitoisetNVC0LoweringPass::handleLDST(Instruction *i) 22559d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset{ 22569d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->src(0).getFile() == FILE_SHADER_INPUT) { 22579d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (prog->getType() == Program::TYPE_COMPUTE) { 22589d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getSrc(0)->reg.file = FILE_MEMORY_CONST; 22599d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getSrc(0)->reg.fileIndex = 0; 22609d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else 22619d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (prog->getType() == Program::TYPE_GEOMETRY && 22629d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->src(0).isIndirect(0)) { 22639d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // XXX: this assumes vec4 units 22649d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 22659d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 0), bld.mkImm(4)); 22669d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 0, ptr); 22679d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->op = OP_VFETCH; 22689d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else { 22699d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->op = OP_VFETCH; 22709d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP 22719d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 22729d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else if (i->src(0).getFile() == FILE_MEMORY_CONST) { 22734f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset if (targ->getChipset() >= NVISA_GK104_CHIPSET && 22744f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset prog->getType() == Program::TYPE_COMPUTE) { 22754f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // The launch descriptor only allows to set up 8 CBs, but OpenGL 22764f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // requires at least 12 UBOs. To bypass this limitation, we store the 22774f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // addrs into the driver constbuf and we directly load from the global 22784f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset // memory. 22794f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1; 22804f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset Value *ind = i->getIndirect(0, 1); 22814f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset 228240281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin if (!ind && fileIndex == -1) 228340281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin return; 228440281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin 22857f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset if (ind) { 22867f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset // Clamp the UBO index when an indirect access is used to avoid 22877f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset // loading information from the wrong place in the driver cb. 22888ca2e38abbb13942705c64a213adda5683c8dec8Ilia Mirkin // TODO - synchronize the max with the driver. 22897f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset ind = bld.mkOp2v(OP_MIN, TYPE_U32, ind, 22907f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), 22917f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset ind, bld.loadImm(NULL, fileIndex)), 22928ca2e38abbb13942705c64a213adda5683c8dec8Ilia Mirkin bld.loadImm(NULL, 13)); 22938ca2e38abbb13942705c64a213adda5683c8dec8Ilia Mirkin fileIndex = 0; 22947f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset } 22957f257abc1bdd153b3981efffc3f201e1ea5fe843Samuel Pitoiset 229640281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType)); 229740281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin Value *ptr = loadUboInfo64(ind, fileIndex * 16); 229840281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin Value *length = loadUboLength32(ind, fileIndex * 16); 229940281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin Value *pred = new_LValue(func, FILE_PREDICATE); 230040281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin if (i->src(0).isIndirect(0)) { 230140281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); 230240281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0)); 23034f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } 230440281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 230540281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin i->setIndirect(0, 1, NULL); 230640281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin i->setIndirect(0, 0, ptr); 230740281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); 230840281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin i->setPredicate(CC_NOT_P, pred); 230940281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin Value *zero, *dst = i->getDef(0); 231040281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin i->setDef(0, bld.getSSA()); 231140281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin 231240281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin bld.setPosition(i, true); 231340281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin bld.mkMov((zero = bld.getSSA()), bld.mkImm(0)) 231440281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin ->setPredicate(CC_P, pred); 231540281fcaf9e97e457701bebd622e7c9cc69d08b4Ilia Mirkin bld.mkOp2(OP_UNION, TYPE_U32, dst, i->getDef(0), zero); 23164f58b78c309db372d408912ca87e88d319b895daSamuel Pitoiset } else if (i->src(0).isIndirect(1)) { 23179d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ptr; 23189d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->src(0).isIndirect(0)) 23199d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(), 23209d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 1), bld.mkImm(0x1010), 23219d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 0)); 23229d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset else 23239d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 23249d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->getIndirect(0, 1), bld.mkImm(16)); 23259d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 1, NULL); 23269d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 0, ptr); 23279d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->subOp = NV50_IR_SUBOP_LDC_IS; 23289d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 23299d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { 23309d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); 23319d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->op = OP_VFETCH; 233261d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { 23339d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ind = i->getIndirect(0, 1); 23349d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *ptr = loadBufInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); 23359d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // XXX come up with a way not to do this for EVERY little access but 23369d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // rather to batch these up somehow. Unfortunately we've lost the 23379d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset // information about the field width by the time we get here. 23389d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType)); 23399d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *length = loadBufLength32(ind, i->getSrc(0)->reg.fileIndex * 16); 23409d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset Value *pred = new_LValue(func, FILE_PREDICATE); 23419d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->src(0).isIndirect(0)) { 23429d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0)); 23439d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0)); 23449d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 23459d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 1, NULL); 23469d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setIndirect(0, 0, ptr); 234761d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; 23489d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); 23499d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset i->setPredicate(CC_NOT_P, pred); 23509d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset if (i->defExists(0)) { 2351b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin Value *zero, *dst = i->getDef(0); 2352b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin i->setDef(0, bld.getSSA()); 2353b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin 2354b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.setPosition(i, true); 2355b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkMov((zero = bld.getSSA()), bld.mkImm(0)) 2356b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin ->setPredicate(CC_P, pred); 2357b1d74e9486d7233412e4aa7bc07a1efbb72e768eIlia Mirkin bld.mkOp2(OP_UNION, TYPE_U32, dst, i->getDef(0), zero); 23589d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 23599d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset } 23609d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset} 23619d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset 23629d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoisetvoid 236357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::readTessCoord(LValue *dst, int c) 236457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 236557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *laneid = bld.getSSA(); 236657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *x, *y; 236757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 236857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0)); 236957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 237057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 0) { 237157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = dst; 237257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = NULL; 237357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 237457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 1) { 237557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = NULL; 237657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = dst; 237757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 237857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(c == 2); 2379035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin if (prog->driver->prop.tp.domain != PIPE_PRIM_TRIANGLES) { 2380035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin bld.mkMov(dst, bld.loadImm(NULL, 0)); 2381035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin return; 2382035b1097db339fb605964da62d01553a2bc25061Ilia Mirkin } 238357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller x = bld.getSSA(); 238457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller y = bld.getSSA(); 238557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 238657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (x) 238757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid); 238857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (y) 23893fc2818f2b9e8a19e5349442e50dcee4858452c6Christoph Bumiller bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid); 239057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 239157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (c == 2) { 239257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y); 239357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst); 239457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 239557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 239657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 239757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 239857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleRDSV(Instruction *i) 239957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 240057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Symbol *sym = i->getSrc(0)->asSym(); 2401ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller const SVSemantic sv = sym->reg.data.sv.sv; 240257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *vtx = NULL; 240357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld; 240457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); 240557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2406ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (addr >= 0x400) { 2407ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // mov $sreg 2408ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 2409ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID 2410ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 2411ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); 2412ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 24137cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin if (sv == SV_VERTEX_COUNT) { 24147cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin bld.setPosition(i, true); 24157cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), bld.mkImm(0x808)); 24167cf2bffe8254de6808202d866598ec4c9afe1a51Ilia Mirkin } 241757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 2418ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 241957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2420ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller switch (sv) { 242157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_POSITION: 242257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_FRAGMENT); 2423acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin if (i->srcExists(1)) { 2424acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin // Pass offset through to the interpolation logic 2425acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin ld = bld.mkInterp(NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_OFFSET, 2426acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin i->getDef(0), addr, NULL); 2427acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin ld->setSrc(1, i->getSrc(1)); 2428acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin } else { 2429acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); 2430acaed8f41d3cf57092f3fe3a607b8069c72b57f1Ilia Mirkin } 243152c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller break; 243252c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller case SV_FACE: 243352c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller { 243452c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller Value *face = i->getDef(0); 243552c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL); 243652c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller if (i->dType == TYPE_F32) { 2437354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkOp2(OP_OR, TYPE_U32, face, face, bld.mkImm(0x00000001)); 2438354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkOp1(OP_NEG, TYPE_S32, face, face); 2439354206f407fffd5f0b553dcbcc46b178d0b22c47Ilia Mirkin bld.mkCvt(OP_CVT, TYPE_F32, face, TYPE_S32, face); 244052c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 244152c8c52b222e1fdb4c1f4ca3dedde9cd7b9c321fChristoph Bumiller } 244257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 244357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case SV_TESS_COORD: 244457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); 244557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); 244657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 2447ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NTID: 2448ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_NCTAID: 2449ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller case SV_GRIDID: 2450ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise 2451ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller if (sym->reg.data.sv.index == 3) { 2452ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->op = OP_MOV; 2453ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); 2454ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller return true; 2455ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller } 24562aa1197eee442ab960f6ad6b84d4cf58511d6cb7Hans de Goede // Fallthrough 24572aa1197eee442ab960f6ad6b84d4cf58511d6cb7Hans de Goede case SV_WORK_DIM: 2458ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller addr += prog->driver->prop.cp.gridInfoBase; 2459ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller bld.mkLoad(TYPE_U32, i->getDef(0), 246026cc411db87f924003f227874d7a047dd8b5e5a4Samuel Pitoiset bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, 246126cc411db87f924003f227874d7a047dd8b5e5a4Samuel Pitoiset TYPE_U32, addr), NULL); 2462ae59a7d35d2b6f23634617dc91e1baf85c9d6d81Christoph Bumiller break; 2463af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_INDEX: 2464af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // TODO: Properly pass source as an address in the PIX address space 2465af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // (which can be of the form [r0+offset]). But this is currently 2466af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin // unnecessary. 2467af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 2468af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 2469af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 2470af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin case SV_SAMPLE_POS: { 2471af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin Value *off = new_LValue(func, FILE_GPR); 2472af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 2473af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 2474af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); 2475af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkLoad(TYPE_F32, 2476af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin i->getDef(0), 2477af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin bld.mkSymbol( 2478d86933e6f42b9c2f5bb617c66c91795c560a9abdSamuel Pitoiset FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, 2479af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin TYPE_U32, prog->driver->io.sampleInfoBase + 2480af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin 4 * sym->reg.data.sv.index), 2481af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin off); 2482af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin break; 2483af38ef907c89ecb1125bf258cafa0793f79a5eb7Ilia Mirkin } 2484ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin case SV_SAMPLE_MASK: { 2485b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); 2486b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK; 2487ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin Instruction *sampleid = 2488ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp1(OP_PIXLD, TYPE_U32, bld.getSSA(), bld.mkImm(0)); 2489ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin sampleid->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; 2490ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin Value *masked = 2491ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ld->getDef(0), 2492ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), 2493ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.loadImm(NULL, 1), sampleid->getDef(0))); 2494ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin if (prog->driver->prop.fp.persampleInvocation) { 2495ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkMov(i->getDef(0), masked); 2496ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } else { 2497ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), ld->getDef(0), masked, 2498ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin bld.mkImm(0)) 2499ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin ->subOp = 1; 2500ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 2501b3a2398aded19e25124a4a1d228eb3843827f6b2Ilia Mirkin break; 2502ba3f0b6d5920165c735d51500544da8c29b09060Ilia Mirkin } 2503517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin case SV_BASEVERTEX: 2504517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin case SV_BASEINSTANCE: 2505517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin case SV_DRAWID: 2506517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin ld = bld.mkLoad(TYPE_U32, i->getDef(0), 2507517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin bld.mkSymbol(FILE_MEMORY_CONST, 2508517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin prog->driver->io.auxCBSlot, 2509517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin TYPE_U32, 2510517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin prog->driver->io.drawInfoBase + 2511517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin 4 * (sv - SV_BASEVERTEX)), 2512517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin NULL); 2513517a93b346e720082e22e358b63b5dbc5c42aa09Ilia Mirkin break; 251457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 25157e0036a49258326cc2d875f2960d18c6b3665036Ilia Mirkin if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch) 251657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); 25175ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin if (prog->getType() == Program::TYPE_FRAGMENT) { 25185ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin bld.mkInterp(NV50_IR_INTERP_FLAT, i->getDef(0), addr, NULL); 25195ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin } else { 25205ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin ld = bld.mkFetch(i->getDef(0), i->dType, 25215ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); 25225ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin ld->perPatch = i->perPatch; 25235ba380c226b127cbfad00dd647471e1518ba2cb2Ilia Mirkin } 252457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 252557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 252657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.getBB()->remove(i); 252757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 252857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 252957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 253057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 253157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleDIV(Instruction *i) 253257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 253357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isFloatType(i->dType)) 253457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 2535b5f2c0505fd4f66422e034b041cdf0bc3dc46e99Christoph Bumiller bld.setPosition(i, false); 2536b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1)); 253757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MUL; 253857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, rcp->getDef(0)); 253957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 254057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 254157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 254257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 254357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleMOD(Instruction *i) 254457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 2545b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin if (!isFloatType(i->dType)) 254657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 2547b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin LValue *value = bld.getScratch(typeSizeof(i->dType)); 2548b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1)); 2549b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value); 2550b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp1(OP_TRUNC, i->dType, value, value); 2551b87b498b88c51fb8c80901b8f581331d3fbcd972Ilia Mirkin bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value); 255257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_SUB; 255357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, value); 255457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 255557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 255657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 255757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 255857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleSQRT(Instruction *i) 255957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 2560c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin if (i->dType == TYPE_F64) { 2561c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin Value *pred = bld.getSSA(1, FILE_PREDICATE); 25629184d9a0bbe8a8b88d676a20f95d66ceee9eaf21Pierre Moreau Value *zero = bld.loadImm(NULL, 0.0); 2563c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin Value *dst = bld.getSSA(8); 2564c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0)); 2565c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero); 2566c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkOp3(OP_SELP, TYPE_U64, dst, zero, dst, pred); 2567c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin i->op = OP_MUL; 2568c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin i->setSrc(1, dst); 2569c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin // TODO: Handle this properly with a library function 2570c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin } else { 2571c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.setPosition(i, true); 2572c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin i->op = OP_RSQ; 2573c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0)); 2574c1e4a6bfbf015801c6a8b0ae694482421a22c2d9Ilia Mirkin } 257557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 257657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 257757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 257857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 257957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 258057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handlePOW(Instruction *i) 258157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 258257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *val = bld.getScratch(); 258357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 258457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); 258557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; 258657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); 258757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 258857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_EX2; 258957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, val); 259057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 259157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 259257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 259357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 259457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 259557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 259657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleEXPORT(Instruction *i) 259757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 259857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_FRAGMENT) { 259957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int id = i->getSrc(0)->reg.data.offset / 4; 260057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 26019362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(0).isIndirect(0)) // TODO, ugly 260257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 260357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 260400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller i->subOp = NV50_IR_SUBOP_MOV_FINAL; 26059362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).set(i->src(1)); 260657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 260757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, new_LValue(func, FILE_GPR)); 260857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getDef(0)->reg.data.id = id; 260957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 261057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prog->maxGPR = MAX2(prog->maxGPR, id); 261157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 261257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getType() == Program::TYPE_GEOMETRY) { 261357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setIndirect(0, 1, gpEmitAddress); 261457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 261557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 261657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 261757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 261857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 261957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::handleOUT(Instruction *i) 262057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 26212f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin Instruction *prev = i->prev; 26222f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin ImmediateValue stream, prevStream; 26232f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin 26242f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin // Only merge if the stream ids match. Also, note that the previous 26252f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin // instruction would have already been lowered, so we take arg1 from it. 26262f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin if (i->op == OP_RESTART && prev && prev->op == OP_EMIT && 26272f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin i->src(0).getImmediate(stream) && 26282f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin prev->src(1).getImmediate(prevStream) && 26292f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin stream.reg.data.u32 == prevStream.reg.data.u32) { 263057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART; 263157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 263257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 263357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(gpEmitAddress); 263457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setDef(0, gpEmitAddress); 26352f2467cb23ce19770c95ce1f004dc11750dffc6dIlia Mirkin i->setSrc(1, i->getSrc(0)); 263657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, gpEmitAddress); 263757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 263857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 263957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 264057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 264157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Generate a binary predicate if an instruction is predicated by 264257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// e.g. an f32 value. 264357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 264457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::checkPredicate(Instruction *insn) 264557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 264657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = insn->getPredicate(); 264757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pdst; 264857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 264957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!pred || pred->reg.file == FILE_PREDICATE) 265057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 265157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller pdst = new_LValue(func, FILE_PREDICATE); 265257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 265357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // CAUTION: don't use pdst->getInsn, the definition might not be unique, 265457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass 265557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2656bbe3d6dc29f218e4d790e5ea359d3c6736e94226Dave Airlie bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, insn->dType, bld.mkImm(0), pred); 265757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 265857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->setPredicate(insn->cc, pdst); 265957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 266057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 266157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 266257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - add quadop dance for texturing 266357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - put FP outputs in GPRs 266457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// - convert instruction sequences 266557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// 266657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 266757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerNVC0LoweringPass::visit(Instruction *i) 266857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 26699d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin bool ret = true; 2670405bd00f3c98cb78d1dda1f3bf5d74155b18cd57Christoph Bumiller bld.setPosition(i, false); 267157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 267257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->cc != CC_ALWAYS) 267357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkPredicate(i); 267457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 267557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 267657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TEX: 267757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXB: 267857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXL: 267957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXF: 268057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXG: 268157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTEX(i->asTex()); 268257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_TXD: 268357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleTXD(i->asTex()); 2684423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin case OP_TXLQ: 2685423f64e83ab5b1ea7de475ae80300a8408522743Ilia Mirkin return handleTXLQ(i->asTex()); 268630cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller case OP_TXQ: 268730cb66cd745fc793a2349f1d17046c50cd51c558Christoph Bumiller return handleTXQ(i->asTex()); 268857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: 268957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); 269057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getDef(0)); 269157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 269257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_POW: 269357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handlePOW(i); 269457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 269557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleDIV(i); 269657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOD: 269757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleMOD(i); 269857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: 269957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleSQRT(i); 270057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EXPORT: 27019d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin ret = handleEXPORT(i); 27029d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin break; 270357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EMIT: 270457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RESTART: 270557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleOUT(i); 270657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RDSV: 270757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleRDSV(i); 270857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_WRSV: 270957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return handleWRSV(i); 2710c3083c70823d8f4bfdabcf38f98dfebeff0a2b2bIlia Mirkin case OP_STORE: 271157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LOAD: 27129d57c84994abe45133382cf72ae617570bfe89daSamuel Pitoiset handleLDST(i); 271357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 2714c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller case OP_ATOM: 271575f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller { 271661d52a5fb9379eede3bf68b011f9477176341ee9Hans de Goede const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; 2717c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller handleATOM(i); 271875f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller handleCasExch(i, cctl); 271975f1f852b00ad0d766684d01695322b93a2acd55Christoph Bumiller } 2720c0fc3463e9be19495ed31018fc093e726d5f4fbbChristoph Bumiller break; 27214506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDB: 27224506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SULDP: 27234506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTB: 27244506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUSTP: 27254506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDB: 27264506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller case OP_SUREDP: 2727c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset if (targ->getChipset() >= NVISA_GM107_CHIPSET) 2728c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset handleSurfaceOpGM107(i->asTex()); 2729c68989b2c8de2ad961774cb7402aa206517d88c5Samuel Pitoiset else if (targ->getChipset() >= NVISA_GK104_CHIPSET) 27304506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller handleSurfaceOpNVE4(i->asTex()); 2731362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset else 2732362e17a7123e55d22a188943886a7ead00def6b6Samuel Pitoiset handleSurfaceOpNVC0(i->asTex()); 27334506ed28de7f9d76bbc99c0758a7891b84528729Christoph Bumiller break; 27342c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin case OP_SUQ: 2735d64ea4e48e1da072cae51df11bfbef7d6a432cb0Samuel Pitoiset handleSUQ(i->asTex()); 27362c4eeb0b5cf17caa06cb3fa46d4f64e6a8005d23Ilia Mirkin break; 27377c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset case OP_BUFQ: 27387c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset handleBUFQ(i); 27397c47db359e193f21be796df3a7b5d037dd42b28fSamuel Pitoiset break; 274057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 274157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 27426bca283ad5ebdd85e268c6757842b3c808c6b73dJohannes Obermayr } 27439d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin 27449d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin /* Kepler+ has a special opcode to compute a new base address to be used 27459d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin * for indirect loads. 2746a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin * 2747a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin * Maxwell+ has an additional similar requirement for indirect 2748a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin * interpolation ops in frag shaders. 27499d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin */ 2750a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin bool doAfetch = false; 2751a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin if (targ->getChipset() >= NVISA_GK104_CHIPSET && 2752a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin !i->perPatch && 2753a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin (i->op == OP_VFETCH || i->op == OP_EXPORT) && 2754a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->src(0).isIndirect(0)) { 2755a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin doAfetch = true; 2756a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin } 2757a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin if (targ->getChipset() >= NVISA_GM107_CHIPSET && 2758a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin (i->op == OP_LINTERP || i->op == OP_PINTERP) && 2759a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->src(0).isIndirect(0)) { 2760a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin doAfetch = true; 2761a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin } 2762a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin 2763a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin if (doAfetch) { 2764a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin Value *addr = cloneShallow(func, i->getSrc(0)); 27659d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin Instruction *afetch = bld.mkOp1(OP_AFETCH, TYPE_U32, bld.getSSA(), 2766a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->getSrc(0)); 27679d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin afetch->setIndirect(0, 0, i->getIndirect(0, 0)); 2768a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin addr->reg.data.offset = 0; 2769a8c0c7301cccd6bba1842ae8aa901cd45d9d5ffdIlia Mirkin i->setSrc(0, addr); 27709d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin i->setIndirect(0, 0, afetch->getDef(0)); 27719d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin } 27729d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin 27739d60793a03e40e1d139b78fce0144cad57438741Ilia Mirkin return ret; 277457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 277557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 277657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 277757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerTargetNVC0::runLegalizePass(Program *prog, CGStage stage) const 277857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 277957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_PRE_SSA) { 278057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LoweringPass pass(prog); 278157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 278257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 278357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_POST_RA) { 2784e44089b2f79aa2dcaacf348911433d1e21235c0cChristoph Bumiller NVC0LegalizePostRA pass(prog); 278557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 278657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 278757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (stage == CG_STAGE_SSA) { 278857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller NVC0LegalizeSSA pass; 278957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return pass.run(prog, false, true); 279057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 279157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 279257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 279357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 279457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} // namespace nv50_ir 2795