12cd769179345799d383f92dd615991755ec24be1Vadim Girlin/* 22cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 32cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 42cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Permission is hereby granted, free of charge, to any person obtaining a 52cd769179345799d383f92dd615991755ec24be1Vadim Girlin * copy of this software and associated documentation files (the "Software"), 62cd769179345799d383f92dd615991755ec24be1Vadim Girlin * to deal in the Software without restriction, including without limitation 72cd769179345799d383f92dd615991755ec24be1Vadim Girlin * on the rights to use, copy, modify, merge, publish, distribute, sub 82cd769179345799d383f92dd615991755ec24be1Vadim Girlin * license, and/or sell copies of the Software, and to permit persons to whom 92cd769179345799d383f92dd615991755ec24be1Vadim Girlin * the Software is furnished to do so, subject to the following conditions: 102cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 112cd769179345799d383f92dd615991755ec24be1Vadim Girlin * The above copyright notice and this permission notice (including the next 122cd769179345799d383f92dd615991755ec24be1Vadim Girlin * paragraph) shall be included in all copies or substantial portions of the 132cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Software. 142cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 152cd769179345799d383f92dd615991755ec24be1Vadim Girlin * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 162cd769179345799d383f92dd615991755ec24be1Vadim Girlin * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 172cd769179345799d383f92dd615991755ec24be1Vadim Girlin * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 182cd769179345799d383f92dd615991755ec24be1Vadim Girlin * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 192cd769179345799d383f92dd615991755ec24be1Vadim Girlin * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 202cd769179345799d383f92dd615991755ec24be1Vadim Girlin * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 212cd769179345799d383f92dd615991755ec24be1Vadim Girlin * USE OR OTHER DEALINGS IN THE SOFTWARE. 222cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 232cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Authors: 242cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Vadim Girlin 252cd769179345799d383f92dd615991755ec24be1Vadim Girlin */ 262cd769179345799d383f92dd615991755ec24be1Vadim Girlin 272cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DEBUG 0 282cd769179345799d383f92dd615991755ec24be1Vadim Girlin 292cd769179345799d383f92dd615991755ec24be1Vadim Girlin#if BCP_DEBUG 302cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DUMP(q) do { q } while (0) 312cd769179345799d383f92dd615991755ec24be1Vadim Girlin#else 322cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DUMP(q) 332cd769179345799d383f92dd615991755ec24be1Vadim Girlin#endif 342cd769179345799d383f92dd615991755ec24be1Vadim Girlin 352cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "r600_pipe.h" 362cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "r600_shader.h" 3780c5062abfdef28e23615f44b214760449f6a582Glenn Kennard#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1 382cd769179345799d383f92dd615991755ec24be1Vadim Girlin 392cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include <stack> 402cd769179345799d383f92dd615991755ec24be1Vadim Girlin 412cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_bc.h" 422cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_shader.h" 432cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_pass.h" 44322cd2457ccf66a0a88d92f0b0dec1cb3f93eae4Jan Vesely#include "util/macros.h" 452cd769179345799d383f92dd615991755ec24be1Vadim Girlin 462cd769179345799d383f92dd615991755ec24be1Vadim Girlinnamespace r600_sb { 472cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode() { 492cd769179345799d383f92dd615991755ec24be1Vadim Girlin 502cd769179345799d383f92dd615991755ec24be1Vadim Girlin dw = bc->bytecode; 512cd769179345799d383f92dd615991755ec24be1Vadim Girlin bc_ndw = bc->ndw; 522cd769179345799d383f92dd615991755ec24be1Vadim Girlin max_cf = 0; 532cd769179345799d383f92dd615991755ec24be1Vadim Girlin 542cd769179345799d383f92dd615991755ec24be1Vadim Girlin dec = new bc_decoder(ctx, dw, bc_ndw); 552cd769179345799d383f92dd615991755ec24be1Vadim Girlin 562cd769179345799d383f92dd615991755ec24be1Vadim Girlin shader_target t = TARGET_UNKNOWN; 572cd769179345799d383f92dd615991755ec24be1Vadim Girlin 582cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (pshader) { 592cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (bc->type) { 60af249a7da9bf2621ab836d5074ef692677b11bbfMarek Olšák case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break; 61af249a7da9bf2621ab836d5074ef692677b11bbfMarek Olšák case PIPE_SHADER_VERTEX: 62797012bb67be78b3907d39626900e55f179f3792Dave Airlie t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS); 631371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin break; 64af249a7da9bf2621ab836d5074ef692677b11bbfMarek Olšák case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break; 65af249a7da9bf2621ab836d5074ef692677b11bbfMarek Olšák case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break; 66af249a7da9bf2621ab836d5074ef692677b11bbfMarek Olšák case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break; 67af249a7da9bf2621ab836d5074ef692677b11bbfMarek Olšák case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break; 682cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: assert(!"unknown shader target"); return -1; break; 692cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 702cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 71af249a7da9bf2621ab836d5074ef692677b11bbfMarek Olšák if (bc->type == PIPE_SHADER_COMPUTE) 722cd769179345799d383f92dd615991755ec24be1Vadim Girlin t = TARGET_COMPUTE; 732cd769179345799d383f92dd615991755ec24be1Vadim Girlin else 742cd769179345799d383f92dd615991755ec24be1Vadim Girlin t = TARGET_FETCH; 752cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 762cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin sh = new shader(ctx, t, bc->debug_id); 78758ac6f91894c105c83a193e8f4f6ead06962949Vadim Girlin sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE); 79758ac6f91894c105c83a193e8f4f6ead06962949Vadim Girlin 8057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin int r = decode_shader(); 812cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8246dfad8b36dda80982613a9d29b9a7e99db3abfbVadim Girlin delete dec; 8346dfad8b36dda80982613a9d29b9a7e99db3abfbVadim Girlin 843f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin sh->ngpr = bc->ngpr; 853f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin sh->nstack = bc->nstack; 863f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin 872cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 882cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 892cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_shader() { 912cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r = 0; 922cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned i = 0; 932cd769179345799d383f92dd615991755ec24be1Vadim Girlin bool eop = false; 942cd769179345799d383f92dd615991755ec24be1Vadim Girlin 952cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->init(); 962cd769179345799d383f92dd615991755ec24be1Vadim Girlin 972cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 982cd769179345799d383f92dd615991755ec24be1Vadim Girlin eop = false; 9957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = decode_cf(i, eop))) 1002cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 1012cd769179345799d383f92dd615991755ec24be1Vadim Girlin 102a830225adbb77073272961df409885cca6b861eeGlenn Kennard } while (!eop || (i >> 1) < max_cf); 1032cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1042cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 1052cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 1062cd769179345799d383f92dd615991755ec24be1Vadim Girlin 10757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare() { 10857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin int r = 0; 10957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = parse_decls())) 11057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return r; 11157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = prepare_ir())) 11257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return r; 11357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 11457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 1152cd769179345799d383f92dd615991755ec24be1Vadim Girlin 11657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::parse_decls() { 1172cd769179345799d383f92dd615991755ec24be1Vadim Girlin 11857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (!pshader) { 1195a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (gpr_reladdr) 1205a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin sh->add_gpr_array(0, bc->ngpr, 0x0F); 12144a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin 12244a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin // compute shaders have some values preloaded in R0, R1 12344a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */); 12444a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */); 12557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 12657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 1272cd769179345799d383f92dd615991755ec24be1Vadim Girlin 12880c5062abfdef28e23615f44b214760449f6a582Glenn Kennard if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) { 1292cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1302cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(pshader->num_arrays); 1312cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1322cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (pshader->num_arrays) { 1332cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (unsigned i = 0; i < pshader->num_arrays; ++i) { 1342cd769179345799d383f92dd615991755ec24be1Vadim Girlin r600_shader_array &a = pshader->arrays[i]; 1352cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); 1362cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1372cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 138a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F); 1392cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1402cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1412cd769179345799d383f92dd615991755ec24be1Vadim Girlin 142f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard // GS inputs can add indirect addressing 143f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (sh->target == TARGET_GS) { 144f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (pshader->num_arrays) { 145f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard for (unsigned i = 0; i < pshader->num_arrays; ++i) { 146f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard r600_shader_array &a = pshader->arrays[i]; 147f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); 148f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 149f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 150f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 151f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard 152797012bb67be78b3907d39626900e55f179f3792Dave Airlie if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS) 153a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_input(0, 1, 0x0F); 1541371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin else if (sh->target == TARGET_GS) { 1551371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin sh->add_input(0, 1, 0x0F); 1561371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin sh->add_input(1, 1, 0x0F); 1571371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin } 1582cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1592cd769179345799d383f92dd615991755ec24be1Vadim Girlin bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN 1602cd769179345799d383f92dd615991755ec24be1Vadim Girlin && sh->target == TARGET_PS; 1612cd769179345799d383f92dd615991755ec24be1Vadim Girlin 162a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard bool ij_interpolators[6]; 163a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard memset(ij_interpolators, 0, sizeof(ij_interpolators)); 1642cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1652cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (unsigned i = 0; i < pshader->ninput; ++i) { 1662cd769179345799d383f92dd615991755ec24be1Vadim Girlin r600_shader_io & in = pshader->input[i]; 1672cd769179345799d383f92dd615991755ec24be1Vadim Girlin bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid); 168a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F); 1692cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (ps_interp && in.spi_sid) { 170a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location); 171a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard if (k >= 0) 172a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard ij_interpolators[k] |= true; 1732cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1742cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1752cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1762cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (ps_interp) { 177a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard /* add the egcm ij interpolators to live inputs */ 178a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard unsigned num_ij = 0; 17947b390fe45e5e6f982c60b58985892438959cd8eJan Vesely for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) { 180a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard num_ij += ij_interpolators[i]; 181a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard } 182a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard 183a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard unsigned mask = (1 << (2 * num_ij)) - 1; 1842cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned gpr = 0; 1852cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1862cd769179345799d383f92dd615991755ec24be1Vadim Girlin while (mask) { 187a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_input(gpr, true, mask & 0x0F); 1882cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++gpr; 1892cd769179345799d383f92dd615991755ec24be1Vadim Girlin mask >>= 4; 1902cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1912cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1922cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1932cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 1942cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 1952cd769179345799d383f92dd615991755ec24be1Vadim Girlin 19657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_cf(unsigned &i, bool &eop) { 1972cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1982cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r; 1992cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2002cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *cf = sh->create_cf(); 2012cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->root->push_back(cf); 2022cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2032cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned id = i >> 1; 2042cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2052cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf->bc.id = id; 2062cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2072cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (cf_map.size() < id + 1) 2082cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_map.resize(id + 1); 2092cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2102cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_map[id] = cf; 2112cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2122cd769179345799d383f92dd615991755ec24be1Vadim Girlin if ((r = dec->decode_cf(i, cf->bc))) 2132cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 2142cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2152cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; 2162cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2172cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & CF_ALU) { 21857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = decode_alu_clause(cf))) 2192cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 2202cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_FETCH) { 22157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = decode_fetch_clause(cf))) 222e129e6eb89e749015f55b827b7fd45c817149f21Jakob Sinclair return r; 2232cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_EXP) { 2245a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (cf->bc.rw_rel) 2255a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 2262cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!cf->bc.rw_rel); 2275758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie } else if (flags & CF_MEM) { 2285a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (cf->bc.rw_rel) 2295a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 2302cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!cf->bc.rw_rel); 2312cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_BRANCH) { 2322cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (cf->bc.addr > max_cf) 2332cd769179345799d383f92dd615991755ec24be1Vadim Girlin max_cf = cf->bc.addr; 2342cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 2352cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2362cd769179345799d383f92dd615991755ec24be1Vadim Girlin eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END || 2372cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf->bc.op == CF_OP_RET; 2382cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 2392cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 2402cd769179345799d383f92dd615991755ec24be1Vadim Girlin 24157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_alu_clause(cf_node* cf) { 2422cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt; 2432cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2443f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin cf->subtype = NST_ALU_CLAUSE; 2453f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin 2462cd769179345799d383f92dd615991755ec24be1Vadim Girlin cgroup = 0; 2472cd769179345799d383f92dd615991755ec24be1Vadim Girlin memset(slots[0], 0, 5*sizeof(slots[0][0])); 2482cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2492cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned ng = 0; 2502cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2512cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 25257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin decode_alu_group(cf, i, gcnt); 2532cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(gcnt <= cnt); 2542cd769179345799d383f92dd615991755ec24be1Vadim Girlin cnt -= gcnt; 2552cd769179345799d383f92dd615991755ec24be1Vadim Girlin ng++; 2562cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (cnt); 2572cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2582cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 2592cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 2602cd769179345799d383f92dd615991755ec24be1Vadim Girlin 26157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { 2622cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r; 2632cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_node *n; 2642cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_group_node *g = sh->create_alu_group(); 2652cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2662cd769179345799d383f92dd615991755ec24be1Vadim Girlin cgroup = !cgroup; 2672cd769179345799d383f92dd615991755ec24be1Vadim Girlin memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); 2682cd769179345799d383f92dd615991755ec24be1Vadim Girlin gcnt = 0; 2692cd769179345799d383f92dd615991755ec24be1Vadim Girlin 27057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin unsigned literal_mask = 0; 27157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 2722cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 2732cd769179345799d383f92dd615991755ec24be1Vadim Girlin n = sh->create_alu(); 2742cd769179345799d383f92dd615991755ec24be1Vadim Girlin g->push_back(n); 2752cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2762cd769179345799d383f92dd615991755ec24be1Vadim Girlin if ((r = dec->decode_alu(i, n->bc))) 2772cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 2782cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2792cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!sh->assign_slot(n, slots[cgroup])) { 2802cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!"alu slot assignment failed"); 2812cd769179345799d383f92dd615991755ec24be1Vadim Girlin return -1; 2822cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 2832cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2842cd769179345799d383f92dd615991755ec24be1Vadim Girlin gcnt++; 2852cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2862cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (gcnt <= 5 && !n->bc.last); 2872cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2882cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(n->bc.last); 2892cd769179345799d383f92dd615991755ec24be1Vadim Girlin 29057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { 29157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin n = static_cast<alu_node*>(*I); 29257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 2935a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (n->bc.dst_rel) 2945a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 2955a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin 29657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (int k = 0; k < n->bc.op_ptr->src_count; ++k) { 29757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin bc_alu_src &src = n->bc.src[k]; 2985a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (src.rel) 2995a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 30057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (src.sel == ALU_SRC_LITERAL) { 30157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_mask |= (1 << src.chan); 30257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin src.value.u = dw[i + src.chan]; 30357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 30457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 30557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 30657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 30757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin unsigned literal_ndw = 0; 30857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin while (literal_mask) { 30957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin g->literals.push_back(dw[i + literal_ndw]); 31057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_ndw += 1; 31157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_mask >>= 1; 31257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 31357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 31457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_ndw = (literal_ndw + 1) & ~1u; 31557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 31657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin i += literal_ndw; 31757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin gcnt += literal_ndw >> 1; 31857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 31957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin cf->push_back(g); 32057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 32157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 32257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 32357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_alu_clause(cf_node* cf) { 32457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 32557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // loop over alu groups 32657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { 32757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin assert(I->subtype == NST_ALU_GROUP); 32857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_group_node *g = static_cast<alu_group_node*>(*I); 32957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin prepare_alu_group(cf, g); 33057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 33157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 33257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 33357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 33457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 33580c5062abfdef28e23615f44b214760449f6a582Glenn Kennardvoid bc_parser::save_set_cf_index(value *val, unsigned idx) 33680c5062abfdef28e23615f44b214760449f6a582Glenn Kennard{ 33780c5062abfdef28e23615f44b214760449f6a582Glenn Kennard assert(idx <= 1); 33880c5062abfdef28e23615f44b214760449f6a582Glenn Kennard assert(val); 33980c5062abfdef28e23615f44b214760449f6a582Glenn Kennard cf_index_value[idx] = val; 34080c5062abfdef28e23615f44b214760449f6a582Glenn Kennard} 34180c5062abfdef28e23615f44b214760449f6a582Glenn Kennardvalue *bc_parser::get_cf_index_value(unsigned idx) 34280c5062abfdef28e23615f44b214760449f6a582Glenn Kennard{ 34380c5062abfdef28e23615f44b214760449f6a582Glenn Kennard assert(idx <= 1); 3441befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard assert(cf_index_value[idx]); 34580c5062abfdef28e23615f44b214760449f6a582Glenn Kennard return cf_index_value[idx]; 34680c5062abfdef28e23615f44b214760449f6a582Glenn Kennard} 34780c5062abfdef28e23615f44b214760449f6a582Glenn Kennardvoid bc_parser::save_mova(alu_node *mova) 34880c5062abfdef28e23615f44b214760449f6a582Glenn Kennard{ 34980c5062abfdef28e23615f44b214760449f6a582Glenn Kennard assert(mova); 35080c5062abfdef28e23615f44b214760449f6a582Glenn Kennard this->mova = mova; 35180c5062abfdef28e23615f44b214760449f6a582Glenn Kennard} 35280c5062abfdef28e23615f44b214760449f6a582Glenn Kennardalu_node *bc_parser::get_mova() 35380c5062abfdef28e23615f44b214760449f6a582Glenn Kennard{ 35480c5062abfdef28e23615f44b214760449f6a582Glenn Kennard assert(mova); 35580c5062abfdef28e23615f44b214760449f6a582Glenn Kennard return mova; 35680c5062abfdef28e23615f44b214760449f6a582Glenn Kennard} 35780c5062abfdef28e23615f44b214760449f6a582Glenn Kennard 35857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { 35957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 36057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_node *n; 36157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 36257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin cgroup = !cgroup; 36357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); 3642cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3652cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (node_iterator I = g->begin(), E = g->end(); 3662cd769179345799d383f92dd615991755ec24be1Vadim Girlin I != E; ++I) { 3672cd769179345799d383f92dd615991755ec24be1Vadim Girlin n = static_cast<alu_node*>(*I); 3681befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard bool ubo_indexing[2] = {}; 36957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 37057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (!sh->assign_slot(n, slots[cgroup])) { 37157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin assert(!"alu slot assignment failed"); 37257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return -1; 37357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 37457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 3752cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned src_count = n->bc.op_ptr->src_count; 3762cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3772cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (ctx.alu_slots(n->bc.op) & AF_4SLOT) 3782cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_ALU_4SLOT; 3792cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3802cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src.resize(src_count); 3812cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3822cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned flags = n->bc.op_ptr->flags; 3832cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3842cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & AF_PRED) { 3852cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(3); 3862cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.update_pred) 3872cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[1] = sh->get_special_value(SV_ALU_PRED); 3882cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.update_exec_mask) 3892cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[2] = sh->get_special_value(SV_EXEC_MASK); 3902cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3912cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_DONT_HOIST; 3922cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3932cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & AF_KILL) { 3942cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3952cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(2); 3962cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[1] = sh->get_special_value(SV_VALID_MASK); 3972cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->set_uses_kill(); 3982cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3992cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | 4002cd769179345799d383f92dd615991755ec24be1Vadim Girlin NF_DONT_KILL | NF_SCHEDULE_EARLY; 4012cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4022cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 4032cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(1); 4042cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4052cd769179345799d383f92dd615991755ec24be1Vadim Girlin 40680c5062abfdef28e23615f44b214760449f6a582Glenn Kennard if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) { 40780c5062abfdef28e23615f44b214760449f6a582Glenn Kennard // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX 40880c5062abfdef28e23615f44b214760449f6a582Glenn Kennard // DCE will kill this op 40980c5062abfdef28e23615f44b214760449f6a582Glenn Kennard save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1); 41080c5062abfdef28e23615f44b214760449f6a582Glenn Kennard } else if (flags & AF_MOVA) { 4112cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4122cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[0] = sh->get_special_value(SV_AR_INDEX); 41380c5062abfdef28e23615f44b214760449f6a582Glenn Kennard save_mova(n); 4142cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4152cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_DONT_HOIST; 4162cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4172cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) { 4182cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X); 4192cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4202cd769179345799d383f92dd615991755ec24be1Vadim Girlin value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan, 4212cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->bc.dst_rel); 4222cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4232cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[0] = v; 4242cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4252cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4262cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.pred_sel) { 4272cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->has_alu_predication = true; 4282cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->pred = sh->get_special_value(SV_ALU_PRED); 4292cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4302cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4312cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (unsigned s = 0; s < src_count; ++s) { 4322cd769179345799d383f92dd615991755ec24be1Vadim Girlin bc_alu_src &src = n->bc.src[s]; 4332cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4342cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (src.sel == ALU_SRC_LITERAL) { 4352cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(src.value); 4362cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { 4372cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? 4382cd769179345799d383f92dd615991755ec24be1Vadim Girlin SLOT_TRANS : src.chan; 43996efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin 44096efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin // XXX shouldn't happen but llvm backend uses PS on cayman 44196efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin if (prev_slot == SLOT_TRANS && ctx.is_cayman()) 44296efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin prev_slot = SLOT_X; 44396efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin 4442cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_node *prev_alu = slots[pgroup][prev_slot]; 4452cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4462cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(prev_alu); 4472cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4482cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!prev_alu->dst[0]) { 4492cd769179345799d383f92dd615991755ec24be1Vadim Girlin value * t = sh->create_temp_value(); 4502cd769179345799d383f92dd615991755ec24be1Vadim Girlin prev_alu->dst[0] = t; 4512cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4522cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4532cd769179345799d383f92dd615991755ec24be1Vadim Girlin value *d = prev_alu->dst[0]; 4542cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4552cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (d->is_rel()) { 4562cd769179345799d383f92dd615991755ec24be1Vadim Girlin d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr, 4572cd769179345799d383f92dd615991755ec24be1Vadim Girlin prev_alu->bc.dst_chan, 4582cd769179345799d383f92dd615991755ec24be1Vadim Girlin prev_alu->bc.dst_rel); 4592cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4602cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4612cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = d; 4622cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (ctx.is_kcache_sel(src.sel)) { 4632cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned sel = src.sel, kc_addr; 4642cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1); 4652cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4662cd769179345799d383f92dd615991755ec24be1Vadim Girlin bc_kcache &kc = cf->bc.kc[kc_set]; 4672cd769179345799d383f92dd615991755ec24be1Vadim Girlin kc_addr = (kc.addr << 4) + (sel & 0x1F); 4681befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode); 4691befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard 4701befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard if (kc.index_mode != KC_INDEX_NONE) { 4711befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard assert(kc.index_mode != KC_LOCK_LOOP); 4721befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard ubo_indexing[kc.index_mode - KC_INDEX_0] = true; 4731befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard } 4742cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (src.sel < MAX_GPR) { 4752cd769179345799d383f92dd615991755ec24be1Vadim Girlin value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel); 4762cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4772cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = v; 4782cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4792cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (src.sel >= ALU_SRC_PARAM_OFFSET) { 4802cd769179345799d383f92dd615991755ec24be1Vadim Girlin // using slot for value channel because in fact the slot 4812cd769179345799d383f92dd615991755ec24be1Vadim Girlin // determines the channel that is loaded by INTERP_LOAD_P0 4822cd769179345799d383f92dd615991755ec24be1Vadim Girlin // (and maybe some others). 4832cd769179345799d383f92dd615991755ec24be1Vadim Girlin // otherwise GVN will consider INTERP_LOAD_P0s with the same 4842cd769179345799d383f92dd615991755ec24be1Vadim Girlin // param index as equal instructions and leave only one of them 4852cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_special_ro_value(sel_chan(src.sel, 4862cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->bc.slot)); 4872cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 4882cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (src.sel) { 4892cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_0: 4902cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(0); 4912cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4922cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_0_5: 4932cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(0.5f); 4942cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4952cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_1: 4962cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(1.0f); 4972cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4982cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_1_INT: 4992cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(1); 5002cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 5012cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_M_1_INT: 5022cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(-1); 5032cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 5042cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: 5052cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_special_ro_value(src.sel); 5062cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 5072cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5082cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5092cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5101befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard 5111befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard // add UBO index values if any as dependencies 5121befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard if (ubo_indexing[0]) { 5131befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard n->src.push_back(get_cf_index_value(0)); 5141befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard } 5151befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard if (ubo_indexing[1]) { 5161befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard n->src.push_back(get_cf_index_value(1)); 5171befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard } 5181befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard 51980c5062abfdef28e23615f44b214760449f6a582Glenn Kennard if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) && 52080c5062abfdef28e23615f44b214760449f6a582Glenn Kennard ctx.is_cayman()) 52180c5062abfdef28e23615f44b214760449f6a582Glenn Kennard // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX 52280c5062abfdef28e23615f44b214760449f6a582Glenn Kennard save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1); 5232cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5242cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5252cd769179345799d383f92dd615991755ec24be1Vadim Girlin // pack multislot instructions into alu_packed_node 5262cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5272cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_packed_node *p = NULL; 5282cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) { 5292cd769179345799d383f92dd615991755ec24be1Vadim Girlin N = I + 1; 5302cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_node *a = static_cast<alu_node*>(*I); 5312cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned sflags = a->bc.slot_flags; 5322cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5332cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) { 5342cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!p) 5352cd769179345799d383f92dd615991755ec24be1Vadim Girlin p = sh->create_alu_packed(); 5362cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5372cd769179345799d383f92dd615991755ec24be1Vadim Girlin a->remove(); 5382cd769179345799d383f92dd615991755ec24be1Vadim Girlin p->push_back(a); 5392cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5402cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5412cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5422cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (p) { 5432cd769179345799d383f92dd615991755ec24be1Vadim Girlin g->push_front(p); 5442cd769179345799d383f92dd615991755ec24be1Vadim Girlin 54557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (p->count() == 3 && ctx.is_cayman()) { 54657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // cayman's scalar instruction that can use 3 or 4 slots 5472cd769179345799d383f92dd615991755ec24be1Vadim Girlin 54857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // FIXME for simplicity we'll always add 4th slot, 54957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // but probably we might want to always remove 4th slot and make 55057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // sure that regalloc won't choose 'w' component for dst 5512cd769179345799d383f92dd615991755ec24be1Vadim Girlin 55257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_node *f = static_cast<alu_node*>(p->first); 55357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_node *a = sh->create_alu(); 55457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->src = f->src; 55557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->dst.resize(f->dst.size()); 55657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->bc = f->bc; 55757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->bc.slot = SLOT_W; 55857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin p->push_back(a); 55957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 56057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 5612cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5622cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 5632cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 5642cd769179345799d383f92dd615991755ec24be1Vadim Girlin 56557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_fetch_clause(cf_node* cf) { 5662cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r; 5672cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; 5682cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5693f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin cf->subtype = NST_TEX_CLAUSE; 5703f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin 5712cd769179345799d383f92dd615991755ec24be1Vadim Girlin while (cnt--) { 5722cd769179345799d383f92dd615991755ec24be1Vadim Girlin fetch_node *n = sh->create_fetch(); 5732cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf->push_back(n); 5742cd769179345799d383f92dd615991755ec24be1Vadim Girlin if ((r = dec->decode_fetch(i, n->bc))) 5752cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 5765a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (n->bc.src_rel || n->bc.dst_rel) 5775a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 5782cd769179345799d383f92dd615991755ec24be1Vadim Girlin 57957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 58057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 58157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 58257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 58357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_fetch_clause(cf_node *cf) { 58457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 585dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard vvec grad_v, grad_h, texture_offsets; 58657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 58757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { 58857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 58957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin fetch_node *n = static_cast<fetch_node*>(*I); 59057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin assert(n->is_valid()); 59157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 5922cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned flags = n->bc.op_ptr->flags; 5932cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5942cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned vtx = flags & FF_VTX; 5952cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned num_src = vtx ? ctx.vtx_src_num : 4; 5962cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5972cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(4); 5982cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5992cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) { 6002cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->uses_gradients = true; 6012cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6022cd769179345799d383f92dd615991755ec24be1Vadim Girlin 603dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) { 6042cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6052cd769179345799d383f92dd615991755ec24be1Vadim Girlin vvec *grad = NULL; 6062cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6072cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (n->bc.op) { 6082cd769179345799d383f92dd615991755ec24be1Vadim Girlin case FETCH_OP_SET_GRADIENTS_V: 6092cd769179345799d383f92dd615991755ec24be1Vadim Girlin grad = &grad_v; 6102cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 6112cd769179345799d383f92dd615991755ec24be1Vadim Girlin case FETCH_OP_SET_GRADIENTS_H: 6122cd769179345799d383f92dd615991755ec24be1Vadim Girlin grad = &grad_h; 6132cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 614dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard case FETCH_OP_SET_TEXTURE_OFFSETS: 615dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard grad = &texture_offsets; 616dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard break; 6172cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: 6182cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!"unexpected SET_GRAD instruction"); 6192cd769179345799d383f92dd615991755ec24be1Vadim Girlin return -1; 6202cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6212cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6222cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (grad->empty()) 6232cd769179345799d383f92dd615991755ec24be1Vadim Girlin grad->resize(4); 6242cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6252cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(unsigned s = 0; s < 4; ++s) { 6262cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned sw = n->bc.src_sel[s]; 6272cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (sw <= SEL_W) 6282cd769179345799d383f92dd615991755ec24be1Vadim Girlin (*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr, 6292cd769179345799d383f92dd615991755ec24be1Vadim Girlin sw, false); 6302cd769179345799d383f92dd615991755ec24be1Vadim Girlin else if (sw == SEL_0) 6312cd769179345799d383f92dd615991755ec24be1Vadim Girlin (*grad)[s] = sh->get_const_value(0.0f); 6322cd769179345799d383f92dd615991755ec24be1Vadim Girlin else if (sw == SEL_1) 6332cd769179345799d383f92dd615991755ec24be1Vadim Girlin (*grad)[s] = sh->get_const_value(1.0f); 6342cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6352cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 636dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard // Fold source values for instructions with hidden target values in to the instructions 637dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard // using them. The set instructions are later re-emitted by bc_finalizer 6382cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & FF_USEGRAD) { 6392cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src.resize(12); 6402cd769179345799d383f92dd615991755ec24be1Vadim Girlin std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4); 6412cd769179345799d383f92dd615991755ec24be1Vadim Girlin std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8); 642dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard } else if (flags & FF_USE_TEXTURE_OFFSETS) { 643dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard n->src.resize(8); 644dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4); 6452cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 6462cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src.resize(4); 6472cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6482cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6492cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 4; ++s) { 6502cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.dst_sel[s] != SEL_MASK) 6512cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false); 6522cd769179345799d383f92dd615991755ec24be1Vadim Girlin // NOTE: it doesn't matter here which components of the result we 6532cd769179345799d383f92dd615991755ec24be1Vadim Girlin // are using, but original n->bc.dst_sel should be taken into 6542cd769179345799d383f92dd615991755ec24be1Vadim Girlin // account when building the bytecode 6552cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6562cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(unsigned s = 0; s < num_src; ++s) { 6572cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.src_sel[s] <= SEL_W) 6582cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr, 6592cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->bc.src_sel[s], false); 6602cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6612cd769179345799d383f92dd615991755ec24be1Vadim Girlin 66280c5062abfdef28e23615f44b214760449f6a582Glenn Kennard // Scheduler will emit the appropriate instructions to set CF_IDX0/1 66380c5062abfdef28e23615f44b214760449f6a582Glenn Kennard if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) { 66480c5062abfdef28e23615f44b214760449f6a582Glenn Kennard n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1)); 66580c5062abfdef28e23615f44b214760449f6a582Glenn Kennard } 6661befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) { 6671befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1)); 6681befb7ed9856381cbfe874f361fae73b8e331bb4Glenn Kennard } 6692cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6702cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 67157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 6722cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 6732cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 6742cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6752cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_ir() { 6762cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6772cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) { 6782cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *c = *I; 6792cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6802cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!c) 6812cd769179345799d383f92dd615991755ec24be1Vadim Girlin continue; 6822cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6832cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned flags = c->bc.op_ptr->flags; 6842cd769179345799d383f92dd615991755ec24be1Vadim Girlin 68557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (flags & CF_ALU) { 68657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin prepare_alu_clause(c); 68757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } else if (flags & CF_FETCH) { 68857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin prepare_fetch_clause(c); 68957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } else if (c->bc.op == CF_OP_CALL_FS) { 69057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin sh->init_call_fs(c); 69157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; 69257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } else if (flags & CF_LOOP_START) { 6932cd769179345799d383f92dd615991755ec24be1Vadim Girlin prepare_loop(c); 6942cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_JUMP) { 6952cd769179345799d383f92dd615991755ec24be1Vadim Girlin prepare_if(c); 6962cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_LOOP_END) { 6972cd769179345799d383f92dd615991755ec24be1Vadim Girlin loop_stack.pop(); 6982cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_LOOP_CONTINUE) { 6992cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!loop_stack.empty()); 7002cd769179345799d383f92dd615991755ec24be1Vadim Girlin repeat_node *rep = sh->create_repeat(loop_stack.top()); 7012cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->parent->first != c) 7022cd769179345799d383f92dd615991755ec24be1Vadim Girlin rep->move(c->parent->first, c); 7032cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->replace_with(rep); 7042cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->simplify_dep_rep(rep); 7052cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_LOOP_BREAK) { 7062cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!loop_stack.empty()); 7072cd769179345799d383f92dd615991755ec24be1Vadim Girlin depart_node *dep = sh->create_depart(loop_stack.top()); 7082cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->parent->first != c) 7092cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep->move(c->parent->first, c); 7102cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->replace_with(dep); 7112cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->simplify_dep_rep(dep); 7122cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_EXP) { 7132cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7142cd769179345799d383f92dd615991755ec24be1Vadim Girlin // unroll burst exports 7152cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7162cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE); 7172cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7182cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.set_op(CF_OP_EXPORT); 7192cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7202cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned burst_count = c->bc.burst_count; 7212cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned eop = c->bc.end_of_program; 7222cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7232cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = 0; 7242cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.burst_count = 0; 7252cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7262cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 7272cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src.resize(4); 7282cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7292cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 4; ++s) { 7302cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (c->bc.sel[s]) { 7312cd769179345799d383f92dd615991755ec24be1Vadim Girlin case SEL_0: 7322cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = sh->get_const_value(0.0f); 7332cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 7342cd769179345799d383f92dd615991755ec24be1Vadim Girlin case SEL_1: 7352cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = sh->get_const_value(1.0f); 7362cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 7372cd769179345799d383f92dd615991755ec24be1Vadim Girlin case SEL_MASK: 7382cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 7392cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: 7402cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->bc.sel[s] <= SEL_W) 7412cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr, 7422cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.sel[s], false); 7432cd769179345799d383f92dd615991755ec24be1Vadim Girlin else 7442cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!"invalid src_sel for export"); 7452cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7462cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7472cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7482cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!burst_count--) 7492cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 7502cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7512cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *cf_next = sh->create_cf(); 7522cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_next->bc = c->bc; 7532cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++cf_next->bc.rw_gpr; 7542cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++cf_next->bc.array_base; 7552cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7562cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_after(cf_next); 7572cd769179345799d383f92dd615991755ec24be1Vadim Girlin c = cf_next; 7582cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7592cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (1); 7602cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7612cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = eop; 7625758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie } else if (flags & CF_MEM) { 7632cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7642cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned burst_count = c->bc.burst_count; 7652cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned eop = c->bc.end_of_program; 7662cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7672cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = 0; 7682cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.burst_count = 0; 7692cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7702cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 7712cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7722cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src.resize(4); 7732cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7742cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 4; ++s) { 7752cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->bc.comp_mask & (1 << s)) 7762cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = 7772cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->get_gpr_value(true, c->bc.rw_gpr, s, false); 7782cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7792cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7805758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write 7812cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src.resize(8); 7822cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 3; ++s) { 7832cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[4 + s] = 7842cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->get_gpr_value(true, c->bc.index_gpr, s, false); 7852cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7862cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7872cd769179345799d383f92dd615991755ec24be1Vadim Girlin // FIXME probably we can relax it a bit 7882cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->flags |= NF_DONT_HOIST | NF_DONT_MOVE; 7892cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7902cd769179345799d383f92dd615991755ec24be1Vadim Girlin 791f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (flags & CF_EMIT) { 792f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX 793f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 794f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 795f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (sh->target == TARGET_ES) { 796f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard // For ES shaders this is an export 797f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->flags |= NF_DONT_KILL; 798f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 799f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 800f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard 8012cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!burst_count--) 8022cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 8032cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8042cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *cf_next = sh->create_cf(); 8052cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_next->bc = c->bc; 8062cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++cf_next->bc.rw_gpr; 8072cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8082cd769179345799d383f92dd615991755ec24be1Vadim Girlin // FIXME is it correct? 8092cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_next->bc.array_base += cf_next->bc.elem_size + 1; 8102cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8112cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_after(cf_next); 8122cd769179345799d383f92dd615991755ec24be1Vadim Girlin c = cf_next; 8132cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (1); 8142cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8152cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = eop; 8162cd769179345799d383f92dd615991755ec24be1Vadim Girlin 817f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } else if (flags & CF_EMIT) { 8183d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie /* quick peephole */ 8193d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie cf_node *prev = static_cast<cf_node *>(c->prev); 8203d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie if (c->bc.op == CF_OP_CUT_VERTEX && 8213d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie prev && prev->is_valid() && 8223d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie prev->bc.op == CF_OP_EMIT_VERTEX && 8233d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie c->bc.count == prev->bc.count) { 8243d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX); 8253d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie prev->bc.end_of_program = c->bc.end_of_program; 8263d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie c->remove(); 8273d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie } 8283d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie else { 8293d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE; 830f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard 8313d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 8323d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 8333d497e0d915df8b71cd845c2cfbc6703db313628Dave Airlie } 8342cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 8352cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 8362cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8372cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(loop_stack.empty()); 8382cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 8392cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 8402cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8412cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_loop(cf_node* c) { 842a830225adbb77073272961df409885cca6b861eeGlenn Kennard assert(c->bc.addr-1 < cf_map.size()); 8432cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8442cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *end = cf_map[c->bc.addr - 1]; 8452cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(end->bc.op == CF_OP_LOOP_END); 8462cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(c->parent == end->parent); 8472cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8482cd769179345799d383f92dd615991755ec24be1Vadim Girlin region_node *reg = sh->create_region(); 8492cd769179345799d383f92dd615991755ec24be1Vadim Girlin repeat_node *rep = sh->create_repeat(reg); 8502cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8512cd769179345799d383f92dd615991755ec24be1Vadim Girlin reg->push_back(rep); 8522cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_before(reg); 8532cd769179345799d383f92dd615991755ec24be1Vadim Girlin rep->move(c, end->next); 8542cd769179345799d383f92dd615991755ec24be1Vadim Girlin 855de0fd375f6de8f3357d05decc4a7dc231c679645Vadim Girlin reg->src_loop = true; 856de0fd375f6de8f3357d05decc4a7dc231c679645Vadim Girlin 8572cd769179345799d383f92dd615991755ec24be1Vadim Girlin loop_stack.push(reg); 8582cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 8592cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 8602cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8612cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_if(cf_node* c) { 862a830225adbb77073272961df409885cca6b861eeGlenn Kennard assert(c->bc.addr-1 < cf_map.size()); 8632cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *c_else = NULL, *end = cf_map[c->bc.addr]; 8642cd769179345799d383f92dd615991755ec24be1Vadim Girlin 865608c7b4a63d5818f7ae0b3d48496b02cf8458d9bGlenn Kennard if (!end) 866608c7b4a63d5818f7ae0b3d48496b02cf8458d9bGlenn Kennard return 0; // not quite sure how this happens, malformed input? 867608c7b4a63d5818f7ae0b3d48496b02cf8458d9bGlenn Kennard 8682cd769179345799d383f92dd615991755ec24be1Vadim Girlin BCP_DUMP( 869ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << "parsing JUMP @" << c->bc.id; 870ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << "\n"; 8712cd769179345799d383f92dd615991755ec24be1Vadim Girlin ); 8722cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8732cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (end->bc.op == CF_OP_ELSE) { 8742cd769179345799d383f92dd615991755ec24be1Vadim Girlin BCP_DUMP( 875ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << " found ELSE : "; 8762cd769179345799d383f92dd615991755ec24be1Vadim Girlin dump::dump_op(end); 877ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << "\n"; 8782cd769179345799d383f92dd615991755ec24be1Vadim Girlin ); 8792cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8802cd769179345799d383f92dd615991755ec24be1Vadim Girlin c_else = end; 8812cd769179345799d383f92dd615991755ec24be1Vadim Girlin end = cf_map[c_else->bc.addr]; 8822cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 8832cd769179345799d383f92dd615991755ec24be1Vadim Girlin BCP_DUMP( 884ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << " no else\n"; 8852cd769179345799d383f92dd615991755ec24be1Vadim Girlin ); 8862cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8872cd769179345799d383f92dd615991755ec24be1Vadim Girlin c_else = end; 8882cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 8892cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8902cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c_else->parent != c->parent) 8912cd769179345799d383f92dd615991755ec24be1Vadim Girlin c_else = NULL; 8922cd769179345799d383f92dd615991755ec24be1Vadim Girlin 893608c7b4a63d5818f7ae0b3d48496b02cf8458d9bGlenn Kennard if (end && end->parent != c->parent) 8942cd769179345799d383f92dd615991755ec24be1Vadim Girlin end = NULL; 8952cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8962cd769179345799d383f92dd615991755ec24be1Vadim Girlin region_node *reg = sh->create_region(); 8972cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8982cd769179345799d383f92dd615991755ec24be1Vadim Girlin depart_node *dep2 = sh->create_depart(reg); 8992cd769179345799d383f92dd615991755ec24be1Vadim Girlin depart_node *dep = sh->create_depart(reg); 9002cd769179345799d383f92dd615991755ec24be1Vadim Girlin if_node *n_if = sh->create_if(); 9012cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9022cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_before(reg); 9032cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9042cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c_else != end) 9052cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep->move(c_else, end); 9062cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep2->move(c, end); 9072cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9082cd769179345799d383f92dd615991755ec24be1Vadim Girlin reg->push_back(dep); 9092cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep->push_front(n_if); 9102cd769179345799d383f92dd615991755ec24be1Vadim Girlin n_if->push_back(dep2); 9112cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9122cd769179345799d383f92dd615991755ec24be1Vadim Girlin n_if->cond = sh->get_special_value(SV_EXEC_MASK); 9132cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9142cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 9152cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 9162cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9172cd769179345799d383f92dd615991755ec24be1Vadim Girlin 9182cd769179345799d383f92dd615991755ec24be1Vadim Girlin} // namespace r600_sb 919