sb_bc_parser.cpp revision f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5
12cd769179345799d383f92dd615991755ec24be1Vadim Girlin/* 22cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 32cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 42cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Permission is hereby granted, free of charge, to any person obtaining a 52cd769179345799d383f92dd615991755ec24be1Vadim Girlin * copy of this software and associated documentation files (the "Software"), 62cd769179345799d383f92dd615991755ec24be1Vadim Girlin * to deal in the Software without restriction, including without limitation 72cd769179345799d383f92dd615991755ec24be1Vadim Girlin * on the rights to use, copy, modify, merge, publish, distribute, sub 82cd769179345799d383f92dd615991755ec24be1Vadim Girlin * license, and/or sell copies of the Software, and to permit persons to whom 92cd769179345799d383f92dd615991755ec24be1Vadim Girlin * the Software is furnished to do so, subject to the following conditions: 102cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 112cd769179345799d383f92dd615991755ec24be1Vadim Girlin * The above copyright notice and this permission notice (including the next 122cd769179345799d383f92dd615991755ec24be1Vadim Girlin * paragraph) shall be included in all copies or substantial portions of the 132cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Software. 142cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 152cd769179345799d383f92dd615991755ec24be1Vadim Girlin * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 162cd769179345799d383f92dd615991755ec24be1Vadim Girlin * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 172cd769179345799d383f92dd615991755ec24be1Vadim Girlin * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 182cd769179345799d383f92dd615991755ec24be1Vadim Girlin * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 192cd769179345799d383f92dd615991755ec24be1Vadim Girlin * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 202cd769179345799d383f92dd615991755ec24be1Vadim Girlin * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 212cd769179345799d383f92dd615991755ec24be1Vadim Girlin * USE OR OTHER DEALINGS IN THE SOFTWARE. 222cd769179345799d383f92dd615991755ec24be1Vadim Girlin * 232cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Authors: 242cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Vadim Girlin 252cd769179345799d383f92dd615991755ec24be1Vadim Girlin */ 262cd769179345799d383f92dd615991755ec24be1Vadim Girlin 272cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DEBUG 0 282cd769179345799d383f92dd615991755ec24be1Vadim Girlin 292cd769179345799d383f92dd615991755ec24be1Vadim Girlin#if BCP_DEBUG 302cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DUMP(q) do { q } while (0) 312cd769179345799d383f92dd615991755ec24be1Vadim Girlin#else 322cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DUMP(q) 332cd769179345799d383f92dd615991755ec24be1Vadim Girlin#endif 342cd769179345799d383f92dd615991755ec24be1Vadim Girlin 352cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "r600_pipe.h" 362cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "r600_shader.h" 372cd769179345799d383f92dd615991755ec24be1Vadim Girlin 382cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include <stack> 392cd769179345799d383f92dd615991755ec24be1Vadim Girlin 402cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_bc.h" 412cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_shader.h" 422cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_pass.h" 432cd769179345799d383f92dd615991755ec24be1Vadim Girlin 442cd769179345799d383f92dd615991755ec24be1Vadim Girlinnamespace r600_sb { 452cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode() { 472cd769179345799d383f92dd615991755ec24be1Vadim Girlin 482cd769179345799d383f92dd615991755ec24be1Vadim Girlin dw = bc->bytecode; 492cd769179345799d383f92dd615991755ec24be1Vadim Girlin bc_ndw = bc->ndw; 502cd769179345799d383f92dd615991755ec24be1Vadim Girlin max_cf = 0; 512cd769179345799d383f92dd615991755ec24be1Vadim Girlin 522cd769179345799d383f92dd615991755ec24be1Vadim Girlin dec = new bc_decoder(ctx, dw, bc_ndw); 532cd769179345799d383f92dd615991755ec24be1Vadim Girlin 542cd769179345799d383f92dd615991755ec24be1Vadim Girlin shader_target t = TARGET_UNKNOWN; 552cd769179345799d383f92dd615991755ec24be1Vadim Girlin 562cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (pshader) { 572cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (bc->type) { 582cd769179345799d383f92dd615991755ec24be1Vadim Girlin case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break; 591371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin case TGSI_PROCESSOR_VERTEX: 601371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin t = pshader->vs_as_es ? TARGET_ES : TARGET_VS; 611371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin break; 621371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break; 632cd769179345799d383f92dd615991755ec24be1Vadim Girlin case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break; 642cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: assert(!"unknown shader target"); return -1; break; 652cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 662cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 672cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (bc->type == TGSI_PROCESSOR_COMPUTE) 682cd769179345799d383f92dd615991755ec24be1Vadim Girlin t = TARGET_COMPUTE; 692cd769179345799d383f92dd615991755ec24be1Vadim Girlin else 702cd769179345799d383f92dd615991755ec24be1Vadim Girlin t = TARGET_FETCH; 712cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 722cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin sh = new shader(ctx, t, bc->debug_id); 74758ac6f91894c105c83a193e8f4f6ead06962949Vadim Girlin sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE); 75758ac6f91894c105c83a193e8f4f6ead06962949Vadim Girlin 7657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin int r = decode_shader(); 772cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7846dfad8b36dda80982613a9d29b9a7e99db3abfbVadim Girlin delete dec; 7946dfad8b36dda80982613a9d29b9a7e99db3abfbVadim Girlin 803f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin sh->ngpr = bc->ngpr; 813f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin sh->nstack = bc->nstack; 823f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin 832cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 842cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 852cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_shader() { 872cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r = 0; 882cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned i = 0; 892cd769179345799d383f92dd615991755ec24be1Vadim Girlin bool eop = false; 902cd769179345799d383f92dd615991755ec24be1Vadim Girlin 912cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->init(); 922cd769179345799d383f92dd615991755ec24be1Vadim Girlin 932cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 942cd769179345799d383f92dd615991755ec24be1Vadim Girlin eop = false; 9557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = decode_cf(i, eop))) 962cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 972cd769179345799d383f92dd615991755ec24be1Vadim Girlin 982cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (!eop || (i >> 1) <= max_cf); 992cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1002cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 1012cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 1022cd769179345799d383f92dd615991755ec24be1Vadim Girlin 10357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare() { 10457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin int r = 0; 10557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = parse_decls())) 10657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return r; 10757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = prepare_ir())) 10857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return r; 10957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 11057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 1112cd769179345799d383f92dd615991755ec24be1Vadim Girlin 11257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::parse_decls() { 1132cd769179345799d383f92dd615991755ec24be1Vadim Girlin 11457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (!pshader) { 1155a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (gpr_reladdr) 1165a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin sh->add_gpr_array(0, bc->ngpr, 0x0F); 11744a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin 11844a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin // compute shaders have some values preloaded in R0, R1 11944a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */); 12044a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */); 12157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 12257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 1232cd769179345799d383f92dd615991755ec24be1Vadim Girlin 12457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { 1252cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1262cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(pshader->num_arrays); 1272cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1282cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (pshader->num_arrays) { 1292cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (unsigned i = 0; i < pshader->num_arrays; ++i) { 1302cd769179345799d383f92dd615991755ec24be1Vadim Girlin r600_shader_array &a = pshader->arrays[i]; 1312cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); 1322cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1332cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 134a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F); 1352cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1362cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1372cd769179345799d383f92dd615991755ec24be1Vadim Girlin 138f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard // GS inputs can add indirect addressing 139f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (sh->target == TARGET_GS) { 140f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (pshader->num_arrays) { 141f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard for (unsigned i = 0; i < pshader->num_arrays; ++i) { 142f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard r600_shader_array &a = pshader->arrays[i]; 143f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); 144f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 145f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 146f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 147f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard 1481371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin if (sh->target == TARGET_VS || sh->target == TARGET_ES) 149a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_input(0, 1, 0x0F); 1501371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin else if (sh->target == TARGET_GS) { 1511371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin sh->add_input(0, 1, 0x0F); 1521371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin sh->add_input(1, 1, 0x0F); 1531371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin } 1542cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1552cd769179345799d383f92dd615991755ec24be1Vadim Girlin bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN 1562cd769179345799d383f92dd615991755ec24be1Vadim Girlin && sh->target == TARGET_PS; 1572cd769179345799d383f92dd615991755ec24be1Vadim Girlin 158a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard bool ij_interpolators[6]; 159a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard memset(ij_interpolators, 0, sizeof(ij_interpolators)); 1602cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1612cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (unsigned i = 0; i < pshader->ninput; ++i) { 1622cd769179345799d383f92dd615991755ec24be1Vadim Girlin r600_shader_io & in = pshader->input[i]; 1632cd769179345799d383f92dd615991755ec24be1Vadim Girlin bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid); 164a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F); 1652cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (ps_interp && in.spi_sid) { 166a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location); 167a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard if (k >= 0) 168a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard ij_interpolators[k] |= true; 1692cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1702cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1712cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1722cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (ps_interp) { 173a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard /* add the egcm ij interpolators to live inputs */ 174a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard unsigned num_ij = 0; 175a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard for (unsigned i = 0; i < Elements(ij_interpolators); i++) { 176a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard num_ij += ij_interpolators[i]; 177a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard } 178a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard 179a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard unsigned mask = (1 << (2 * num_ij)) - 1; 1802cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned gpr = 0; 1812cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1822cd769179345799d383f92dd615991755ec24be1Vadim Girlin while (mask) { 183a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin sh->add_input(gpr, true, mask & 0x0F); 1842cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++gpr; 1852cd769179345799d383f92dd615991755ec24be1Vadim Girlin mask >>= 4; 1862cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1872cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 1882cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1892cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 1902cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 1912cd769179345799d383f92dd615991755ec24be1Vadim Girlin 19257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_cf(unsigned &i, bool &eop) { 1932cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1942cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r; 1952cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1962cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *cf = sh->create_cf(); 1972cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->root->push_back(cf); 1982cd769179345799d383f92dd615991755ec24be1Vadim Girlin 1992cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned id = i >> 1; 2002cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2012cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf->bc.id = id; 2022cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2032cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (cf_map.size() < id + 1) 2042cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_map.resize(id + 1); 2052cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2062cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_map[id] = cf; 2072cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2082cd769179345799d383f92dd615991755ec24be1Vadim Girlin if ((r = dec->decode_cf(i, cf->bc))) 2092cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 2102cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2112cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; 2122cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2132cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & CF_ALU) { 21457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = decode_alu_clause(cf))) 2152cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 2162cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_FETCH) { 21757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if ((r = decode_fetch_clause(cf))) 2182cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r;; 2192cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_EXP) { 2205a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (cf->bc.rw_rel) 2215a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 2222cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!cf->bc.rw_rel); 2235758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie } else if (flags & CF_MEM) { 2245a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (cf->bc.rw_rel) 2255a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 2262cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!cf->bc.rw_rel); 2272cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_BRANCH) { 2282cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (cf->bc.addr > max_cf) 2292cd769179345799d383f92dd615991755ec24be1Vadim Girlin max_cf = cf->bc.addr; 2302cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 2312cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2322cd769179345799d383f92dd615991755ec24be1Vadim Girlin eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END || 2332cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf->bc.op == CF_OP_RET; 2342cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 2352cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 2362cd769179345799d383f92dd615991755ec24be1Vadim Girlin 23757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_alu_clause(cf_node* cf) { 2382cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt; 2392cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2403f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin cf->subtype = NST_ALU_CLAUSE; 2413f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin 2422cd769179345799d383f92dd615991755ec24be1Vadim Girlin cgroup = 0; 2432cd769179345799d383f92dd615991755ec24be1Vadim Girlin memset(slots[0], 0, 5*sizeof(slots[0][0])); 2442cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2452cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned ng = 0; 2462cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2472cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 24857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin decode_alu_group(cf, i, gcnt); 2492cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(gcnt <= cnt); 2502cd769179345799d383f92dd615991755ec24be1Vadim Girlin cnt -= gcnt; 2512cd769179345799d383f92dd615991755ec24be1Vadim Girlin ng++; 2522cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (cnt); 2532cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2542cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 2552cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 2562cd769179345799d383f92dd615991755ec24be1Vadim Girlin 25757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { 2582cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r; 2592cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_node *n; 2602cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_group_node *g = sh->create_alu_group(); 2612cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2622cd769179345799d383f92dd615991755ec24be1Vadim Girlin cgroup = !cgroup; 2632cd769179345799d383f92dd615991755ec24be1Vadim Girlin memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); 2642cd769179345799d383f92dd615991755ec24be1Vadim Girlin gcnt = 0; 2652cd769179345799d383f92dd615991755ec24be1Vadim Girlin 26657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin unsigned literal_mask = 0; 26757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 2682cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 2692cd769179345799d383f92dd615991755ec24be1Vadim Girlin n = sh->create_alu(); 2702cd769179345799d383f92dd615991755ec24be1Vadim Girlin g->push_back(n); 2712cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2722cd769179345799d383f92dd615991755ec24be1Vadim Girlin if ((r = dec->decode_alu(i, n->bc))) 2732cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 2742cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2752cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!sh->assign_slot(n, slots[cgroup])) { 2762cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!"alu slot assignment failed"); 2772cd769179345799d383f92dd615991755ec24be1Vadim Girlin return -1; 2782cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 2792cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2802cd769179345799d383f92dd615991755ec24be1Vadim Girlin gcnt++; 2812cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2822cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (gcnt <= 5 && !n->bc.last); 2832cd769179345799d383f92dd615991755ec24be1Vadim Girlin 2842cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(n->bc.last); 2852cd769179345799d383f92dd615991755ec24be1Vadim Girlin 28657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { 28757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin n = static_cast<alu_node*>(*I); 28857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 2895a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (n->bc.dst_rel) 2905a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 2915a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin 29257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (int k = 0; k < n->bc.op_ptr->src_count; ++k) { 29357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin bc_alu_src &src = n->bc.src[k]; 2945a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (src.rel) 2955a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 29657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (src.sel == ALU_SRC_LITERAL) { 29757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_mask |= (1 << src.chan); 29857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin src.value.u = dw[i + src.chan]; 29957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 30057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 30157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 30257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 30357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin unsigned literal_ndw = 0; 30457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin while (literal_mask) { 30557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin g->literals.push_back(dw[i + literal_ndw]); 30657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_ndw += 1; 30757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_mask >>= 1; 30857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 30957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 31057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin literal_ndw = (literal_ndw + 1) & ~1u; 31157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 31257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin i += literal_ndw; 31357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin gcnt += literal_ndw >> 1; 31457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 31557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin cf->push_back(g); 31657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 31757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 31857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 31957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_alu_clause(cf_node* cf) { 32057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 32157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // loop over alu groups 32257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { 32357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin assert(I->subtype == NST_ALU_GROUP); 32457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_group_node *g = static_cast<alu_group_node*>(*I); 32557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin prepare_alu_group(cf, g); 32657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 32757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 32857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 32957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 33057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 33157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { 33257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 33357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_node *n; 33457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 33557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin cgroup = !cgroup; 33657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); 3372cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3382cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (node_iterator I = g->begin(), E = g->end(); 3392cd769179345799d383f92dd615991755ec24be1Vadim Girlin I != E; ++I) { 3402cd769179345799d383f92dd615991755ec24be1Vadim Girlin n = static_cast<alu_node*>(*I); 34157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 34257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (!sh->assign_slot(n, slots[cgroup])) { 34357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin assert(!"alu slot assignment failed"); 34457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return -1; 34557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 34657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 3472cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned src_count = n->bc.op_ptr->src_count; 3482cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3492cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (ctx.alu_slots(n->bc.op) & AF_4SLOT) 3502cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_ALU_4SLOT; 3512cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3522cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src.resize(src_count); 3532cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3542cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned flags = n->bc.op_ptr->flags; 3552cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3562cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & AF_PRED) { 3572cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(3); 3582cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.update_pred) 3592cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[1] = sh->get_special_value(SV_ALU_PRED); 3602cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.update_exec_mask) 3612cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[2] = sh->get_special_value(SV_EXEC_MASK); 3622cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3632cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_DONT_HOIST; 3642cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3652cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & AF_KILL) { 3662cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3672cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(2); 3682cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[1] = sh->get_special_value(SV_VALID_MASK); 3692cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->set_uses_kill(); 3702cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3712cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | 3722cd769179345799d383f92dd615991755ec24be1Vadim Girlin NF_DONT_KILL | NF_SCHEDULE_EARLY; 3732cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3742cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 3752cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(1); 3762cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 3772cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3782cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & AF_MOVA) { 3792cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3802cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[0] = sh->get_special_value(SV_AR_INDEX); 3812cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3822cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->flags |= NF_DONT_HOIST; 3832cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3842cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) { 3852cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X); 3862cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3872cd769179345799d383f92dd615991755ec24be1Vadim Girlin value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan, 3882cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->bc.dst_rel); 3892cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3902cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[0] = v; 3912cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 3922cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3932cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.pred_sel) { 3942cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->has_alu_predication = true; 3952cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->pred = sh->get_special_value(SV_ALU_PRED); 3962cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 3972cd769179345799d383f92dd615991755ec24be1Vadim Girlin 3982cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (unsigned s = 0; s < src_count; ++s) { 3992cd769179345799d383f92dd615991755ec24be1Vadim Girlin bc_alu_src &src = n->bc.src[s]; 4002cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4012cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (src.sel == ALU_SRC_LITERAL) { 4022cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(src.value); 4032cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { 4042cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? 4052cd769179345799d383f92dd615991755ec24be1Vadim Girlin SLOT_TRANS : src.chan; 40696efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin 40796efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin // XXX shouldn't happen but llvm backend uses PS on cayman 40896efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin if (prev_slot == SLOT_TRANS && ctx.is_cayman()) 40996efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin prev_slot = SLOT_X; 41096efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin 4112cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_node *prev_alu = slots[pgroup][prev_slot]; 4122cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4132cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(prev_alu); 4142cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4152cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!prev_alu->dst[0]) { 4162cd769179345799d383f92dd615991755ec24be1Vadim Girlin value * t = sh->create_temp_value(); 4172cd769179345799d383f92dd615991755ec24be1Vadim Girlin prev_alu->dst[0] = t; 4182cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4192cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4202cd769179345799d383f92dd615991755ec24be1Vadim Girlin value *d = prev_alu->dst[0]; 4212cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4222cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (d->is_rel()) { 4232cd769179345799d383f92dd615991755ec24be1Vadim Girlin d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr, 4242cd769179345799d383f92dd615991755ec24be1Vadim Girlin prev_alu->bc.dst_chan, 4252cd769179345799d383f92dd615991755ec24be1Vadim Girlin prev_alu->bc.dst_rel); 4262cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4272cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4282cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = d; 4292cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (ctx.is_kcache_sel(src.sel)) { 4302cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned sel = src.sel, kc_addr; 4312cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1); 4322cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4332cd769179345799d383f92dd615991755ec24be1Vadim Girlin bc_kcache &kc = cf->bc.kc[kc_set]; 4342cd769179345799d383f92dd615991755ec24be1Vadim Girlin kc_addr = (kc.addr << 4) + (sel & 0x1F); 4352cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan); 4362cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (src.sel < MAX_GPR) { 4372cd769179345799d383f92dd615991755ec24be1Vadim Girlin value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel); 4382cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4392cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = v; 4402cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4412cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (src.sel >= ALU_SRC_PARAM_OFFSET) { 4422cd769179345799d383f92dd615991755ec24be1Vadim Girlin // using slot for value channel because in fact the slot 4432cd769179345799d383f92dd615991755ec24be1Vadim Girlin // determines the channel that is loaded by INTERP_LOAD_P0 4442cd769179345799d383f92dd615991755ec24be1Vadim Girlin // (and maybe some others). 4452cd769179345799d383f92dd615991755ec24be1Vadim Girlin // otherwise GVN will consider INTERP_LOAD_P0s with the same 4462cd769179345799d383f92dd615991755ec24be1Vadim Girlin // param index as equal instructions and leave only one of them 4472cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_special_ro_value(sel_chan(src.sel, 4482cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->bc.slot)); 4492cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 4502cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (src.sel) { 4512cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_0: 4522cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(0); 4532cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4542cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_0_5: 4552cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(0.5f); 4562cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4572cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_1: 4582cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(1.0f); 4592cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4602cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_1_INT: 4612cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(1); 4622cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4632cd769179345799d383f92dd615991755ec24be1Vadim Girlin case ALU_SRC_M_1_INT: 4642cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_const_value(-1); 4652cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4662cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: 4672cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_special_ro_value(src.sel); 4682cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 4692cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4702cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4712cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4722cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4732cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4742cd769179345799d383f92dd615991755ec24be1Vadim Girlin // pack multislot instructions into alu_packed_node 4752cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4762cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_packed_node *p = NULL; 4772cd769179345799d383f92dd615991755ec24be1Vadim Girlin for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) { 4782cd769179345799d383f92dd615991755ec24be1Vadim Girlin N = I + 1; 4792cd769179345799d383f92dd615991755ec24be1Vadim Girlin alu_node *a = static_cast<alu_node*>(*I); 4802cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned sflags = a->bc.slot_flags; 4812cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4822cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) { 4832cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!p) 4842cd769179345799d383f92dd615991755ec24be1Vadim Girlin p = sh->create_alu_packed(); 4852cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4862cd769179345799d383f92dd615991755ec24be1Vadim Girlin a->remove(); 4872cd769179345799d383f92dd615991755ec24be1Vadim Girlin p->push_back(a); 4882cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4892cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 4902cd769179345799d383f92dd615991755ec24be1Vadim Girlin 4912cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (p) { 4922cd769179345799d383f92dd615991755ec24be1Vadim Girlin g->push_front(p); 4932cd769179345799d383f92dd615991755ec24be1Vadim Girlin 49457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (p->count() == 3 && ctx.is_cayman()) { 49557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // cayman's scalar instruction that can use 3 or 4 slots 4962cd769179345799d383f92dd615991755ec24be1Vadim Girlin 49757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // FIXME for simplicity we'll always add 4th slot, 49857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // but probably we might want to always remove 4th slot and make 49957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin // sure that regalloc won't choose 'w' component for dst 5002cd769179345799d383f92dd615991755ec24be1Vadim Girlin 50157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_node *f = static_cast<alu_node*>(p->first); 50257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin alu_node *a = sh->create_alu(); 50357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->src = f->src; 50457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->dst.resize(f->dst.size()); 50557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->bc = f->bc; 50657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin a->bc.slot = SLOT_W; 50757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin p->push_back(a); 50857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 50957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 5102cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5112cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 5122cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 5132cd769179345799d383f92dd615991755ec24be1Vadim Girlin 51457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_fetch_clause(cf_node* cf) { 5152cd769179345799d383f92dd615991755ec24be1Vadim Girlin int r; 5162cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; 5172cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5183f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin cf->subtype = NST_TEX_CLAUSE; 5193f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin 5202cd769179345799d383f92dd615991755ec24be1Vadim Girlin while (cnt--) { 5212cd769179345799d383f92dd615991755ec24be1Vadim Girlin fetch_node *n = sh->create_fetch(); 5222cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf->push_back(n); 5232cd769179345799d383f92dd615991755ec24be1Vadim Girlin if ((r = dec->decode_fetch(i, n->bc))) 5242cd769179345799d383f92dd615991755ec24be1Vadim Girlin return r; 5255a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin if (n->bc.src_rel || n->bc.dst_rel) 5265a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin gpr_reladdr = true; 5272cd769179345799d383f92dd615991755ec24be1Vadim Girlin 52857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } 52957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin return 0; 53057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin} 53157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 53257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_fetch_clause(cf_node *cf) { 53357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 534dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard vvec grad_v, grad_h, texture_offsets; 53557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 53657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { 53757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 53857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin fetch_node *n = static_cast<fetch_node*>(*I); 53957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin assert(n->is_valid()); 54057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 5412cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned flags = n->bc.op_ptr->flags; 5422cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5432cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned vtx = flags & FF_VTX; 5442cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned num_src = vtx ? ctx.vtx_src_num : 4; 5452cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5462cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst.resize(4); 5472cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5482cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) { 5492cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->uses_gradients = true; 5502cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5512cd769179345799d383f92dd615991755ec24be1Vadim Girlin 552dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) { 5532cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5542cd769179345799d383f92dd615991755ec24be1Vadim Girlin vvec *grad = NULL; 5552cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5562cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (n->bc.op) { 5572cd769179345799d383f92dd615991755ec24be1Vadim Girlin case FETCH_OP_SET_GRADIENTS_V: 5582cd769179345799d383f92dd615991755ec24be1Vadim Girlin grad = &grad_v; 5592cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 5602cd769179345799d383f92dd615991755ec24be1Vadim Girlin case FETCH_OP_SET_GRADIENTS_H: 5612cd769179345799d383f92dd615991755ec24be1Vadim Girlin grad = &grad_h; 5622cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 563dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard case FETCH_OP_SET_TEXTURE_OFFSETS: 564dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard grad = &texture_offsets; 565dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard break; 5662cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: 5672cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!"unexpected SET_GRAD instruction"); 5682cd769179345799d383f92dd615991755ec24be1Vadim Girlin return -1; 5692cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5702cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5712cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (grad->empty()) 5722cd769179345799d383f92dd615991755ec24be1Vadim Girlin grad->resize(4); 5732cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5742cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(unsigned s = 0; s < 4; ++s) { 5752cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned sw = n->bc.src_sel[s]; 5762cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (sw <= SEL_W) 5772cd769179345799d383f92dd615991755ec24be1Vadim Girlin (*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr, 5782cd769179345799d383f92dd615991755ec24be1Vadim Girlin sw, false); 5792cd769179345799d383f92dd615991755ec24be1Vadim Girlin else if (sw == SEL_0) 5802cd769179345799d383f92dd615991755ec24be1Vadim Girlin (*grad)[s] = sh->get_const_value(0.0f); 5812cd769179345799d383f92dd615991755ec24be1Vadim Girlin else if (sw == SEL_1) 5822cd769179345799d383f92dd615991755ec24be1Vadim Girlin (*grad)[s] = sh->get_const_value(1.0f); 5832cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5842cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 585dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard // Fold source values for instructions with hidden target values in to the instructions 586dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard // using them. The set instructions are later re-emitted by bc_finalizer 5872cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (flags & FF_USEGRAD) { 5882cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src.resize(12); 5892cd769179345799d383f92dd615991755ec24be1Vadim Girlin std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4); 5902cd769179345799d383f92dd615991755ec24be1Vadim Girlin std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8); 591dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard } else if (flags & FF_USE_TEXTURE_OFFSETS) { 592dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard n->src.resize(8); 593dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4); 5942cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 5952cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src.resize(4); 5962cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 5972cd769179345799d383f92dd615991755ec24be1Vadim Girlin 5982cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 4; ++s) { 5992cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.dst_sel[s] != SEL_MASK) 6002cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false); 6012cd769179345799d383f92dd615991755ec24be1Vadim Girlin // NOTE: it doesn't matter here which components of the result we 6022cd769179345799d383f92dd615991755ec24be1Vadim Girlin // are using, but original n->bc.dst_sel should be taken into 6032cd769179345799d383f92dd615991755ec24be1Vadim Girlin // account when building the bytecode 6042cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6052cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(unsigned s = 0; s < num_src; ++s) { 6062cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (n->bc.src_sel[s] <= SEL_W) 6072cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr, 6082cd769179345799d383f92dd615991755ec24be1Vadim Girlin n->bc.src_sel[s], false); 6092cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6102cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6112cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6122cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 61357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin 6142cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 6152cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 6162cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6172cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_ir() { 6182cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6192cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) { 6202cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *c = *I; 6212cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6222cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!c) 6232cd769179345799d383f92dd615991755ec24be1Vadim Girlin continue; 6242cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6252cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned flags = c->bc.op_ptr->flags; 6262cd769179345799d383f92dd615991755ec24be1Vadim Girlin 62757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin if (flags & CF_ALU) { 62857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin prepare_alu_clause(c); 62957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } else if (flags & CF_FETCH) { 63057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin prepare_fetch_clause(c); 63157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } else if (c->bc.op == CF_OP_CALL_FS) { 63257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin sh->init_call_fs(c); 63357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; 63457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin } else if (flags & CF_LOOP_START) { 6352cd769179345799d383f92dd615991755ec24be1Vadim Girlin prepare_loop(c); 6362cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_JUMP) { 6372cd769179345799d383f92dd615991755ec24be1Vadim Girlin prepare_if(c); 6382cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_LOOP_END) { 6392cd769179345799d383f92dd615991755ec24be1Vadim Girlin loop_stack.pop(); 6402cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_LOOP_CONTINUE) { 6412cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!loop_stack.empty()); 6422cd769179345799d383f92dd615991755ec24be1Vadim Girlin repeat_node *rep = sh->create_repeat(loop_stack.top()); 6432cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->parent->first != c) 6442cd769179345799d383f92dd615991755ec24be1Vadim Girlin rep->move(c->parent->first, c); 6452cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->replace_with(rep); 6462cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->simplify_dep_rep(rep); 6472cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (c->bc.op == CF_OP_LOOP_BREAK) { 6482cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!loop_stack.empty()); 6492cd769179345799d383f92dd615991755ec24be1Vadim Girlin depart_node *dep = sh->create_depart(loop_stack.top()); 6502cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->parent->first != c) 6512cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep->move(c->parent->first, c); 6522cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->replace_with(dep); 6532cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->simplify_dep_rep(dep); 6542cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else if (flags & CF_EXP) { 6552cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6562cd769179345799d383f92dd615991755ec24be1Vadim Girlin // unroll burst exports 6572cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6582cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE); 6592cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6602cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.set_op(CF_OP_EXPORT); 6612cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6622cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned burst_count = c->bc.burst_count; 6632cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned eop = c->bc.end_of_program; 6642cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6652cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = 0; 6662cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.burst_count = 0; 6672cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6682cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 6692cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src.resize(4); 6702cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6712cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 4; ++s) { 6722cd769179345799d383f92dd615991755ec24be1Vadim Girlin switch (c->bc.sel[s]) { 6732cd769179345799d383f92dd615991755ec24be1Vadim Girlin case SEL_0: 6742cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = sh->get_const_value(0.0f); 6752cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 6762cd769179345799d383f92dd615991755ec24be1Vadim Girlin case SEL_1: 6772cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = sh->get_const_value(1.0f); 6782cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 6792cd769179345799d383f92dd615991755ec24be1Vadim Girlin case SEL_MASK: 6802cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 6812cd769179345799d383f92dd615991755ec24be1Vadim Girlin default: 6822cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->bc.sel[s] <= SEL_W) 6832cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr, 6842cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.sel[s], false); 6852cd769179345799d383f92dd615991755ec24be1Vadim Girlin else 6862cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(!"invalid src_sel for export"); 6872cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6882cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 6892cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6902cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!burst_count--) 6912cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 6922cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6932cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *cf_next = sh->create_cf(); 6942cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_next->bc = c->bc; 6952cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++cf_next->bc.rw_gpr; 6962cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++cf_next->bc.array_base; 6972cd769179345799d383f92dd615991755ec24be1Vadim Girlin 6982cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_after(cf_next); 6992cd769179345799d383f92dd615991755ec24be1Vadim Girlin c = cf_next; 7002cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7012cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (1); 7022cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7032cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = eop; 7045758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie } else if (flags & CF_MEM) { 7052cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7062cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned burst_count = c->bc.burst_count; 7072cd769179345799d383f92dd615991755ec24be1Vadim Girlin unsigned eop = c->bc.end_of_program; 7082cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7092cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = 0; 7102cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.burst_count = 0; 7112cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7122cd769179345799d383f92dd615991755ec24be1Vadim Girlin do { 7132cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7142cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src.resize(4); 7152cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7162cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 4; ++s) { 7172cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c->bc.comp_mask & (1 << s)) 7182cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[s] = 7192cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->get_gpr_value(true, c->bc.rw_gpr, s, false); 7202cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7212cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7225758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write 7232cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src.resize(8); 7242cd769179345799d383f92dd615991755ec24be1Vadim Girlin for(int s = 0; s < 3; ++s) { 7252cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->src[4 + s] = 7262cd769179345799d383f92dd615991755ec24be1Vadim Girlin sh->get_gpr_value(true, c->bc.index_gpr, s, false); 7272cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7282cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7292cd769179345799d383f92dd615991755ec24be1Vadim Girlin // FIXME probably we can relax it a bit 7302cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->flags |= NF_DONT_HOIST | NF_DONT_MOVE; 7312cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7322cd769179345799d383f92dd615991755ec24be1Vadim Girlin 733f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (flags & CF_EMIT) { 734f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX 735f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 736f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 737f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard if (sh->target == TARGET_ES) { 738f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard // For ES shaders this is an export 739f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->flags |= NF_DONT_KILL; 740f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 741f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } 742f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard 7432cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (!burst_count--) 7442cd769179345799d383f92dd615991755ec24be1Vadim Girlin break; 7452cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7462cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *cf_next = sh->create_cf(); 7472cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_next->bc = c->bc; 7482cd769179345799d383f92dd615991755ec24be1Vadim Girlin ++cf_next->bc.rw_gpr; 7492cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7502cd769179345799d383f92dd615991755ec24be1Vadim Girlin // FIXME is it correct? 7512cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_next->bc.array_base += cf_next->bc.elem_size + 1; 7522cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7532cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_after(cf_next); 7542cd769179345799d383f92dd615991755ec24be1Vadim Girlin c = cf_next; 7552cd769179345799d383f92dd615991755ec24be1Vadim Girlin } while (1); 7562cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7572cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->bc.end_of_program = eop; 7582cd769179345799d383f92dd615991755ec24be1Vadim Girlin 759f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard } else if (flags & CF_EMIT) { 760f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE; 761f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard 762f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 763f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); 7642cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7652cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 7662cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7672cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(loop_stack.empty()); 7682cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 7692cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 7702cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7712cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_loop(cf_node* c) { 7722cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7732cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *end = cf_map[c->bc.addr - 1]; 7742cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(end->bc.op == CF_OP_LOOP_END); 7752cd769179345799d383f92dd615991755ec24be1Vadim Girlin assert(c->parent == end->parent); 7762cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7772cd769179345799d383f92dd615991755ec24be1Vadim Girlin region_node *reg = sh->create_region(); 7782cd769179345799d383f92dd615991755ec24be1Vadim Girlin repeat_node *rep = sh->create_repeat(reg); 7792cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7802cd769179345799d383f92dd615991755ec24be1Vadim Girlin reg->push_back(rep); 7812cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_before(reg); 7822cd769179345799d383f92dd615991755ec24be1Vadim Girlin rep->move(c, end->next); 7832cd769179345799d383f92dd615991755ec24be1Vadim Girlin 784de0fd375f6de8f3357d05decc4a7dc231c679645Vadim Girlin reg->src_loop = true; 785de0fd375f6de8f3357d05decc4a7dc231c679645Vadim Girlin 7862cd769179345799d383f92dd615991755ec24be1Vadim Girlin loop_stack.push(reg); 7872cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 7882cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 7892cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7902cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_if(cf_node* c) { 7912cd769179345799d383f92dd615991755ec24be1Vadim Girlin cf_node *c_else = NULL, *end = cf_map[c->bc.addr]; 7922cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7932cd769179345799d383f92dd615991755ec24be1Vadim Girlin BCP_DUMP( 794ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << "parsing JUMP @" << c->bc.id; 795ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << "\n"; 7962cd769179345799d383f92dd615991755ec24be1Vadim Girlin ); 7972cd769179345799d383f92dd615991755ec24be1Vadim Girlin 7982cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (end->bc.op == CF_OP_ELSE) { 7992cd769179345799d383f92dd615991755ec24be1Vadim Girlin BCP_DUMP( 800ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << " found ELSE : "; 8012cd769179345799d383f92dd615991755ec24be1Vadim Girlin dump::dump_op(end); 802ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << "\n"; 8032cd769179345799d383f92dd615991755ec24be1Vadim Girlin ); 8042cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8052cd769179345799d383f92dd615991755ec24be1Vadim Girlin c_else = end; 8062cd769179345799d383f92dd615991755ec24be1Vadim Girlin end = cf_map[c_else->bc.addr]; 8072cd769179345799d383f92dd615991755ec24be1Vadim Girlin } else { 8082cd769179345799d383f92dd615991755ec24be1Vadim Girlin BCP_DUMP( 809ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin sblog << " no else\n"; 8102cd769179345799d383f92dd615991755ec24be1Vadim Girlin ); 8112cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8122cd769179345799d383f92dd615991755ec24be1Vadim Girlin c_else = end; 8132cd769179345799d383f92dd615991755ec24be1Vadim Girlin } 8142cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8152cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c_else->parent != c->parent) 8162cd769179345799d383f92dd615991755ec24be1Vadim Girlin c_else = NULL; 8172cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8182cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (end->parent != c->parent) 8192cd769179345799d383f92dd615991755ec24be1Vadim Girlin end = NULL; 8202cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8212cd769179345799d383f92dd615991755ec24be1Vadim Girlin region_node *reg = sh->create_region(); 8222cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8232cd769179345799d383f92dd615991755ec24be1Vadim Girlin depart_node *dep2 = sh->create_depart(reg); 8242cd769179345799d383f92dd615991755ec24be1Vadim Girlin depart_node *dep = sh->create_depart(reg); 8252cd769179345799d383f92dd615991755ec24be1Vadim Girlin if_node *n_if = sh->create_if(); 8262cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8272cd769179345799d383f92dd615991755ec24be1Vadim Girlin c->insert_before(reg); 8282cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8292cd769179345799d383f92dd615991755ec24be1Vadim Girlin if (c_else != end) 8302cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep->move(c_else, end); 8312cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep2->move(c, end); 8322cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8332cd769179345799d383f92dd615991755ec24be1Vadim Girlin reg->push_back(dep); 8342cd769179345799d383f92dd615991755ec24be1Vadim Girlin dep->push_front(n_if); 8352cd769179345799d383f92dd615991755ec24be1Vadim Girlin n_if->push_back(dep2); 8362cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8372cd769179345799d383f92dd615991755ec24be1Vadim Girlin n_if->cond = sh->get_special_value(SV_EXEC_MASK); 8382cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8392cd769179345799d383f92dd615991755ec24be1Vadim Girlin return 0; 8402cd769179345799d383f92dd615991755ec24be1Vadim Girlin} 8412cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8422cd769179345799d383f92dd615991755ec24be1Vadim Girlin 8432cd769179345799d383f92dd615991755ec24be1Vadim Girlin} // namespace r600_sb 844