sb_bc_parser.cpp revision f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5
12cd769179345799d383f92dd615991755ec24be1Vadim Girlin/*
22cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
32cd769179345799d383f92dd615991755ec24be1Vadim Girlin *
42cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Permission is hereby granted, free of charge, to any person obtaining a
52cd769179345799d383f92dd615991755ec24be1Vadim Girlin * copy of this software and associated documentation files (the "Software"),
62cd769179345799d383f92dd615991755ec24be1Vadim Girlin * to deal in the Software without restriction, including without limitation
72cd769179345799d383f92dd615991755ec24be1Vadim Girlin * on the rights to use, copy, modify, merge, publish, distribute, sub
82cd769179345799d383f92dd615991755ec24be1Vadim Girlin * license, and/or sell copies of the Software, and to permit persons to whom
92cd769179345799d383f92dd615991755ec24be1Vadim Girlin * the Software is furnished to do so, subject to the following conditions:
102cd769179345799d383f92dd615991755ec24be1Vadim Girlin *
112cd769179345799d383f92dd615991755ec24be1Vadim Girlin * The above copyright notice and this permission notice (including the next
122cd769179345799d383f92dd615991755ec24be1Vadim Girlin * paragraph) shall be included in all copies or substantial portions of the
132cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Software.
142cd769179345799d383f92dd615991755ec24be1Vadim Girlin *
152cd769179345799d383f92dd615991755ec24be1Vadim Girlin * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
162cd769179345799d383f92dd615991755ec24be1Vadim Girlin * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
172cd769179345799d383f92dd615991755ec24be1Vadim Girlin * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
182cd769179345799d383f92dd615991755ec24be1Vadim Girlin * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
192cd769179345799d383f92dd615991755ec24be1Vadim Girlin * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
202cd769179345799d383f92dd615991755ec24be1Vadim Girlin * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
212cd769179345799d383f92dd615991755ec24be1Vadim Girlin * USE OR OTHER DEALINGS IN THE SOFTWARE.
222cd769179345799d383f92dd615991755ec24be1Vadim Girlin *
232cd769179345799d383f92dd615991755ec24be1Vadim Girlin * Authors:
242cd769179345799d383f92dd615991755ec24be1Vadim Girlin *      Vadim Girlin
252cd769179345799d383f92dd615991755ec24be1Vadim Girlin */
262cd769179345799d383f92dd615991755ec24be1Vadim Girlin
// Debug switch for this file: set BCP_DEBUG to a non-zero value to compile in
// the statements passed to BCP_DUMP().
272cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DEBUG 0
282cd769179345799d383f92dd615991755ec24be1Vadim Girlin
292cd769179345799d383f92dd615991755ec24be1Vadim Girlin#if BCP_DEBUG
// Debug build: BCP_DUMP(q) expands to q wrapped in a do { } while (0) block.
302cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DUMP(q) do { q } while (0)
312cd769179345799d383f92dd615991755ec24be1Vadim Girlin#else
// Otherwise BCP_DUMP(q) expands to nothing.
322cd769179345799d383f92dd615991755ec24be1Vadim Girlin#define BCP_DUMP(q)
332cd769179345799d383f92dd615991755ec24be1Vadim Girlin#endif
342cd769179345799d383f92dd615991755ec24be1Vadim Girlin
352cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "r600_pipe.h"
362cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "r600_shader.h"
372cd769179345799d383f92dd615991755ec24be1Vadim Girlin
382cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include <stack>
392cd769179345799d383f92dd615991755ec24be1Vadim Girlin
402cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_bc.h"
412cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_shader.h"
422cd769179345799d383f92dd615991755ec24be1Vadim Girlin#include "sb_pass.h"
432cd769179345799d383f92dd615991755ec24be1Vadim Girlin
442cd769179345799d383f92dd615991755ec24be1Vadim Girlinnamespace r600_sb {
452cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode() {
472cd769179345799d383f92dd615991755ec24be1Vadim Girlin
482cd769179345799d383f92dd615991755ec24be1Vadim Girlin	dw = bc->bytecode;
492cd769179345799d383f92dd615991755ec24be1Vadim Girlin	bc_ndw = bc->ndw;
502cd769179345799d383f92dd615991755ec24be1Vadim Girlin	max_cf = 0;
512cd769179345799d383f92dd615991755ec24be1Vadim Girlin
522cd769179345799d383f92dd615991755ec24be1Vadim Girlin	dec = new bc_decoder(ctx, dw, bc_ndw);
532cd769179345799d383f92dd615991755ec24be1Vadim Girlin
542cd769179345799d383f92dd615991755ec24be1Vadim Girlin	shader_target t = TARGET_UNKNOWN;
552cd769179345799d383f92dd615991755ec24be1Vadim Girlin
562cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (pshader) {
572cd769179345799d383f92dd615991755ec24be1Vadim Girlin		switch (bc->type) {
582cd769179345799d383f92dd615991755ec24be1Vadim Girlin		case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
591371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin		case TGSI_PROCESSOR_VERTEX:
601371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin			t = pshader->vs_as_es ? TARGET_ES : TARGET_VS;
611371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin			break;
621371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin		case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break;
632cd769179345799d383f92dd615991755ec24be1Vadim Girlin		case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
642cd769179345799d383f92dd615991755ec24be1Vadim Girlin		default: assert(!"unknown shader target"); return -1; break;
652cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
662cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} else {
672cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (bc->type == TGSI_PROCESSOR_COMPUTE)
682cd769179345799d383f92dd615991755ec24be1Vadim Girlin			t = TARGET_COMPUTE;
692cd769179345799d383f92dd615991755ec24be1Vadim Girlin		else
702cd769179345799d383f92dd615991755ec24be1Vadim Girlin			t = TARGET_FETCH;
712cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
722cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	sh = new shader(ctx, t, bc->debug_id);
74758ac6f91894c105c83a193e8f4f6ead06962949Vadim Girlin	sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);
75758ac6f91894c105c83a193e8f4f6ead06962949Vadim Girlin
7657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	int r = decode_shader();
772cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7846dfad8b36dda80982613a9d29b9a7e99db3abfbVadim Girlin	delete dec;
7946dfad8b36dda80982613a9d29b9a7e99db3abfbVadim Girlin
803f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin	sh->ngpr = bc->ngpr;
813f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin	sh->nstack = bc->nstack;
823f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin
832cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return r;
842cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
852cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_shader() {
872cd769179345799d383f92dd615991755ec24be1Vadim Girlin	int r = 0;
882cd769179345799d383f92dd615991755ec24be1Vadim Girlin	unsigned i = 0;
892cd769179345799d383f92dd615991755ec24be1Vadim Girlin	bool eop = false;
902cd769179345799d383f92dd615991755ec24be1Vadim Girlin
912cd769179345799d383f92dd615991755ec24be1Vadim Girlin	sh->init();
922cd769179345799d383f92dd615991755ec24be1Vadim Girlin
932cd769179345799d383f92dd615991755ec24be1Vadim Girlin	do {
942cd769179345799d383f92dd615991755ec24be1Vadim Girlin		eop = false;
9557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		if ((r = decode_cf(i, eop)))
962cd769179345799d383f92dd615991755ec24be1Vadim Girlin			return r;
972cd769179345799d383f92dd615991755ec24be1Vadim Girlin
982cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} while (!eop || (i >> 1) <= max_cf);
992cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1002cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
1012cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
1022cd769179345799d383f92dd615991755ec24be1Vadim Girlin
10357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare() {
10457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	int r = 0;
10557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	if ((r = parse_decls()))
10657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		return r;
10757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	if ((r = prepare_ir()))
10857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		return r;
10957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	return 0;
11057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin}
1112cd769179345799d383f92dd615991755ec24be1Vadim Girlin
11257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::parse_decls() {
1132cd769179345799d383f92dd615991755ec24be1Vadim Girlin
11457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	if (!pshader) {
1155a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin		if (gpr_reladdr)
1165a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin			sh->add_gpr_array(0, bc->ngpr, 0x0F);
11744a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin
11844a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin		// compute shaders have some values preloaded in R0, R1
11944a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
12044a117ab9ac7e18ffddf4d59ae35fe2c266efc1dVadim Girlin		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
12157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		return 0;
12257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	}
1232cd769179345799d383f92dd615991755ec24be1Vadim Girlin
12457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
1252cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1262cd769179345799d383f92dd615991755ec24be1Vadim Girlin		assert(pshader->num_arrays);
1272cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1282cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (pshader->num_arrays) {
1292cd769179345799d383f92dd615991755ec24be1Vadim Girlin			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
1302cd769179345799d383f92dd615991755ec24be1Vadim Girlin				r600_shader_array &a = pshader->arrays[i];
1312cd769179345799d383f92dd615991755ec24be1Vadim Girlin				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
1322cd769179345799d383f92dd615991755ec24be1Vadim Girlin			}
1332cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else {
134a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
1352cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
1362cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
1372cd769179345799d383f92dd615991755ec24be1Vadim Girlin
138f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard	// GS inputs can add indirect addressing
139f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard	if (sh->target == TARGET_GS) {
140f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard		if (pshader->num_arrays) {
141f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
142f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard				r600_shader_array &a = pshader->arrays[i];
143f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
144f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard			}
145f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard		}
146f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard	}
147f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard
1481371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin	if (sh->target == TARGET_VS || sh->target == TARGET_ES)
149a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin		sh->add_input(0, 1, 0x0F);
1501371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin	else if (sh->target == TARGET_GS) {
1511371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin		sh->add_input(0, 1, 0x0F);
1521371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin		sh->add_input(1, 1, 0x0F);
1531371d65a7fbd695d3516861fe733685569d890d0Vadim Girlin	}
1542cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1552cd769179345799d383f92dd615991755ec24be1Vadim Girlin	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
1562cd769179345799d383f92dd615991755ec24be1Vadim Girlin			&& sh->target == TARGET_PS;
1572cd769179345799d383f92dd615991755ec24be1Vadim Girlin
158a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard	bool ij_interpolators[6];
159a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard	memset(ij_interpolators, 0, sizeof(ij_interpolators));
1602cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1612cd769179345799d383f92dd615991755ec24be1Vadim Girlin	for (unsigned i = 0; i < pshader->ninput; ++i) {
1622cd769179345799d383f92dd615991755ec24be1Vadim Girlin		r600_shader_io & in = pshader->input[i];
1632cd769179345799d383f92dd615991755ec24be1Vadim Girlin		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
164a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
1652cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (ps_interp && in.spi_sid) {
166a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard			int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
167a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard			if (k >= 0)
168a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard				ij_interpolators[k] |= true;
1692cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
1702cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
1712cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1722cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (ps_interp) {
173a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard		/* add the egcm ij interpolators to live inputs */
174a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard		unsigned num_ij = 0;
175a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard		for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
176a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard			num_ij += ij_interpolators[i];
177a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard		}
178a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard
179a327fa3a068af49bb7ae00a4b03abcb91906e0d2Glenn Kennard		unsigned mask = (1 << (2 * num_ij)) - 1;
1802cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned gpr = 0;
1812cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1822cd769179345799d383f92dd615991755ec24be1Vadim Girlin		while (mask) {
183a6fe055fa77e42e35f25272bdd5ca7213b436a1aVadim Girlin			sh->add_input(gpr, true, mask & 0x0F);
1842cd769179345799d383f92dd615991755ec24be1Vadim Girlin			++gpr;
1852cd769179345799d383f92dd615991755ec24be1Vadim Girlin			mask >>= 4;
1862cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
1872cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
1882cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1892cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
1902cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
1912cd769179345799d383f92dd615991755ec24be1Vadim Girlin
19257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_cf(unsigned &i, bool &eop) {
1932cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1942cd769179345799d383f92dd615991755ec24be1Vadim Girlin	int r;
1952cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1962cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cf_node *cf = sh->create_cf();
1972cd769179345799d383f92dd615991755ec24be1Vadim Girlin	sh->root->push_back(cf);
1982cd769179345799d383f92dd615991755ec24be1Vadim Girlin
1992cd769179345799d383f92dd615991755ec24be1Vadim Girlin	unsigned id = i >> 1;
2002cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2012cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cf->bc.id = id;
2022cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2032cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (cf_map.size() < id + 1)
2042cd769179345799d383f92dd615991755ec24be1Vadim Girlin		cf_map.resize(id + 1);
2052cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2062cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cf_map[id] = cf;
2072cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2082cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if ((r = dec->decode_cf(i, cf->bc)))
2092cd769179345799d383f92dd615991755ec24be1Vadim Girlin		return r;
2102cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2112cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
2122cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2132cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (flags & CF_ALU) {
21457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		if ((r = decode_alu_clause(cf)))
2152cd769179345799d383f92dd615991755ec24be1Vadim Girlin			return r;
2162cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} else if (flags & CF_FETCH) {
21757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		if ((r = decode_fetch_clause(cf)))
2182cd769179345799d383f92dd615991755ec24be1Vadim Girlin			return r;;
2192cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} else if (flags & CF_EXP) {
2205a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin		if (cf->bc.rw_rel)
2215a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin			gpr_reladdr = true;
2222cd769179345799d383f92dd615991755ec24be1Vadim Girlin		assert(!cf->bc.rw_rel);
2235758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie	} else if (flags & CF_MEM) {
2245a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin		if (cf->bc.rw_rel)
2255a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin			gpr_reladdr = true;
2262cd769179345799d383f92dd615991755ec24be1Vadim Girlin		assert(!cf->bc.rw_rel);
2272cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} else if (flags & CF_BRANCH) {
2282cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (cf->bc.addr > max_cf)
2292cd769179345799d383f92dd615991755ec24be1Vadim Girlin			max_cf = cf->bc.addr;
2302cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
2312cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2322cd769179345799d383f92dd615991755ec24be1Vadim Girlin	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
2332cd769179345799d383f92dd615991755ec24be1Vadim Girlin			cf->bc.op == CF_OP_RET;
2342cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
2352cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
2362cd769179345799d383f92dd615991755ec24be1Vadim Girlin
23757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_alu_clause(cf_node* cf) {
2382cd769179345799d383f92dd615991755ec24be1Vadim Girlin	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
2392cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2403f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin	cf->subtype = NST_ALU_CLAUSE;
2413f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin
2422cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cgroup = 0;
2432cd769179345799d383f92dd615991755ec24be1Vadim Girlin	memset(slots[0], 0, 5*sizeof(slots[0][0]));
2442cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2452cd769179345799d383f92dd615991755ec24be1Vadim Girlin	unsigned ng = 0;
2462cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2472cd769179345799d383f92dd615991755ec24be1Vadim Girlin	do {
24857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		decode_alu_group(cf, i, gcnt);
2492cd769179345799d383f92dd615991755ec24be1Vadim Girlin		assert(gcnt <= cnt);
2502cd769179345799d383f92dd615991755ec24be1Vadim Girlin		cnt -= gcnt;
2512cd769179345799d383f92dd615991755ec24be1Vadim Girlin		ng++;
2522cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} while (cnt);
2532cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2542cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
2552cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
2562cd769179345799d383f92dd615991755ec24be1Vadim Girlin
25757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
2582cd769179345799d383f92dd615991755ec24be1Vadim Girlin	int r;
2592cd769179345799d383f92dd615991755ec24be1Vadim Girlin	alu_node *n;
2602cd769179345799d383f92dd615991755ec24be1Vadim Girlin	alu_group_node *g = sh->create_alu_group();
2612cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2622cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cgroup = !cgroup;
2632cd769179345799d383f92dd615991755ec24be1Vadim Girlin	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
2642cd769179345799d383f92dd615991755ec24be1Vadim Girlin	gcnt = 0;
2652cd769179345799d383f92dd615991755ec24be1Vadim Girlin
26657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	unsigned literal_mask = 0;
26757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
2682cd769179345799d383f92dd615991755ec24be1Vadim Girlin	do {
2692cd769179345799d383f92dd615991755ec24be1Vadim Girlin		n = sh->create_alu();
2702cd769179345799d383f92dd615991755ec24be1Vadim Girlin		g->push_back(n);
2712cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2722cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if ((r = dec->decode_alu(i, n->bc)))
2732cd769179345799d383f92dd615991755ec24be1Vadim Girlin			return r;
2742cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2752cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (!sh->assign_slot(n, slots[cgroup])) {
2762cd769179345799d383f92dd615991755ec24be1Vadim Girlin			assert(!"alu slot assignment failed");
2772cd769179345799d383f92dd615991755ec24be1Vadim Girlin			return -1;
2782cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
2792cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2802cd769179345799d383f92dd615991755ec24be1Vadim Girlin		gcnt++;
2812cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2822cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} while (gcnt <= 5 && !n->bc.last);
2832cd769179345799d383f92dd615991755ec24be1Vadim Girlin
2842cd769179345799d383f92dd615991755ec24be1Vadim Girlin	assert(n->bc.last);
2852cd769179345799d383f92dd615991755ec24be1Vadim Girlin
28657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
28757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		n = static_cast<alu_node*>(*I);
28857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
2895a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin		if (n->bc.dst_rel)
2905a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin			gpr_reladdr = true;
2915a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin
29257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
29357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			bc_alu_src &src = n->bc.src[k];
2945a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin			if (src.rel)
2955a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin				gpr_reladdr = true;
29657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			if (src.sel == ALU_SRC_LITERAL) {
29757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin				literal_mask |= (1 << src.chan);
29857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin				src.value.u = dw[i + src.chan];
29957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			}
30057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		}
30157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	}
30257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
30357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	unsigned literal_ndw = 0;
30457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	while (literal_mask) {
30557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		g->literals.push_back(dw[i + literal_ndw]);
30657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		literal_ndw += 1;
30757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		literal_mask >>= 1;
30857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	}
30957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
31057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	literal_ndw = (literal_ndw + 1) & ~1u;
31157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
31257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	i += literal_ndw;
31357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	gcnt += literal_ndw >> 1;
31457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
31557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	cf->push_back(g);
31657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	return 0;
31757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin}
31857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
31957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_alu_clause(cf_node* cf) {
32057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
32157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	// loop over alu groups
32257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
32357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		assert(I->subtype == NST_ALU_GROUP);
32457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		alu_group_node *g = static_cast<alu_group_node*>(*I);
32557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		prepare_alu_group(cf, g);
32657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	}
32757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
32857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	return 0;
32957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin}
33057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
33157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
33257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
33357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	alu_node *n;
33457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
33557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	cgroup = !cgroup;
33657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
3372cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3382cd769179345799d383f92dd615991755ec24be1Vadim Girlin	for (node_iterator I = g->begin(), E = g->end();
3392cd769179345799d383f92dd615991755ec24be1Vadim Girlin			I != E; ++I) {
3402cd769179345799d383f92dd615991755ec24be1Vadim Girlin		n = static_cast<alu_node*>(*I);
34157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
34257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		if (!sh->assign_slot(n, slots[cgroup])) {
34357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			assert(!"alu slot assignment failed");
34457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			return -1;
34557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		}
34657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
3472cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned src_count = n->bc.op_ptr->src_count;
3482cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3492cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
3502cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->flags |= NF_ALU_4SLOT;
3512cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3522cd769179345799d383f92dd615991755ec24be1Vadim Girlin		n->src.resize(src_count);
3532cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3542cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned flags = n->bc.op_ptr->flags;
3552cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3562cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (flags & AF_PRED) {
3572cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->dst.resize(3);
3582cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (n->bc.update_pred)
3592cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
3602cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (n->bc.update_exec_mask)
3612cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);
3622cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3632cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->flags |= NF_DONT_HOIST;
3642cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3652cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else if (flags & AF_KILL) {
3662cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3672cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->dst.resize(2);
3682cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
3692cd769179345799d383f92dd615991755ec24be1Vadim Girlin			sh->set_uses_kill();
3702cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3712cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
3722cd769179345799d383f92dd615991755ec24be1Vadim Girlin					NF_DONT_KILL | NF_SCHEDULE_EARLY;
3732cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3742cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else {
3752cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->dst.resize(1);
3762cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
3772cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3782cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (flags & AF_MOVA) {
3792cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3802cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->dst[0] = sh->get_special_value(SV_AR_INDEX);
3812cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3822cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->flags |= NF_DONT_HOIST;
3832cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3842cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
3852cd769179345799d383f92dd615991755ec24be1Vadim Girlin			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);
3862cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3872cd769179345799d383f92dd615991755ec24be1Vadim Girlin			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
3882cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->bc.dst_rel);
3892cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3902cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->dst[0] = v;
3912cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
3922cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3932cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (n->bc.pred_sel) {
3942cd769179345799d383f92dd615991755ec24be1Vadim Girlin			sh->has_alu_predication = true;
3952cd769179345799d383f92dd615991755ec24be1Vadim Girlin			n->pred = sh->get_special_value(SV_ALU_PRED);
3962cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
3972cd769179345799d383f92dd615991755ec24be1Vadim Girlin
3982cd769179345799d383f92dd615991755ec24be1Vadim Girlin		for (unsigned s = 0; s < src_count; ++s) {
3992cd769179345799d383f92dd615991755ec24be1Vadim Girlin			bc_alu_src &src = n->bc.src[s];
4002cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4012cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (src.sel == ALU_SRC_LITERAL) {
4022cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->src[s] = sh->get_const_value(src.value);
4032cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
4042cd769179345799d383f92dd615991755ec24be1Vadim Girlin				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
4052cd769179345799d383f92dd615991755ec24be1Vadim Girlin						SLOT_TRANS : src.chan;
40696efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin
40796efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin				// XXX shouldn't happen but llvm backend uses PS on cayman
40896efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
40996efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin					prev_slot = SLOT_X;
41096efa4cdf48f72fd6b165c4a725f0d1542159a5eVadim Girlin
4112cd769179345799d383f92dd615991755ec24be1Vadim Girlin				alu_node *prev_alu = slots[pgroup][prev_slot];
4122cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4132cd769179345799d383f92dd615991755ec24be1Vadim Girlin				assert(prev_alu);
4142cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4152cd769179345799d383f92dd615991755ec24be1Vadim Girlin				if (!prev_alu->dst[0]) {
4162cd769179345799d383f92dd615991755ec24be1Vadim Girlin					value * t = sh->create_temp_value();
4172cd769179345799d383f92dd615991755ec24be1Vadim Girlin					prev_alu->dst[0] = t;
4182cd769179345799d383f92dd615991755ec24be1Vadim Girlin				}
4192cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4202cd769179345799d383f92dd615991755ec24be1Vadim Girlin				value *d = prev_alu->dst[0];
4212cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4222cd769179345799d383f92dd615991755ec24be1Vadim Girlin				if (d->is_rel()) {
4232cd769179345799d383f92dd615991755ec24be1Vadim Girlin					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
4242cd769179345799d383f92dd615991755ec24be1Vadim Girlin					                      prev_alu->bc.dst_chan,
4252cd769179345799d383f92dd615991755ec24be1Vadim Girlin					                      prev_alu->bc.dst_rel);
4262cd769179345799d383f92dd615991755ec24be1Vadim Girlin				}
4272cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4282cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->src[s] = d;
4292cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} else if (ctx.is_kcache_sel(src.sel)) {
4302cd769179345799d383f92dd615991755ec24be1Vadim Girlin				unsigned sel = src.sel, kc_addr;
4312cd769179345799d383f92dd615991755ec24be1Vadim Girlin				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);
4322cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4332cd769179345799d383f92dd615991755ec24be1Vadim Girlin				bc_kcache &kc = cf->bc.kc[kc_set];
4342cd769179345799d383f92dd615991755ec24be1Vadim Girlin				kc_addr = (kc.addr << 4) + (sel & 0x1F);
4352cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan);
4362cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} else if (src.sel < MAX_GPR) {
4372cd769179345799d383f92dd615991755ec24be1Vadim Girlin				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);
4382cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4392cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->src[s] = v;
4402cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4412cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
4422cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// using slot for value channel because in fact the slot
4432cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// determines the channel that is loaded by INTERP_LOAD_P0
4442cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// (and maybe some others).
4452cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// otherwise GVN will consider INTERP_LOAD_P0s with the same
4462cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// param index as equal instructions and leave only one of them
4472cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
4482cd769179345799d383f92dd615991755ec24be1Vadim Girlin				                                              n->bc.slot));
4492cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} else {
4502cd769179345799d383f92dd615991755ec24be1Vadim Girlin				switch (src.sel) {
4512cd769179345799d383f92dd615991755ec24be1Vadim Girlin				case ALU_SRC_0:
4522cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->src[s] = sh->get_const_value(0);
4532cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
4542cd769179345799d383f92dd615991755ec24be1Vadim Girlin				case ALU_SRC_0_5:
4552cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->src[s] = sh->get_const_value(0.5f);
4562cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
4572cd769179345799d383f92dd615991755ec24be1Vadim Girlin				case ALU_SRC_1:
4582cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->src[s] = sh->get_const_value(1.0f);
4592cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
4602cd769179345799d383f92dd615991755ec24be1Vadim Girlin				case ALU_SRC_1_INT:
4612cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->src[s] = sh->get_const_value(1);
4622cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
4632cd769179345799d383f92dd615991755ec24be1Vadim Girlin				case ALU_SRC_M_1_INT:
4642cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->src[s] = sh->get_const_value(-1);
4652cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
4662cd769179345799d383f92dd615991755ec24be1Vadim Girlin				default:
4672cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->src[s] = sh->get_special_ro_value(src.sel);
4682cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
4692cd769179345799d383f92dd615991755ec24be1Vadim Girlin				}
4702cd769179345799d383f92dd615991755ec24be1Vadim Girlin			}
4712cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
4722cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
4732cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4742cd769179345799d383f92dd615991755ec24be1Vadim Girlin	// pack multislot instructions into alu_packed_node
4752cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4762cd769179345799d383f92dd615991755ec24be1Vadim Girlin	alu_packed_node *p = NULL;
4772cd769179345799d383f92dd615991755ec24be1Vadim Girlin	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
4782cd769179345799d383f92dd615991755ec24be1Vadim Girlin		N = I + 1;
4792cd769179345799d383f92dd615991755ec24be1Vadim Girlin		alu_node *a = static_cast<alu_node*>(*I);
4802cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned sflags = a->bc.slot_flags;
4812cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4822cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
4832cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (!p)
4842cd769179345799d383f92dd615991755ec24be1Vadim Girlin				p = sh->create_alu_packed();
4852cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4862cd769179345799d383f92dd615991755ec24be1Vadim Girlin			a->remove();
4872cd769179345799d383f92dd615991755ec24be1Vadim Girlin			p->push_back(a);
4882cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
4892cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
4902cd769179345799d383f92dd615991755ec24be1Vadim Girlin
4912cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (p) {
4922cd769179345799d383f92dd615991755ec24be1Vadim Girlin		g->push_front(p);
4932cd769179345799d383f92dd615991755ec24be1Vadim Girlin
49457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		if (p->count() == 3 && ctx.is_cayman()) {
49557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			// cayman's scalar instruction that can use 3 or 4 slots
4962cd769179345799d383f92dd615991755ec24be1Vadim Girlin
49757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			// FIXME for simplicity we'll always add 4th slot,
49857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			// but probably we might want to always remove 4th slot and make
49957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			// sure that regalloc won't choose 'w' component for dst
5002cd769179345799d383f92dd615991755ec24be1Vadim Girlin
50157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			alu_node *f = static_cast<alu_node*>(p->first);
50257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			alu_node *a = sh->create_alu();
50357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			a->src = f->src;
50457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			a->dst.resize(f->dst.size());
50557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			a->bc = f->bc;
50657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			a->bc.slot = SLOT_W;
50757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			p->push_back(a);
50857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		}
50957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	}
5102cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5112cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
5122cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
5132cd769179345799d383f92dd615991755ec24be1Vadim Girlin
51457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::decode_fetch_clause(cf_node* cf) {
5152cd769179345799d383f92dd615991755ec24be1Vadim Girlin	int r;
5162cd769179345799d383f92dd615991755ec24be1Vadim Girlin	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
5172cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5183f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin	cf->subtype = NST_TEX_CLAUSE;
5193f18dd818f7ce0adab0afe1a4b4db2e242513086Vadim Girlin
5202cd769179345799d383f92dd615991755ec24be1Vadim Girlin	while (cnt--) {
5212cd769179345799d383f92dd615991755ec24be1Vadim Girlin		fetch_node *n = sh->create_fetch();
5222cd769179345799d383f92dd615991755ec24be1Vadim Girlin		cf->push_back(n);
5232cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if ((r = dec->decode_fetch(i, n->bc)))
5242cd769179345799d383f92dd615991755ec24be1Vadim Girlin			return r;
5255a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin		if (n->bc.src_rel || n->bc.dst_rel)
5265a68a29706002cd9f59faeb3ce18e7aed8a74201Vadim Girlin			gpr_reladdr = true;
5272cd769179345799d383f92dd615991755ec24be1Vadim Girlin
52857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	}
52957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	return 0;
53057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin}
53157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
53257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlinint bc_parser::prepare_fetch_clause(cf_node *cf) {
53357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
534dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard	vvec grad_v, grad_h, texture_offsets;
53557d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
53657d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
53757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
53857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		fetch_node *n = static_cast<fetch_node*>(*I);
53957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		assert(n->is_valid());
54057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
5412cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned flags = n->bc.op_ptr->flags;
5422cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5432cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned vtx = flags & FF_VTX;
5442cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned num_src = vtx ? ctx.vtx_src_num : 4;
5452cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5462cd769179345799d383f92dd615991755ec24be1Vadim Girlin		n->dst.resize(4);
5472cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5482cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
5492cd769179345799d383f92dd615991755ec24be1Vadim Girlin			sh->uses_gradients = true;
5502cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
5512cd769179345799d383f92dd615991755ec24be1Vadim Girlin
552dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard		if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {
5532cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5542cd769179345799d383f92dd615991755ec24be1Vadim Girlin			vvec *grad = NULL;
5552cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5562cd769179345799d383f92dd615991755ec24be1Vadim Girlin			switch (n->bc.op) {
5572cd769179345799d383f92dd615991755ec24be1Vadim Girlin				case FETCH_OP_SET_GRADIENTS_V:
5582cd769179345799d383f92dd615991755ec24be1Vadim Girlin					grad = &grad_v;
5592cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
5602cd769179345799d383f92dd615991755ec24be1Vadim Girlin				case FETCH_OP_SET_GRADIENTS_H:
5612cd769179345799d383f92dd615991755ec24be1Vadim Girlin					grad = &grad_h;
5622cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
563dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard				case FETCH_OP_SET_TEXTURE_OFFSETS:
564dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard					grad = &texture_offsets;
565dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard					break;
5662cd769179345799d383f92dd615991755ec24be1Vadim Girlin				default:
5672cd769179345799d383f92dd615991755ec24be1Vadim Girlin					assert(!"unexpected SET_GRAD instruction");
5682cd769179345799d383f92dd615991755ec24be1Vadim Girlin					return -1;
5692cd769179345799d383f92dd615991755ec24be1Vadim Girlin			}
5702cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5712cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (grad->empty())
5722cd769179345799d383f92dd615991755ec24be1Vadim Girlin				grad->resize(4);
5732cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5742cd769179345799d383f92dd615991755ec24be1Vadim Girlin			for(unsigned s = 0; s < 4; ++s) {
5752cd769179345799d383f92dd615991755ec24be1Vadim Girlin				unsigned sw = n->bc.src_sel[s];
5762cd769179345799d383f92dd615991755ec24be1Vadim Girlin				if (sw <= SEL_W)
5772cd769179345799d383f92dd615991755ec24be1Vadim Girlin					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
5782cd769179345799d383f92dd615991755ec24be1Vadim Girlin					                               sw, false);
5792cd769179345799d383f92dd615991755ec24be1Vadim Girlin				else if (sw == SEL_0)
5802cd769179345799d383f92dd615991755ec24be1Vadim Girlin					(*grad)[s] = sh->get_const_value(0.0f);
5812cd769179345799d383f92dd615991755ec24be1Vadim Girlin				else if (sw == SEL_1)
5822cd769179345799d383f92dd615991755ec24be1Vadim Girlin					(*grad)[s] = sh->get_const_value(1.0f);
5832cd769179345799d383f92dd615991755ec24be1Vadim Girlin			}
5842cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else {
585dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard			// Fold source values for instructions with hidden target values in to the instructions
586dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard			// using them. The set instructions are later re-emitted by bc_finalizer
5872cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (flags & FF_USEGRAD) {
5882cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->src.resize(12);
5892cd769179345799d383f92dd615991755ec24be1Vadim Girlin				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
5902cd769179345799d383f92dd615991755ec24be1Vadim Girlin				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
591dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard			} else if (flags & FF_USE_TEXTURE_OFFSETS) {
592dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard				n->src.resize(8);
593dfa10ed2640a350a84e6e31edd22560155cd5016Glenn Kennard				std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4);
5942cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} else {
5952cd769179345799d383f92dd615991755ec24be1Vadim Girlin				n->src.resize(4);
5962cd769179345799d383f92dd615991755ec24be1Vadim Girlin			}
5972cd769179345799d383f92dd615991755ec24be1Vadim Girlin
5982cd769179345799d383f92dd615991755ec24be1Vadim Girlin			for(int s = 0; s < 4; ++s) {
5992cd769179345799d383f92dd615991755ec24be1Vadim Girlin				if (n->bc.dst_sel[s] != SEL_MASK)
6002cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
6012cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// NOTE: it doesn't matter here which components of the result we
6022cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// are using, but original n->bc.dst_sel should be taken into
6032cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// account when building the bytecode
6042cd769179345799d383f92dd615991755ec24be1Vadim Girlin			}
6052cd769179345799d383f92dd615991755ec24be1Vadim Girlin			for(unsigned s = 0; s < num_src; ++s) {
6062cd769179345799d383f92dd615991755ec24be1Vadim Girlin				if (n->bc.src_sel[s] <= SEL_W)
6072cd769179345799d383f92dd615991755ec24be1Vadim Girlin					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
6082cd769179345799d383f92dd615991755ec24be1Vadim Girlin					                              n->bc.src_sel[s], false);
6092cd769179345799d383f92dd615991755ec24be1Vadim Girlin			}
6102cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6112cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
6122cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
61357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin
6142cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
6152cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
6162cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6172cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_ir() {
6182cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6192cd769179345799d383f92dd615991755ec24be1Vadim Girlin	for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
6202cd769179345799d383f92dd615991755ec24be1Vadim Girlin		cf_node *c = *I;
6212cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6222cd769179345799d383f92dd615991755ec24be1Vadim Girlin		if (!c)
6232cd769179345799d383f92dd615991755ec24be1Vadim Girlin			continue;
6242cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6252cd769179345799d383f92dd615991755ec24be1Vadim Girlin		unsigned flags = c->bc.op_ptr->flags;
6262cd769179345799d383f92dd615991755ec24be1Vadim Girlin
62757d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		if (flags & CF_ALU) {
62857d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			prepare_alu_clause(c);
62957d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		} else if (flags & CF_FETCH) {
63057d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			prepare_fetch_clause(c);
63157d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		} else if (c->bc.op == CF_OP_CALL_FS) {
63257d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			sh->init_call_fs(c);
63357d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
63457d1be0d2d195dac0e08585b6cd098779f7a7bd7Vadim Girlin		} else if (flags & CF_LOOP_START) {
6352cd769179345799d383f92dd615991755ec24be1Vadim Girlin			prepare_loop(c);
6362cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else if (c->bc.op == CF_OP_JUMP) {
6372cd769179345799d383f92dd615991755ec24be1Vadim Girlin			prepare_if(c);
6382cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else if (c->bc.op == CF_OP_LOOP_END) {
6392cd769179345799d383f92dd615991755ec24be1Vadim Girlin			loop_stack.pop();
6402cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
6412cd769179345799d383f92dd615991755ec24be1Vadim Girlin			assert(!loop_stack.empty());
6422cd769179345799d383f92dd615991755ec24be1Vadim Girlin			repeat_node *rep = sh->create_repeat(loop_stack.top());
6432cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (c->parent->first != c)
6442cd769179345799d383f92dd615991755ec24be1Vadim Girlin				rep->move(c->parent->first, c);
6452cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->replace_with(rep);
6462cd769179345799d383f92dd615991755ec24be1Vadim Girlin			sh->simplify_dep_rep(rep);
6472cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
6482cd769179345799d383f92dd615991755ec24be1Vadim Girlin			assert(!loop_stack.empty());
6492cd769179345799d383f92dd615991755ec24be1Vadim Girlin			depart_node *dep = sh->create_depart(loop_stack.top());
6502cd769179345799d383f92dd615991755ec24be1Vadim Girlin			if (c->parent->first != c)
6512cd769179345799d383f92dd615991755ec24be1Vadim Girlin				dep->move(c->parent->first, c);
6522cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->replace_with(dep);
6532cd769179345799d383f92dd615991755ec24be1Vadim Girlin			sh->simplify_dep_rep(dep);
6542cd769179345799d383f92dd615991755ec24be1Vadim Girlin		} else if (flags & CF_EXP) {
6552cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6562cd769179345799d383f92dd615991755ec24be1Vadim Girlin			// unroll burst exports
6572cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6582cd769179345799d383f92dd615991755ec24be1Vadim Girlin			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);
6592cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6602cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->bc.set_op(CF_OP_EXPORT);
6612cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6622cd769179345799d383f92dd615991755ec24be1Vadim Girlin			unsigned burst_count = c->bc.burst_count;
6632cd769179345799d383f92dd615991755ec24be1Vadim Girlin			unsigned eop = c->bc.end_of_program;
6642cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6652cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->bc.end_of_program = 0;
6662cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->bc.burst_count = 0;
6672cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6682cd769179345799d383f92dd615991755ec24be1Vadim Girlin			do {
6692cd769179345799d383f92dd615991755ec24be1Vadim Girlin				c->src.resize(4);
6702cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6712cd769179345799d383f92dd615991755ec24be1Vadim Girlin				for(int s = 0; s < 4; ++s) {
6722cd769179345799d383f92dd615991755ec24be1Vadim Girlin					switch (c->bc.sel[s]) {
6732cd769179345799d383f92dd615991755ec24be1Vadim Girlin					case SEL_0:
6742cd769179345799d383f92dd615991755ec24be1Vadim Girlin						c->src[s] = sh->get_const_value(0.0f);
6752cd769179345799d383f92dd615991755ec24be1Vadim Girlin						break;
6762cd769179345799d383f92dd615991755ec24be1Vadim Girlin					case SEL_1:
6772cd769179345799d383f92dd615991755ec24be1Vadim Girlin						c->src[s] = sh->get_const_value(1.0f);
6782cd769179345799d383f92dd615991755ec24be1Vadim Girlin						break;
6792cd769179345799d383f92dd615991755ec24be1Vadim Girlin					case SEL_MASK:
6802cd769179345799d383f92dd615991755ec24be1Vadim Girlin						break;
6812cd769179345799d383f92dd615991755ec24be1Vadim Girlin					default:
6822cd769179345799d383f92dd615991755ec24be1Vadim Girlin						if (c->bc.sel[s] <= SEL_W)
6832cd769179345799d383f92dd615991755ec24be1Vadim Girlin							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
6842cd769179345799d383f92dd615991755ec24be1Vadim Girlin									c->bc.sel[s], false);
6852cd769179345799d383f92dd615991755ec24be1Vadim Girlin						else
6862cd769179345799d383f92dd615991755ec24be1Vadim Girlin							assert(!"invalid src_sel for export");
6872cd769179345799d383f92dd615991755ec24be1Vadim Girlin					}
6882cd769179345799d383f92dd615991755ec24be1Vadim Girlin				}
6892cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6902cd769179345799d383f92dd615991755ec24be1Vadim Girlin				if (!burst_count--)
6912cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
6922cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6932cd769179345799d383f92dd615991755ec24be1Vadim Girlin				cf_node *cf_next = sh->create_cf();
6942cd769179345799d383f92dd615991755ec24be1Vadim Girlin				cf_next->bc = c->bc;
6952cd769179345799d383f92dd615991755ec24be1Vadim Girlin				++cf_next->bc.rw_gpr;
6962cd769179345799d383f92dd615991755ec24be1Vadim Girlin				++cf_next->bc.array_base;
6972cd769179345799d383f92dd615991755ec24be1Vadim Girlin
6982cd769179345799d383f92dd615991755ec24be1Vadim Girlin				c->insert_after(cf_next);
6992cd769179345799d383f92dd615991755ec24be1Vadim Girlin				c = cf_next;
7002cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7012cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} while (1);
7022cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7032cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->bc.end_of_program = eop;
7045758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie		} else if (flags & CF_MEM) {
7052cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7062cd769179345799d383f92dd615991755ec24be1Vadim Girlin			unsigned burst_count = c->bc.burst_count;
7072cd769179345799d383f92dd615991755ec24be1Vadim Girlin			unsigned eop = c->bc.end_of_program;
7082cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7092cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->bc.end_of_program = 0;
7102cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->bc.burst_count = 0;
7112cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7122cd769179345799d383f92dd615991755ec24be1Vadim Girlin			do {
7132cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7142cd769179345799d383f92dd615991755ec24be1Vadim Girlin				c->src.resize(4);
7152cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7162cd769179345799d383f92dd615991755ec24be1Vadim Girlin				for(int s = 0; s < 4; ++s) {
7172cd769179345799d383f92dd615991755ec24be1Vadim Girlin					if (c->bc.comp_mask & (1 << s))
7182cd769179345799d383f92dd615991755ec24be1Vadim Girlin						c->src[s] =
7192cd769179345799d383f92dd615991755ec24be1Vadim Girlin								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
7202cd769179345799d383f92dd615991755ec24be1Vadim Girlin				}
7212cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7225758a76d04aef90342e2b823c5020c6addda6d9cDave Airlie				if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
7232cd769179345799d383f92dd615991755ec24be1Vadim Girlin					c->src.resize(8);
7242cd769179345799d383f92dd615991755ec24be1Vadim Girlin					for(int s = 0; s < 3; ++s) {
7252cd769179345799d383f92dd615991755ec24be1Vadim Girlin						c->src[4 + s] =
7262cd769179345799d383f92dd615991755ec24be1Vadim Girlin							sh->get_gpr_value(true, c->bc.index_gpr, s, false);
7272cd769179345799d383f92dd615991755ec24be1Vadim Girlin					}
7282cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7292cd769179345799d383f92dd615991755ec24be1Vadim Girlin					// FIXME probably we can relax it a bit
7302cd769179345799d383f92dd615991755ec24be1Vadim Girlin					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
7312cd769179345799d383f92dd615991755ec24be1Vadim Girlin				}
7322cd769179345799d383f92dd615991755ec24be1Vadim Girlin
733f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard				if (flags & CF_EMIT) {
734f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
735f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
736f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
737f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard					if (sh->target == TARGET_ES) {
738f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard						// For ES shaders this is an export
739f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard						c->flags |= NF_DONT_KILL;
740f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard					}
741f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard				}
742f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard
7432cd769179345799d383f92dd615991755ec24be1Vadim Girlin				if (!burst_count--)
7442cd769179345799d383f92dd615991755ec24be1Vadim Girlin					break;
7452cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7462cd769179345799d383f92dd615991755ec24be1Vadim Girlin				cf_node *cf_next = sh->create_cf();
7472cd769179345799d383f92dd615991755ec24be1Vadim Girlin				cf_next->bc = c->bc;
7482cd769179345799d383f92dd615991755ec24be1Vadim Girlin				++cf_next->bc.rw_gpr;
7492cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7502cd769179345799d383f92dd615991755ec24be1Vadim Girlin				// FIXME is it correct?
7512cd769179345799d383f92dd615991755ec24be1Vadim Girlin				cf_next->bc.array_base += cf_next->bc.elem_size + 1;
7522cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7532cd769179345799d383f92dd615991755ec24be1Vadim Girlin				c->insert_after(cf_next);
7542cd769179345799d383f92dd615991755ec24be1Vadim Girlin				c = cf_next;
7552cd769179345799d383f92dd615991755ec24be1Vadim Girlin			} while (1);
7562cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7572cd769179345799d383f92dd615991755ec24be1Vadim Girlin			c->bc.end_of_program = eop;
7582cd769179345799d383f92dd615991755ec24be1Vadim Girlin
759f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard		} else if (flags & CF_EMIT) {
760f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard			c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
761f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard
762f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard			c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
763f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5Glenn Kennard			c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
7642cd769179345799d383f92dd615991755ec24be1Vadim Girlin		}
7652cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
7662cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7672cd769179345799d383f92dd615991755ec24be1Vadim Girlin	assert(loop_stack.empty());
7682cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
7692cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
7702cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7712cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_loop(cf_node* c) {
7722cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7732cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cf_node *end = cf_map[c->bc.addr - 1];
7742cd769179345799d383f92dd615991755ec24be1Vadim Girlin	assert(end->bc.op == CF_OP_LOOP_END);
7752cd769179345799d383f92dd615991755ec24be1Vadim Girlin	assert(c->parent == end->parent);
7762cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7772cd769179345799d383f92dd615991755ec24be1Vadim Girlin	region_node *reg = sh->create_region();
7782cd769179345799d383f92dd615991755ec24be1Vadim Girlin	repeat_node *rep = sh->create_repeat(reg);
7792cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7802cd769179345799d383f92dd615991755ec24be1Vadim Girlin	reg->push_back(rep);
7812cd769179345799d383f92dd615991755ec24be1Vadim Girlin	c->insert_before(reg);
7822cd769179345799d383f92dd615991755ec24be1Vadim Girlin	rep->move(c, end->next);
7832cd769179345799d383f92dd615991755ec24be1Vadim Girlin
784de0fd375f6de8f3357d05decc4a7dc231c679645Vadim Girlin	reg->src_loop = true;
785de0fd375f6de8f3357d05decc4a7dc231c679645Vadim Girlin
7862cd769179345799d383f92dd615991755ec24be1Vadim Girlin	loop_stack.push(reg);
7872cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
7882cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
7892cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7902cd769179345799d383f92dd615991755ec24be1Vadim Girlinint bc_parser::prepare_if(cf_node* c) {
7912cd769179345799d383f92dd615991755ec24be1Vadim Girlin	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];
7922cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7932cd769179345799d383f92dd615991755ec24be1Vadim Girlin	BCP_DUMP(
794ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin		sblog << "parsing JUMP @" << c->bc.id;
795ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin		sblog << "\n";
7962cd769179345799d383f92dd615991755ec24be1Vadim Girlin	);
7972cd769179345799d383f92dd615991755ec24be1Vadim Girlin
7982cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (end->bc.op == CF_OP_ELSE) {
7992cd769179345799d383f92dd615991755ec24be1Vadim Girlin		BCP_DUMP(
800ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin			sblog << "  found ELSE : ";
8012cd769179345799d383f92dd615991755ec24be1Vadim Girlin			dump::dump_op(end);
802ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin			sblog << "\n";
8032cd769179345799d383f92dd615991755ec24be1Vadim Girlin		);
8042cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8052cd769179345799d383f92dd615991755ec24be1Vadim Girlin		c_else = end;
8062cd769179345799d383f92dd615991755ec24be1Vadim Girlin		end = cf_map[c_else->bc.addr];
8072cd769179345799d383f92dd615991755ec24be1Vadim Girlin	} else {
8082cd769179345799d383f92dd615991755ec24be1Vadim Girlin		BCP_DUMP(
809ecde4b07e2208934a17a09d26c43baf314c10a60Vadim Girlin			sblog << "  no else\n";
8102cd769179345799d383f92dd615991755ec24be1Vadim Girlin		);
8112cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8122cd769179345799d383f92dd615991755ec24be1Vadim Girlin		c_else = end;
8132cd769179345799d383f92dd615991755ec24be1Vadim Girlin	}
8142cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8152cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (c_else->parent != c->parent)
8162cd769179345799d383f92dd615991755ec24be1Vadim Girlin		c_else = NULL;
8172cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8182cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (end->parent != c->parent)
8192cd769179345799d383f92dd615991755ec24be1Vadim Girlin		end = NULL;
8202cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8212cd769179345799d383f92dd615991755ec24be1Vadim Girlin	region_node *reg = sh->create_region();
8222cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8232cd769179345799d383f92dd615991755ec24be1Vadim Girlin	depart_node *dep2 = sh->create_depart(reg);
8242cd769179345799d383f92dd615991755ec24be1Vadim Girlin	depart_node *dep = sh->create_depart(reg);
8252cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if_node *n_if = sh->create_if();
8262cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8272cd769179345799d383f92dd615991755ec24be1Vadim Girlin	c->insert_before(reg);
8282cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8292cd769179345799d383f92dd615991755ec24be1Vadim Girlin	if (c_else != end)
8302cd769179345799d383f92dd615991755ec24be1Vadim Girlin		dep->move(c_else, end);
8312cd769179345799d383f92dd615991755ec24be1Vadim Girlin	dep2->move(c, end);
8322cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8332cd769179345799d383f92dd615991755ec24be1Vadim Girlin	reg->push_back(dep);
8342cd769179345799d383f92dd615991755ec24be1Vadim Girlin	dep->push_front(n_if);
8352cd769179345799d383f92dd615991755ec24be1Vadim Girlin	n_if->push_back(dep2);
8362cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8372cd769179345799d383f92dd615991755ec24be1Vadim Girlin	n_if->cond = sh->get_special_value(SV_EXEC_MASK);
8382cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8392cd769179345799d383f92dd615991755ec24be1Vadim Girlin	return 0;
8402cd769179345799d383f92dd615991755ec24be1Vadim Girlin}
8412cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8422cd769179345799d383f92dd615991755ec24be1Vadim Girlin
8432cd769179345799d383f92dd615991755ec24be1Vadim Girlin} // namespace r600_sb
844