r600_shader.c revision 40cc5bfcd70e412289dbb32a1ebca91bf109e1bd
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	for (i = 0; i < 10; i++) {
48		spi_vs_out_id[i] = 0;
49	}
50	for (i = 0; i < 32; i++) {
51		tmp = i << ((i & 3) * 8);
52		spi_vs_out_id[i / 4] |= tmp;
53	}
54	for (i = 0; i < 10; i++) {
55		r600_pipe_state_add_reg(rstate,
56					R_028614_SPI_VS_OUT_ID_0 + i * 4,
57					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58	}
59
60	r600_pipe_state_add_reg(rstate,
61			R_0286C4_SPI_VS_OUT_CONFIG,
62			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63			0xFFFFFFFF, NULL);
64	r600_pipe_state_add_reg(rstate,
65			R_028868_SQ_PGM_RESOURCES_VS,
66			S_028868_NUM_GPRS(rshader->bc.ngpr) |
67			S_028868_STACK_SIZE(rshader->bc.nstack),
68			0xFFFFFFFF, NULL);
69	r600_pipe_state_add_reg(rstate,
70			R_0288A4_SQ_PGM_RESOURCES_FS,
71			0x00000000, 0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_0288DC_SQ_PGM_CF_OFFSET_FS,
77			0x00000000, 0xFFFFFFFF, NULL);
78	r600_pipe_state_add_reg(rstate,
79			R_028858_SQ_PGM_START_VS,
80			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
81	r600_pipe_state_add_reg(rstate,
82			R_028894_SQ_PGM_START_FS,
83			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
84
85	r600_pipe_state_add_reg(rstate,
86				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
87				0xFFFFFFFF, NULL);
88
89}
90
91int r600_find_vs_semantic_index(struct r600_shader *vs,
92				struct r600_shader *ps, int id)
93{
94	struct r600_shader_io *input = &ps->input[id];
95
96	for (int i = 0; i < vs->noutput; i++) {
97		if (input->name == vs->output[i].name &&
98			input->sid == vs->output[i].sid) {
99			return i - 1;
100		}
101	}
102	return 0;
103}
104
105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
106{
107	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
108	struct r600_pipe_state *rstate = &shader->rstate;
109	struct r600_shader *rshader = &shader->shader;
110	unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z;
111	boolean have_pos = FALSE, have_face = FALSE;
112
113	/* clear previous register */
114	rstate->nregs = 0;
115
116	for (i = 0; i < rshader->ninput; i++) {
117		tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
118		tmp |= S_028644_SEL_CENTROID(1);
119		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
120			have_pos = TRUE;
121		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
122		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
123		    rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
124			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
125		}
126		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
127			have_face = TRUE;
128		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
129			rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
130			tmp |= S_028644_PT_SPRITE_TEX(1);
131		}
132		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
133	}
134	for (i = 0; i < rshader->noutput; i++) {
135		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
136			r600_pipe_state_add_reg(rstate,
137						R_02880C_DB_SHADER_CONTROL,
138						S_02880C_Z_EXPORT_ENABLE(1),
139						S_02880C_Z_EXPORT_ENABLE(1), NULL);
140		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
141			r600_pipe_state_add_reg(rstate,
142						R_02880C_DB_SHADER_CONTROL,
143						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
144						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
145	}
146
147	exports_ps = 0;
148	num_cout = 0;
149	for (i = 0; i < rshader->noutput; i++) {
150		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
151			exports_ps |= 1;
152		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
153			num_cout++;
154		}
155	}
156	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
157	if (!exports_ps) {
158		/* always at least export 1 component per pixel */
159		exports_ps = 2;
160	}
161
162	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
163				S_0286CC_PERSP_GRADIENT_ENA(1);
164	spi_input_z = 0;
165	if (have_pos) {
166		spi_ps_in_control_0 |=  S_0286CC_POSITION_ENA(1) |
167					S_0286CC_BARYC_SAMPLE_CNTL(1);
168		spi_input_z |= 1;
169	}
170	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
171	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL);
172	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
173	r600_pipe_state_add_reg(rstate,
174				R_028840_SQ_PGM_START_PS,
175				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
176	r600_pipe_state_add_reg(rstate,
177				R_028850_SQ_PGM_RESOURCES_PS,
178				S_028868_NUM_GPRS(rshader->bc.ngpr) |
179				S_028868_STACK_SIZE(rshader->bc.nstack),
180				0xFFFFFFFF, NULL);
181	r600_pipe_state_add_reg(rstate,
182				R_028854_SQ_PGM_EXPORTS_PS,
183				exports_ps, 0xFFFFFFFF, NULL);
184	r600_pipe_state_add_reg(rstate,
185				R_0288CC_SQ_PGM_CF_OFFSET_PS,
186				0x00000000, 0xFFFFFFFF, NULL);
187
188	if (rshader->uses_kill) {
189		/* only set some bits here, the other bits are set in the dsa state */
190		r600_pipe_state_add_reg(rstate,
191					R_02880C_DB_SHADER_CONTROL,
192					S_02880C_KILL_ENABLE(1),
193					S_02880C_KILL_ENABLE(1), NULL);
194	}
195	r600_pipe_state_add_reg(rstate,
196				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
197				0xFFFFFFFF, NULL);
198}
199
200static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
201{
202	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
203	struct r600_shader *rshader = &shader->shader;
204	void *ptr;
205
206	/* copy new shader */
207	if (shader->bo == NULL) {
208		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0);
209		if (shader->bo == NULL) {
210			return -ENOMEM;
211		}
212		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
213		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
214		r600_bo_unmap(rctx->radeon, shader->bo);
215	}
216	/* build state */
217	rshader->flat_shade = rctx->flatshade;
218	switch (rshader->processor_type) {
219	case TGSI_PROCESSOR_VERTEX:
220		if (rshader->family >= CHIP_CEDAR) {
221			evergreen_pipe_shader_vs(ctx, shader);
222		} else {
223			r600_pipe_shader_vs(ctx, shader);
224		}
225		break;
226	case TGSI_PROCESSOR_FRAGMENT:
227		if (rshader->family >= CHIP_CEDAR) {
228			evergreen_pipe_shader_ps(ctx, shader);
229		} else {
230			r600_pipe_shader_ps(ctx, shader);
231		}
232		break;
233	default:
234		return -EINVAL;
235	}
236	r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
237	return 0;
238}
239
240static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
241{
242	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
243	struct r600_shader *shader = &rshader->shader;
244	const struct util_format_description *desc;
245	enum pipe_format resource_format[160];
246	unsigned i, nresources = 0;
247	struct r600_bc *bc = &shader->bc;
248	struct r600_bc_cf *cf;
249	struct r600_bc_vtx *vtx;
250
251	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
252		return 0;
253	/* doing a full memcmp fell over the refcount */
254	if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
255	    (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) {
256		return 0;
257	}
258	rshader->vertex_elements = *rctx->vertex_elements;
259	for (i = 0; i < rctx->vertex_elements->count; i++) {
260		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
261	}
262	r600_bo_reference(rctx->radeon, &rshader->bo, NULL);
263	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
264		switch (cf->inst) {
265		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
266		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
267			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
268				desc = util_format_description(resource_format[vtx->buffer_id]);
269				if (desc == NULL) {
270					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
271					return -EINVAL;
272				}
273				vtx->dst_sel_x = desc->swizzle[0];
274				vtx->dst_sel_y = desc->swizzle[1];
275				vtx->dst_sel_z = desc->swizzle[2];
276				vtx->dst_sel_w = desc->swizzle[3];
277			}
278			break;
279		default:
280			break;
281		}
282	}
283	return r600_bc_build(&shader->bc);
284}
285
286int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
287{
288	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
289	int r;
290
291	if (shader == NULL)
292		return -EINVAL;
293	/* there should be enough input */
294	if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
295		R600_ERR("%d resources provided, expecting %d\n",
296			rctx->vertex_elements->count, shader->shader.bc.nresource);
297		return -EINVAL;
298	}
299	r = r600_shader_update(ctx, shader);
300	if (r)
301		return r;
302	return r600_pipe_shader(ctx, shader);
303}
304
305int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
306int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
307{
308	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
309	int r;
310
311//fprintf(stderr, "--------------------------------------------------------------\n");
312//tgsi_dump(tokens, 0);
313	shader->shader.family = r600_get_family(rctx->radeon);
314	r = r600_shader_from_tgsi(tokens, &shader->shader);
315	if (r) {
316		R600_ERR("translation from TGSI failed !\n");
317		return r;
318	}
319	r = r600_bc_build(&shader->shader.bc);
320	if (r) {
321		R600_ERR("building bytecode failed !\n");
322		return r;
323	}
324//fprintf(stderr, "______________________________________________________________\n");
325	return 0;
326}
327
328/*
329 * tgsi -> r600 shader
330 */
331struct r600_shader_tgsi_instruction;
332
/* Working state of one TGSI -> r600 translation (see r600_shader_from_tgsi). */
struct r600_shader_ctx {
	/* filled by tgsi_scan_shader(); file_count[] drives register layout */
	struct tgsi_shader_info			info;
	/* token iterator; FullToken holds the token being translated */
	struct tgsi_parse_context		parse;
	/* the token stream being translated */
	const struct tgsi_token			*tokens;
	/* processor type read from the parsed header */
	unsigned				type;
	/* per register file base added to TGSI register indices */
	unsigned				file_offset[TGSI_FILE_COUNT];
	/* first register after the TGSI temporaries; base for r600_get_temp() */
	unsigned				temp_reg;
	/* opcode table entry of the instruction being translated */
	struct r600_shader_tgsi_instruction	*inst_info;
	/* bytecode being emitted (points at shader->bc) */
	struct r600_bc				*bc;
	/* shader receiving the input/output description */
	struct r600_shader			*shader;
	/* literal copied by tgsi_src() for the last immediate source seen */
	u32					value[4];
	/* every immediate seen so far, four dwords each */
	u32					*literals;
	/* number of immediates stored in literals */
	u32					nliterals;
	/* driver temps handed out for the current instruction */
	u32					max_driver_temp_used;
};
348
/* One entry of the per-opcode translation tables, indexed by TGSI opcode. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably marks three-operand ALU opcodes - not read in the code visible here */
	unsigned	is_op3;
	/* hardware opcode written to alu.inst by the generic handlers */
	unsigned	r600_opcode;
	/* translation callback; returns 0 or a negative errno value */
	int (*process)(struct r600_shader_ctx *ctx);
};
355
356static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
357static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
358
359static int tgsi_is_supported(struct r600_shader_ctx *ctx)
360{
361	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
362	int j;
363
364	if (i->Instruction.NumDstRegs > 1) {
365		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
366		return -EINVAL;
367	}
368	if (i->Instruction.Predicate) {
369		R600_ERR("predicate unsupported\n");
370		return -EINVAL;
371	}
372#if 0
373	if (i->Instruction.Label) {
374		R600_ERR("label unsupported\n");
375		return -EINVAL;
376	}
377#endif
378	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
379		if (i->Src[j].Register.Dimension ||
380			i->Src[j].Register.Absolute) {
381			R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
382				 i->Src[j].Register.Dimension,
383				 i->Src[j].Register.Absolute);
384			return -EINVAL;
385		}
386	}
387	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
388		if (i->Dst[j].Register.Dimension) {
389			R600_ERR("unsupported dst (dimension)\n");
390			return -EINVAL;
391		}
392	}
393	return 0;
394}
395
396static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
397{
398	int i, r;
399	struct r600_bc_alu alu;
400
401	for (i = 0; i < 8; i++) {
402		memset(&alu, 0, sizeof(struct r600_bc_alu));
403
404		if (i < 4)
405			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
406		else
407			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
408
409		if ((i > 1) && (i < 6)) {
410			alu.dst.sel = ctx->shader->input[gpr].gpr;
411			alu.dst.write = 1;
412		}
413
414		alu.dst.chan = i % 4;
415		alu.src[0].chan = (1 - (i % 2));
416		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr;
417
418		alu.bank_swizzle_force = SQ_ALU_VEC_210;
419		if ((i % 4) == 3)
420			alu.last = 1;
421		r = r600_bc_add_alu(ctx->bc, &alu);
422		if (r)
423			return r;
424	}
425	return 0;
426}
427
428
429static int tgsi_declaration(struct r600_shader_ctx *ctx)
430{
431	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
432	struct r600_bc_vtx vtx;
433	unsigned i;
434	int r;
435
436	switch (d->Declaration.File) {
437	case TGSI_FILE_INPUT:
438		i = ctx->shader->ninput++;
439		ctx->shader->input[i].name = d->Semantic.Name;
440		ctx->shader->input[i].sid = d->Semantic.Index;
441		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
442		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
443		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
444			/* turn input into fetch */
445			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
446			vtx.inst = 0;
447			vtx.fetch_type = 0;
448			vtx.buffer_id = i;
449			/* register containing the index into the buffer */
450			vtx.src_gpr = 0;
451			vtx.src_sel_x = 0;
452			vtx.mega_fetch_count = 0x1F;
453			vtx.dst_gpr = ctx->shader->input[i].gpr;
454			vtx.dst_sel_x = 0;
455			vtx.dst_sel_y = 1;
456			vtx.dst_sel_z = 2;
457			vtx.dst_sel_w = 3;
458			vtx.use_const_fields = 1;
459			r = r600_bc_add_vtx(ctx->bc, &vtx);
460			if (r)
461				return r;
462		}
463		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
464			/* turn input into interpolate on EG */
465			evergreen_interp_alu(ctx, i);
466		}
467		break;
468	case TGSI_FILE_OUTPUT:
469		i = ctx->shader->noutput++;
470		ctx->shader->output[i].name = d->Semantic.Name;
471		ctx->shader->output[i].sid = d->Semantic.Index;
472		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
473		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
474		break;
475	case TGSI_FILE_CONSTANT:
476	case TGSI_FILE_TEMPORARY:
477	case TGSI_FILE_SAMPLER:
478	case TGSI_FILE_ADDRESS:
479		break;
480	default:
481		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
482		return -EINVAL;
483	}
484	return 0;
485}
486
487static int r600_get_temp(struct r600_shader_ctx *ctx)
488{
489	return ctx->temp_reg + ctx->max_driver_temp_used++;
490}
491
492int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
493{
494	struct tgsi_full_immediate *immediate;
495	struct r600_shader_ctx ctx;
496	struct r600_bc_output output[32];
497	unsigned output_done, noutput;
498	unsigned opcode;
499	int i, r = 0, pos0;
500
501	ctx.bc = &shader->bc;
502	ctx.shader = shader;
503	r = r600_bc_init(ctx.bc, shader->family);
504	if (r)
505		return r;
506	ctx.tokens = tokens;
507	tgsi_scan_shader(tokens, &ctx.info);
508	tgsi_parse_init(&ctx.parse, tokens);
509	ctx.type = ctx.parse.FullHeader.Processor.Processor;
510	shader->processor_type = ctx.type;
511
512	/* register allocations */
513	/* Values [0,127] correspond to GPR[0..127].
514	 * Values [128,159] correspond to constant buffer bank 0
515	 * Values [160,191] correspond to constant buffer bank 1
516	 * Values [256,511] correspond to cfile constants c[0..255].
517	 * Other special values are shown in the list below.
518	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
519	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
520	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
521	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
522	 * 248	SQ_ALU_SRC_0: special constant 0.0.
523	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
524	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
525	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
526	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
527	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
528	 * 254	SQ_ALU_SRC_PV: previous vector result.
529	 * 255	SQ_ALU_SRC_PS: previous scalar result.
530	 */
531	for (i = 0; i < TGSI_FILE_COUNT; i++) {
532		ctx.file_offset[i] = 0;
533	}
534	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
535		ctx.file_offset[TGSI_FILE_INPUT] = 1;
536	}
537	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) {
538		ctx.file_offset[TGSI_FILE_INPUT] = 1;
539	}
540	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
541						ctx.info.file_count[TGSI_FILE_INPUT];
542	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
543						ctx.info.file_count[TGSI_FILE_OUTPUT];
544
545	ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
546
547	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
548	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
549			ctx.info.file_count[TGSI_FILE_TEMPORARY];
550
551	ctx.nliterals = 0;
552	ctx.literals = NULL;
553
554	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
555		tgsi_parse_token(&ctx.parse);
556		switch (ctx.parse.FullToken.Token.Type) {
557		case TGSI_TOKEN_TYPE_IMMEDIATE:
558			immediate = &ctx.parse.FullToken.FullImmediate;
559			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
560			if(ctx.literals == NULL) {
561				r = -ENOMEM;
562				goto out_err;
563			}
564			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
565			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
566			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
567			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
568			ctx.nliterals++;
569			break;
570		case TGSI_TOKEN_TYPE_DECLARATION:
571			r = tgsi_declaration(&ctx);
572			if (r)
573				goto out_err;
574			break;
575		case TGSI_TOKEN_TYPE_INSTRUCTION:
576			r = tgsi_is_supported(&ctx);
577			if (r)
578				goto out_err;
579			ctx.max_driver_temp_used = 0;
580			/* reserve first tmp for everyone */
581			r600_get_temp(&ctx);
582			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
583			if (ctx.bc->chiprev == 2)
584				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
585			else
586				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
587			r = ctx.inst_info->process(&ctx);
588			if (r)
589				goto out_err;
590			r = r600_bc_add_literal(ctx.bc, ctx.value);
591			if (r)
592				goto out_err;
593			break;
594		default:
595			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
596			r = -EINVAL;
597			goto out_err;
598		}
599	}
600	/* export output */
601	noutput = shader->noutput;
602	for (i = 0, pos0 = 0; i < noutput; i++) {
603		memset(&output[i], 0, sizeof(struct r600_bc_output));
604		output[i].gpr = shader->output[i].gpr;
605		output[i].elem_size = 3;
606		output[i].swizzle_x = 0;
607		output[i].swizzle_y = 1;
608		output[i].swizzle_z = 2;
609		output[i].swizzle_w = 3;
610		output[i].barrier = 1;
611		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
612		output[i].array_base = i - pos0;
613		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
614		switch (ctx.type) {
615		case TGSI_PROCESSOR_VERTEX:
616			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
617				output[i].array_base = 60;
618				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
619				/* position doesn't count in array_base */
620				pos0++;
621			}
622			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
623				output[i].array_base = 61;
624				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
625				/* position doesn't count in array_base */
626				pos0++;
627			}
628			break;
629		case TGSI_PROCESSOR_FRAGMENT:
630			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
631				output[i].array_base = shader->output[i].sid;
632				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
633			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
634				output[i].array_base = 61;
635				output[i].swizzle_x = 2;
636				output[i].swizzle_y = 7;
637				output[i].swizzle_z = output[i].swizzle_w = 7;
638				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
639			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
640				output[i].array_base = 61;
641				output[i].swizzle_x = 7;
642				output[i].swizzle_y = 1;
643				output[i].swizzle_z = output[i].swizzle_w = 7;
644				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
645			} else {
646				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
647				r = -EINVAL;
648				goto out_err;
649			}
650			break;
651		default:
652			R600_ERR("unsupported processor type %d\n", ctx.type);
653			r = -EINVAL;
654			goto out_err;
655		}
656	}
657	/* add fake param output for vertex shader if no param is exported */
658	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
659		for (i = 0, pos0 = 0; i < noutput; i++) {
660			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
661				pos0 = 1;
662				break;
663			}
664		}
665		if (!pos0) {
666			memset(&output[i], 0, sizeof(struct r600_bc_output));
667			output[i].gpr = 0;
668			output[i].elem_size = 3;
669			output[i].swizzle_x = 0;
670			output[i].swizzle_y = 1;
671			output[i].swizzle_z = 2;
672			output[i].swizzle_w = 3;
673			output[i].barrier = 1;
674			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
675			output[i].array_base = 0;
676			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
677			noutput++;
678		}
679	}
680	/* add fake pixel export */
681	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
682		memset(&output[0], 0, sizeof(struct r600_bc_output));
683		output[0].gpr = 0;
684		output[0].elem_size = 3;
685		output[0].swizzle_x = 7;
686		output[0].swizzle_y = 7;
687		output[0].swizzle_z = 7;
688		output[0].swizzle_w = 7;
689		output[0].barrier = 1;
690		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
691		output[0].array_base = 0;
692		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
693		noutput++;
694	}
695	/* set export done on last export of each type */
696	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
697		if (i == (noutput - 1)) {
698			output[i].end_of_program = 1;
699		}
700		if (!(output_done & (1 << output[i].type))) {
701			output_done |= (1 << output[i].type);
702			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
703		}
704	}
705	/* add output to bytecode */
706	for (i = 0; i < noutput; i++) {
707		r = r600_bc_add_output(ctx.bc, &output[i]);
708		if (r)
709			goto out_err;
710	}
711	free(ctx.literals);
712	tgsi_parse_free(&ctx.parse);
713	return 0;
714out_err:
715	free(ctx.literals);
716	tgsi_parse_free(&ctx.parse);
717	return r;
718}
719
720static int tgsi_unsupported(struct r600_shader_ctx *ctx)
721{
722	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
723	return -EINVAL;
724}
725
/* Translation callback that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
730
731static int tgsi_src(struct r600_shader_ctx *ctx,
732			const struct tgsi_full_src_register *tgsi_src,
733			struct r600_bc_alu_src *r600_src)
734{
735	int index;
736	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
737	r600_src->sel = tgsi_src->Register.Index;
738	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
739		r600_src->sel = 0;
740		index = tgsi_src->Register.Index;
741		ctx->value[0] = ctx->literals[index * 4 + 0];
742		ctx->value[1] = ctx->literals[index * 4 + 1];
743		ctx->value[2] = ctx->literals[index * 4 + 2];
744		ctx->value[3] = ctx->literals[index * 4 + 3];
745	}
746	if (tgsi_src->Register.Indirect)
747		r600_src->rel = V_SQ_REL_RELATIVE;
748	r600_src->neg = tgsi_src->Register.Negate;
749	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
750	return 0;
751}
752
753static int tgsi_dst(struct r600_shader_ctx *ctx,
754			const struct tgsi_full_dst_register *tgsi_dst,
755			unsigned swizzle,
756			struct r600_bc_alu_dst *r600_dst)
757{
758	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
759
760	r600_dst->sel = tgsi_dst->Register.Index;
761	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
762	r600_dst->chan = swizzle;
763	r600_dst->write = 1;
764	if (tgsi_dst->Register.Indirect)
765		r600_dst->rel = V_SQ_REL_RELATIVE;
766	if (inst->Instruction.Saturate) {
767		r600_dst->clamp = 1;
768	}
769	return 0;
770}
771
772static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
773{
774	switch (swizzle) {
775	case 0:
776		return tgsi_src->Register.SwizzleX;
777	case 1:
778		return tgsi_src->Register.SwizzleY;
779	case 2:
780		return tgsi_src->Register.SwizzleZ;
781	case 3:
782		return tgsi_src->Register.SwizzleW;
783	default:
784		return 0;
785	}
786}
787
788static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
789{
790	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
791	struct r600_bc_alu alu;
792	int i, j, k, nconst, r;
793
794	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
795		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
796			nconst++;
797		}
798		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
799		if (r) {
800			return r;
801		}
802	}
803	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
804		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
805			int treg = r600_get_temp(ctx);
806			for (k = 0; k < 4; k++) {
807				memset(&alu, 0, sizeof(struct r600_bc_alu));
808				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
809				alu.src[0].sel = r600_src[i].sel;
810				alu.src[0].chan = k;
811				alu.src[0].rel = r600_src[i].rel;
812				alu.dst.sel = treg;
813				alu.dst.chan = k;
814				alu.dst.write = 1;
815				if (k == 3)
816					alu.last = 1;
817				r = r600_bc_add_alu(ctx->bc, &alu);
818				if (r)
819					return r;
820			}
821			r600_src[i].sel = treg;
822			r600_src[i].rel =0;
823			j--;
824		}
825	}
826	return 0;
827}
828
829/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
830static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
831{
832	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
833	struct r600_bc_alu alu;
834	int i, j, k, nliteral, r;
835
836	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
837		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
838			nliteral++;
839		}
840	}
841	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
842		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
843			int treg = r600_get_temp(ctx);
844			for (k = 0; k < 4; k++) {
845				memset(&alu, 0, sizeof(struct r600_bc_alu));
846				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
847				alu.src[0].sel = r600_src[i].sel;
848				alu.src[0].chan = k;
849				alu.dst.sel = treg;
850				alu.dst.chan = k;
851				alu.dst.write = 1;
852				if (k == 3)
853					alu.last = 1;
854				r = r600_bc_add_alu(ctx->bc, &alu);
855				if (r)
856					return r;
857			}
858			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
859			if (r)
860				return r;
861			r600_src[i].sel = treg;
862			j--;
863		}
864	}
865	return 0;
866}
867
868static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
869{
870	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
871	struct r600_bc_alu_src r600_src[3];
872	struct r600_bc_alu alu;
873	int i, j, r;
874	int lasti = 0;
875
876	for (i = 0; i < 4; i++) {
877		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
878			lasti = i;
879		}
880	}
881
882	r = tgsi_split_constant(ctx, r600_src);
883	if (r)
884		return r;
885	r = tgsi_split_literal_constant(ctx, r600_src);
886	if (r)
887		return r;
888	for (i = 0; i < lasti + 1; i++) {
889		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
890			continue;
891
892		memset(&alu, 0, sizeof(struct r600_bc_alu));
893		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
894		if (r)
895			return r;
896
897		alu.inst = ctx->inst_info->r600_opcode;
898		if (!swap) {
899			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
900				alu.src[j] = r600_src[j];
901				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
902			}
903		} else {
904			alu.src[0] = r600_src[1];
905			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
906
907			alu.src[1] = r600_src[0];
908			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
909		}
910		/* handle some special cases */
911		switch (ctx->inst_info->tgsi_opcode) {
912		case TGSI_OPCODE_SUB:
913			alu.src[1].neg = 1;
914			break;
915		case TGSI_OPCODE_ABS:
916			alu.src[0].abs = 1;
917			break;
918		default:
919			break;
920		}
921		if (i == lasti) {
922			alu.last = 1;
923		}
924		r = r600_bc_add_alu(ctx->bc, &alu);
925		if (r)
926			return r;
927	}
928	return 0;
929}
930
/* Component-wise ALU translation with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
935
/* Component-wise ALU translation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
940
941/*
942 * r600 - trunc to -PI..PI range
943 * r700 - normalize by dividing by 2PI
944 * see fdo bug 27901
945 */
946static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
947			   struct r600_bc_alu_src r600_src[3])
948{
949	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
950	int r;
951	uint32_t lit_vals[4];
952	struct r600_bc_alu alu;
953
954	memset(lit_vals, 0, 4*4);
955	r = tgsi_split_constant(ctx, r600_src);
956	if (r)
957		return r;
958	r = tgsi_split_literal_constant(ctx, r600_src);
959	if (r)
960		return r;
961
962	r = tgsi_split_literal_constant(ctx, r600_src);
963	if (r)
964		return r;
965
966	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
967	lit_vals[1] = fui(0.5f);
968
969	memset(&alu, 0, sizeof(struct r600_bc_alu));
970	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
971	alu.is_op3 = 1;
972
973	alu.dst.chan = 0;
974	alu.dst.sel = ctx->temp_reg;
975	alu.dst.write = 1;
976
977	alu.src[0] = r600_src[0];
978	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
979
980	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
981	alu.src[1].chan = 0;
982	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
983	alu.src[2].chan = 1;
984	alu.last = 1;
985	r = r600_bc_add_alu(ctx->bc, &alu);
986	if (r)
987		return r;
988	r = r600_bc_add_literal(ctx->bc, lit_vals);
989	if (r)
990		return r;
991
992	memset(&alu, 0, sizeof(struct r600_bc_alu));
993	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
994
995	alu.dst.chan = 0;
996	alu.dst.sel = ctx->temp_reg;
997	alu.dst.write = 1;
998
999	alu.src[0].sel = ctx->temp_reg;
1000	alu.src[0].chan = 0;
1001	alu.last = 1;
1002	r = r600_bc_add_alu(ctx->bc, &alu);
1003	if (r)
1004		return r;
1005
1006	if (ctx->bc->chiprev == 0) {
1007		lit_vals[0] = fui(3.1415926535897f * 2.0f);
1008		lit_vals[1] = fui(-3.1415926535897f);
1009	} else {
1010		lit_vals[0] = fui(1.0f);
1011		lit_vals[1] = fui(-0.5f);
1012	}
1013
1014	memset(&alu, 0, sizeof(struct r600_bc_alu));
1015	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1016	alu.is_op3 = 1;
1017
1018	alu.dst.chan = 0;
1019	alu.dst.sel = ctx->temp_reg;
1020	alu.dst.write = 1;
1021
1022	alu.src[0].sel = ctx->temp_reg;
1023	alu.src[0].chan = 0;
1024
1025	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1026	alu.src[1].chan = 0;
1027	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1028	alu.src[2].chan = 1;
1029	alu.last = 1;
1030	r = r600_bc_add_alu(ctx->bc, &alu);
1031	if (r)
1032		return r;
1033	r = r600_bc_add_literal(ctx->bc, lit_vals);
1034	if (r)
1035		return r;
1036	return 0;
1037}
1038
1039static int tgsi_trig(struct r600_shader_ctx *ctx)
1040{
1041	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1042	struct r600_bc_alu_src r600_src[3];
1043	struct r600_bc_alu alu;
1044	int i, r;
1045	int lasti = 0;
1046
1047	r = tgsi_setup_trig(ctx, r600_src);
1048	if (r)
1049		return r;
1050
1051	memset(&alu, 0, sizeof(struct r600_bc_alu));
1052	alu.inst = ctx->inst_info->r600_opcode;
1053	alu.dst.chan = 0;
1054	alu.dst.sel = ctx->temp_reg;
1055	alu.dst.write = 1;
1056
1057	alu.src[0].sel = ctx->temp_reg;
1058	alu.src[0].chan = 0;
1059	alu.last = 1;
1060	r = r600_bc_add_alu(ctx->bc, &alu);
1061	if (r)
1062		return r;
1063
1064	/* replicate result */
1065	for (i = 0; i < 4; i++) {
1066		if (inst->Dst[0].Register.WriteMask & (1 << i))
1067			lasti = i;
1068	}
1069	for (i = 0; i < lasti + 1; i++) {
1070		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1071			continue;
1072
1073		memset(&alu, 0, sizeof(struct r600_bc_alu));
1074		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1075
1076		alu.src[0].sel = ctx->temp_reg;
1077		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1078		if (r)
1079			return r;
1080		if (i == lasti)
1081			alu.last = 1;
1082		r = r600_bc_add_alu(ctx->bc, &alu);
1083		if (r)
1084			return r;
1085	}
1086	return 0;
1087}
1088
1089static int tgsi_scs(struct r600_shader_ctx *ctx)
1090{
1091	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1092	struct r600_bc_alu_src r600_src[3];
1093	struct r600_bc_alu alu;
1094	int r;
1095
1096	/* We'll only need the trig stuff if we are going to write to the
1097	 * X or Y components of the destination vector.
1098	 */
1099	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1100		r = tgsi_setup_trig(ctx, r600_src);
1101		if (r)
1102			return r;
1103	}
1104
1105	/* dst.x = COS */
1106	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1107		memset(&alu, 0, sizeof(struct r600_bc_alu));
1108		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1109		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1110		if (r)
1111			return r;
1112
1113		alu.src[0].sel = ctx->temp_reg;
1114		alu.src[0].chan = 0;
1115		alu.last = 1;
1116		r = r600_bc_add_alu(ctx->bc, &alu);
1117		if (r)
1118			return r;
1119	}
1120
1121	/* dst.y = SIN */
1122	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1123		memset(&alu, 0, sizeof(struct r600_bc_alu));
1124		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1125		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1126		if (r)
1127			return r;
1128
1129		alu.src[0].sel = ctx->temp_reg;
1130		alu.src[0].chan = 0;
1131		alu.last = 1;
1132		r = r600_bc_add_alu(ctx->bc, &alu);
1133		if (r)
1134			return r;
1135	}
1136
1137	/* dst.z = 0.0; */
1138	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1139		memset(&alu, 0, sizeof(struct r600_bc_alu));
1140
1141		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1142
1143		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1144		if (r)
1145			return r;
1146
1147		alu.src[0].sel = V_SQ_ALU_SRC_0;
1148		alu.src[0].chan = 0;
1149
1150		alu.last = 1;
1151
1152		r = r600_bc_add_alu(ctx->bc, &alu);
1153		if (r)
1154			return r;
1155
1156		r = r600_bc_add_literal(ctx->bc, ctx->value);
1157		if (r)
1158			return r;
1159	}
1160
1161	/* dst.w = 1.0; */
1162	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1163		memset(&alu, 0, sizeof(struct r600_bc_alu));
1164
1165		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1166
1167		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1168		if (r)
1169			return r;
1170
1171		alu.src[0].sel = V_SQ_ALU_SRC_1;
1172		alu.src[0].chan = 0;
1173
1174		alu.last = 1;
1175
1176		r = r600_bc_add_alu(ctx->bc, &alu);
1177		if (r)
1178			return r;
1179
1180		r = r600_bc_add_literal(ctx->bc, ctx->value);
1181		if (r)
1182			return r;
1183	}
1184
1185	return 0;
1186}
1187
1188static int tgsi_kill(struct r600_shader_ctx *ctx)
1189{
1190	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1191	struct r600_bc_alu alu;
1192	int i, r;
1193
1194	for (i = 0; i < 4; i++) {
1195		memset(&alu, 0, sizeof(struct r600_bc_alu));
1196		alu.inst = ctx->inst_info->r600_opcode;
1197
1198		alu.dst.chan = i;
1199
1200		alu.src[0].sel = V_SQ_ALU_SRC_0;
1201
1202		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1203			alu.src[1].sel = V_SQ_ALU_SRC_1;
1204			alu.src[1].neg = 1;
1205		} else {
1206			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1207			if (r)
1208				return r;
1209			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1210		}
1211		if (i == 3) {
1212			alu.last = 1;
1213		}
1214		r = r600_bc_add_alu(ctx->bc, &alu);
1215		if (r)
1216			return r;
1217	}
1218	r = r600_bc_add_literal(ctx->bc, ctx->value);
1219	if (r)
1220		return r;
1221
1222	/* kill must be last in ALU */
1223	ctx->bc->force_add_cf = 1;
1224	ctx->shader->uses_kill = TRUE;
1225	return 0;
1226}
1227
1228static int tgsi_lit(struct r600_shader_ctx *ctx)
1229{
1230	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1231	struct r600_bc_alu alu;
1232	struct r600_bc_alu_src r600_src[3];
1233	int r;
1234
1235	r = tgsi_split_constant(ctx, r600_src);
1236	if (r)
1237		return r;
1238	r = tgsi_split_literal_constant(ctx, r600_src);
1239	if (r)
1240		return r;
1241
1242	/* dst.x, <- 1.0  */
1243	memset(&alu, 0, sizeof(struct r600_bc_alu));
1244	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1245	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1246	alu.src[0].chan = 0;
1247	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1248	if (r)
1249		return r;
1250	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1251	r = r600_bc_add_alu(ctx->bc, &alu);
1252	if (r)
1253		return r;
1254
1255	/* dst.y = max(src.x, 0.0) */
1256	memset(&alu, 0, sizeof(struct r600_bc_alu));
1257	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1258	alu.src[0] = r600_src[0];
1259	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1260	alu.src[1].chan = 0;
1261	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1262	if (r)
1263		return r;
1264	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1265	r = r600_bc_add_alu(ctx->bc, &alu);
1266	if (r)
1267		return r;
1268
1269	/* dst.w, <- 1.0  */
1270	memset(&alu, 0, sizeof(struct r600_bc_alu));
1271	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1272	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1273	alu.src[0].chan = 0;
1274	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1275	if (r)
1276		return r;
1277	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1278	alu.last = 1;
1279	r = r600_bc_add_alu(ctx->bc, &alu);
1280	if (r)
1281		return r;
1282
1283	r = r600_bc_add_literal(ctx->bc, ctx->value);
1284	if (r)
1285		return r;
1286
1287	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1288	{
1289		int chan;
1290		int sel;
1291
1292		/* dst.z = log(src.y) */
1293		memset(&alu, 0, sizeof(struct r600_bc_alu));
1294		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1295		alu.src[0] = r600_src[0];
1296		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1297		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1298		if (r)
1299			return r;
1300		alu.last = 1;
1301		r = r600_bc_add_alu(ctx->bc, &alu);
1302		if (r)
1303			return r;
1304
1305		r = r600_bc_add_literal(ctx->bc, ctx->value);
1306		if (r)
1307			return r;
1308
1309		chan = alu.dst.chan;
1310		sel = alu.dst.sel;
1311
1312		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1313		memset(&alu, 0, sizeof(struct r600_bc_alu));
1314		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1315		alu.src[0] = r600_src[0];
1316		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1317		alu.src[1].sel  = sel;
1318		alu.src[1].chan = chan;
1319
1320		alu.src[2] = r600_src[0];
1321		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1322		alu.dst.sel = ctx->temp_reg;
1323		alu.dst.chan = 0;
1324		alu.dst.write = 1;
1325		alu.is_op3 = 1;
1326		alu.last = 1;
1327		r = r600_bc_add_alu(ctx->bc, &alu);
1328		if (r)
1329			return r;
1330
1331		r = r600_bc_add_literal(ctx->bc, ctx->value);
1332		if (r)
1333			return r;
1334		/* dst.z = exp(tmp.x) */
1335		memset(&alu, 0, sizeof(struct r600_bc_alu));
1336		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1337		alu.src[0].sel = ctx->temp_reg;
1338		alu.src[0].chan = 0;
1339		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1340		if (r)
1341			return r;
1342		alu.last = 1;
1343		r = r600_bc_add_alu(ctx->bc, &alu);
1344		if (r)
1345			return r;
1346	}
1347	return 0;
1348}
1349
1350static int tgsi_rsq(struct r600_shader_ctx *ctx)
1351{
1352	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1353	struct r600_bc_alu alu;
1354	int i, r;
1355
1356	memset(&alu, 0, sizeof(struct r600_bc_alu));
1357
1358	/* FIXME:
1359	 * For state trackers other than OpenGL, we'll want to use
1360	 * _RECIPSQRT_IEEE instead.
1361	 */
1362	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1363
1364	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1365		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1366		if (r)
1367			return r;
1368		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1369		alu.src[i].abs = 1;
1370	}
1371	alu.dst.sel = ctx->temp_reg;
1372	alu.dst.write = 1;
1373	alu.last = 1;
1374	r = r600_bc_add_alu(ctx->bc, &alu);
1375	if (r)
1376		return r;
1377	r = r600_bc_add_literal(ctx->bc, ctx->value);
1378	if (r)
1379		return r;
1380	/* replicate result */
1381	return tgsi_helper_tempx_replicate(ctx);
1382}
1383
1384static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1385{
1386	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1387	struct r600_bc_alu alu;
1388	int i, r;
1389
1390	for (i = 0; i < 4; i++) {
1391		memset(&alu, 0, sizeof(struct r600_bc_alu));
1392		alu.src[0].sel = ctx->temp_reg;
1393		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1394		alu.dst.chan = i;
1395		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1396		if (r)
1397			return r;
1398		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1399		if (i == 3)
1400			alu.last = 1;
1401		r = r600_bc_add_alu(ctx->bc, &alu);
1402		if (r)
1403			return r;
1404	}
1405	return 0;
1406}
1407
1408static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1409{
1410	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1411	struct r600_bc_alu alu;
1412	int i, r;
1413
1414	memset(&alu, 0, sizeof(struct r600_bc_alu));
1415	alu.inst = ctx->inst_info->r600_opcode;
1416	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1417		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1418		if (r)
1419			return r;
1420		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1421	}
1422	alu.dst.sel = ctx->temp_reg;
1423	alu.dst.write = 1;
1424	alu.last = 1;
1425	r = r600_bc_add_alu(ctx->bc, &alu);
1426	if (r)
1427		return r;
1428	r = r600_bc_add_literal(ctx->bc, ctx->value);
1429	if (r)
1430		return r;
1431	/* replicate result */
1432	return tgsi_helper_tempx_replicate(ctx);
1433}
1434
1435static int tgsi_pow(struct r600_shader_ctx *ctx)
1436{
1437	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1438	struct r600_bc_alu alu;
1439	int r;
1440
1441	/* LOG2(a) */
1442	memset(&alu, 0, sizeof(struct r600_bc_alu));
1443	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1444	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1445	if (r)
1446		return r;
1447	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1448	alu.dst.sel = ctx->temp_reg;
1449	alu.dst.write = 1;
1450	alu.last = 1;
1451	r = r600_bc_add_alu(ctx->bc, &alu);
1452	if (r)
1453		return r;
1454	r = r600_bc_add_literal(ctx->bc,ctx->value);
1455	if (r)
1456		return r;
1457	/* b * LOG2(a) */
1458	memset(&alu, 0, sizeof(struct r600_bc_alu));
1459	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1460	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1461	if (r)
1462		return r;
1463	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1464	alu.src[1].sel = ctx->temp_reg;
1465	alu.dst.sel = ctx->temp_reg;
1466	alu.dst.write = 1;
1467	alu.last = 1;
1468	r = r600_bc_add_alu(ctx->bc, &alu);
1469	if (r)
1470		return r;
1471	r = r600_bc_add_literal(ctx->bc,ctx->value);
1472	if (r)
1473		return r;
1474	/* POW(a,b) = EXP2(b * LOG2(a))*/
1475	memset(&alu, 0, sizeof(struct r600_bc_alu));
1476	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1477	alu.src[0].sel = ctx->temp_reg;
1478	alu.dst.sel = ctx->temp_reg;
1479	alu.dst.write = 1;
1480	alu.last = 1;
1481	r = r600_bc_add_alu(ctx->bc, &alu);
1482	if (r)
1483		return r;
1484	r = r600_bc_add_literal(ctx->bc,ctx->value);
1485	if (r)
1486		return r;
1487	return tgsi_helper_tempx_replicate(ctx);
1488}
1489
1490static int tgsi_ssg(struct r600_shader_ctx *ctx)
1491{
1492	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1493	struct r600_bc_alu alu;
1494	struct r600_bc_alu_src r600_src[3];
1495	int i, r;
1496
1497	r = tgsi_split_constant(ctx, r600_src);
1498	if (r)
1499		return r;
1500	r = tgsi_split_literal_constant(ctx, r600_src);
1501	if (r)
1502		return r;
1503
1504	/* tmp = (src > 0 ? 1 : src) */
1505	for (i = 0; i < 4; i++) {
1506		memset(&alu, 0, sizeof(struct r600_bc_alu));
1507		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1508		alu.is_op3 = 1;
1509
1510		alu.dst.sel = ctx->temp_reg;
1511		alu.dst.chan = i;
1512
1513		alu.src[0] = r600_src[0];
1514		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1515
1516		alu.src[1].sel = V_SQ_ALU_SRC_1;
1517
1518		alu.src[2] = r600_src[0];
1519		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1520		if (i == 3)
1521			alu.last = 1;
1522		r = r600_bc_add_alu(ctx->bc, &alu);
1523		if (r)
1524			return r;
1525	}
1526	r = r600_bc_add_literal(ctx->bc, ctx->value);
1527	if (r)
1528		return r;
1529
1530	/* dst = (-tmp > 0 ? -1 : tmp) */
1531	for (i = 0; i < 4; i++) {
1532		memset(&alu, 0, sizeof(struct r600_bc_alu));
1533		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1534		alu.is_op3 = 1;
1535		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1536		if (r)
1537			return r;
1538
1539		alu.src[0].sel = ctx->temp_reg;
1540		alu.src[0].chan = i;
1541		alu.src[0].neg = 1;
1542
1543		alu.src[1].sel = V_SQ_ALU_SRC_1;
1544		alu.src[1].neg = 1;
1545
1546		alu.src[2].sel = ctx->temp_reg;
1547		alu.src[2].chan = i;
1548
1549		if (i == 3)
1550			alu.last = 1;
1551		r = r600_bc_add_alu(ctx->bc, &alu);
1552		if (r)
1553			return r;
1554	}
1555	return 0;
1556}
1557
1558static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1559{
1560	struct r600_bc_alu alu;
1561	int i, r;
1562
1563	r = r600_bc_add_literal(ctx->bc, ctx->value);
1564	if (r)
1565		return r;
1566	for (i = 0; i < 4; i++) {
1567		memset(&alu, 0, sizeof(struct r600_bc_alu));
1568		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1569			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1570			alu.dst.chan = i;
1571		} else {
1572			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1573			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1574			if (r)
1575				return r;
1576			alu.src[0].sel = ctx->temp_reg;
1577			alu.src[0].chan = i;
1578		}
1579		if (i == 3) {
1580			alu.last = 1;
1581		}
1582		r = r600_bc_add_alu(ctx->bc, &alu);
1583		if (r)
1584			return r;
1585	}
1586	return 0;
1587}
1588
1589static int tgsi_op3(struct r600_shader_ctx *ctx)
1590{
1591	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1592	struct r600_bc_alu_src r600_src[3];
1593	struct r600_bc_alu alu;
1594	int i, j, r;
1595
1596	r = tgsi_split_constant(ctx, r600_src);
1597	if (r)
1598		return r;
1599	r = tgsi_split_literal_constant(ctx, r600_src);
1600	if (r)
1601		return r;
1602	/* do it in 2 step as op3 doesn't support writemask */
1603	for (i = 0; i < 4; i++) {
1604		memset(&alu, 0, sizeof(struct r600_bc_alu));
1605		alu.inst = ctx->inst_info->r600_opcode;
1606		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1607			alu.src[j] = r600_src[j];
1608			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1609		}
1610		alu.dst.sel = ctx->temp_reg;
1611		alu.dst.chan = i;
1612		alu.dst.write = 1;
1613		alu.is_op3 = 1;
1614		if (i == 3) {
1615			alu.last = 1;
1616		}
1617		r = r600_bc_add_alu(ctx->bc, &alu);
1618		if (r)
1619			return r;
1620	}
1621	return tgsi_helper_copy(ctx, inst);
1622}
1623
1624static int tgsi_dp(struct r600_shader_ctx *ctx)
1625{
1626	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1627	struct r600_bc_alu_src r600_src[3];
1628	struct r600_bc_alu alu;
1629	int i, j, r;
1630
1631	r = tgsi_split_constant(ctx, r600_src);
1632	if (r)
1633		return r;
1634	r = tgsi_split_literal_constant(ctx, r600_src);
1635	if (r)
1636		return r;
1637	for (i = 0; i < 4; i++) {
1638		memset(&alu, 0, sizeof(struct r600_bc_alu));
1639		alu.inst = ctx->inst_info->r600_opcode;
1640		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1641			alu.src[j] = r600_src[j];
1642			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1643		}
1644		alu.dst.sel = ctx->temp_reg;
1645		alu.dst.chan = i;
1646		alu.dst.write = 1;
1647		/* handle some special cases */
1648		switch (ctx->inst_info->tgsi_opcode) {
1649		case TGSI_OPCODE_DP2:
1650			if (i > 1) {
1651				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1652				alu.src[0].chan = alu.src[1].chan = 0;
1653			}
1654			break;
1655		case TGSI_OPCODE_DP3:
1656			if (i > 2) {
1657				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1658				alu.src[0].chan = alu.src[1].chan = 0;
1659			}
1660			break;
1661		case TGSI_OPCODE_DPH:
1662			if (i == 3) {
1663				alu.src[0].sel = V_SQ_ALU_SRC_1;
1664				alu.src[0].chan = 0;
1665				alu.src[0].neg = 0;
1666			}
1667			break;
1668		default:
1669			break;
1670		}
1671		if (i == 3) {
1672			alu.last = 1;
1673		}
1674		r = r600_bc_add_alu(ctx->bc, &alu);
1675		if (r)
1676			return r;
1677	}
1678	return tgsi_helper_copy(ctx, inst);
1679}
1680
1681static int tgsi_tex(struct r600_shader_ctx *ctx)
1682{
1683	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1684	struct r600_bc_tex tex;
1685	struct r600_bc_alu alu;
1686	unsigned src_gpr;
1687	int r, i;
1688	int opcode;
1689	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1690	uint32_t lit_vals[4];
1691
1692	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1693
1694	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1695		/* Add perspective divide */
1696		memset(&alu, 0, sizeof(struct r600_bc_alu));
1697		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1698		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1699		if (r)
1700			return r;
1701
1702		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1703		alu.dst.sel = ctx->temp_reg;
1704		alu.dst.chan = 3;
1705		alu.last = 1;
1706		alu.dst.write = 1;
1707		r = r600_bc_add_alu(ctx->bc, &alu);
1708		if (r)
1709			return r;
1710
1711		for (i = 0; i < 3; i++) {
1712			memset(&alu, 0, sizeof(struct r600_bc_alu));
1713			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1714			alu.src[0].sel = ctx->temp_reg;
1715			alu.src[0].chan = 3;
1716			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1717			if (r)
1718				return r;
1719			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1720			alu.dst.sel = ctx->temp_reg;
1721			alu.dst.chan = i;
1722			alu.dst.write = 1;
1723			r = r600_bc_add_alu(ctx->bc, &alu);
1724			if (r)
1725				return r;
1726		}
1727		memset(&alu, 0, sizeof(struct r600_bc_alu));
1728		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1729		alu.src[0].sel = V_SQ_ALU_SRC_1;
1730		alu.src[0].chan = 0;
1731		alu.dst.sel = ctx->temp_reg;
1732		alu.dst.chan = 3;
1733		alu.last = 1;
1734		alu.dst.write = 1;
1735		r = r600_bc_add_alu(ctx->bc, &alu);
1736		if (r)
1737			return r;
1738		src_not_temp = FALSE;
1739		src_gpr = ctx->temp_reg;
1740	}
1741
1742	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1743		int src_chan, src2_chan;
1744
1745		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1746		for (i = 0; i < 4; i++) {
1747			memset(&alu, 0, sizeof(struct r600_bc_alu));
1748			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1749			switch (i) {
1750			case 0:
1751				src_chan = 2;
1752				src2_chan = 1;
1753				break;
1754			case 1:
1755				src_chan = 2;
1756				src2_chan = 0;
1757				break;
1758			case 2:
1759				src_chan = 0;
1760				src2_chan = 2;
1761				break;
1762			case 3:
1763				src_chan = 1;
1764				src2_chan = 2;
1765				break;
1766			default:
1767				assert(0);
1768				src_chan = 0;
1769				src2_chan = 0;
1770				break;
1771			}
1772			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1773			if (r)
1774				return r;
1775			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1776			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1777			if (r)
1778				return r;
1779			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1780			alu.dst.sel = ctx->temp_reg;
1781			alu.dst.chan = i;
1782			if (i == 3)
1783				alu.last = 1;
1784			alu.dst.write = 1;
1785			r = r600_bc_add_alu(ctx->bc, &alu);
1786			if (r)
1787				return r;
1788		}
1789
1790		/* tmp1.z = RCP_e(|tmp1.z|) */
1791		memset(&alu, 0, sizeof(struct r600_bc_alu));
1792		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1793		alu.src[0].sel = ctx->temp_reg;
1794		alu.src[0].chan = 2;
1795		alu.src[0].abs = 1;
1796		alu.dst.sel = ctx->temp_reg;
1797		alu.dst.chan = 2;
1798		alu.dst.write = 1;
1799		alu.last = 1;
1800		r = r600_bc_add_alu(ctx->bc, &alu);
1801		if (r)
1802			return r;
1803
1804		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1805		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1806		 * muladd has no writemask, have to use another temp
1807		 */
1808		memset(&alu, 0, sizeof(struct r600_bc_alu));
1809		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1810		alu.is_op3 = 1;
1811
1812		alu.src[0].sel = ctx->temp_reg;
1813		alu.src[0].chan = 0;
1814		alu.src[1].sel = ctx->temp_reg;
1815		alu.src[1].chan = 2;
1816
1817		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1818		alu.src[2].chan = 0;
1819
1820		alu.dst.sel = ctx->temp_reg;
1821		alu.dst.chan = 0;
1822		alu.dst.write = 1;
1823
1824		r = r600_bc_add_alu(ctx->bc, &alu);
1825		if (r)
1826			return r;
1827
1828		memset(&alu, 0, sizeof(struct r600_bc_alu));
1829		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1830		alu.is_op3 = 1;
1831
1832		alu.src[0].sel = ctx->temp_reg;
1833		alu.src[0].chan = 1;
1834		alu.src[1].sel = ctx->temp_reg;
1835		alu.src[1].chan = 2;
1836
1837		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1838		alu.src[2].chan = 0;
1839
1840		alu.dst.sel = ctx->temp_reg;
1841		alu.dst.chan = 1;
1842		alu.dst.write = 1;
1843
1844		alu.last = 1;
1845		r = r600_bc_add_alu(ctx->bc, &alu);
1846		if (r)
1847			return r;
1848
1849		lit_vals[0] = fui(1.5f);
1850
1851		r = r600_bc_add_literal(ctx->bc, lit_vals);
1852		if (r)
1853			return r;
1854		src_not_temp = FALSE;
1855		src_gpr = ctx->temp_reg;
1856	}
1857
1858	if (src_not_temp) {
1859		for (i = 0; i < 4; i++) {
1860			memset(&alu, 0, sizeof(struct r600_bc_alu));
1861			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1862			alu.src[0].sel = src_gpr;
1863			alu.src[0].chan = i;
1864			alu.dst.sel = ctx->temp_reg;
1865			alu.dst.chan = i;
1866			if (i == 3)
1867				alu.last = 1;
1868			alu.dst.write = 1;
1869			r = r600_bc_add_alu(ctx->bc, &alu);
1870			if (r)
1871				return r;
1872		}
1873		src_gpr = ctx->temp_reg;
1874	}
1875
1876	opcode = ctx->inst_info->r600_opcode;
1877	if (opcode == SQ_TEX_INST_SAMPLE &&
1878	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1879		opcode = SQ_TEX_INST_SAMPLE_C;
1880
1881	memset(&tex, 0, sizeof(struct r600_bc_tex));
1882	tex.inst = opcode;
1883	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1884	tex.resource_id = tex.sampler_id;
1885	if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX)
1886		tex.resource_id += PIPE_MAX_ATTRIBS;
1887	tex.src_gpr = src_gpr;
1888	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1889	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1890	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1891	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1892	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1893	tex.src_sel_x = 0;
1894	tex.src_sel_y = 1;
1895	tex.src_sel_z = 2;
1896	tex.src_sel_w = 3;
1897
1898	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1899		tex.src_sel_x = 1;
1900		tex.src_sel_y = 0;
1901		tex.src_sel_z = 3;
1902		tex.src_sel_w = 1;
1903	}
1904
1905	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1906		tex.coord_type_x = 1;
1907		tex.coord_type_y = 1;
1908		tex.coord_type_z = 1;
1909		tex.coord_type_w = 1;
1910	}
1911
1912	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1913		tex.src_sel_w = 2;
1914
1915	r = r600_bc_add_tex(ctx->bc, &tex);
1916	if (r)
1917		return r;
1918
1919	/* add shadow ambient support  - gallium doesn't do it yet */
1920	return 0;
1921
1922}
1923
1924static int tgsi_lrp(struct r600_shader_ctx *ctx)
1925{
1926	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1927	struct r600_bc_alu_src r600_src[3];
1928	struct r600_bc_alu alu;
1929	unsigned i;
1930	int r;
1931
1932	r = tgsi_split_constant(ctx, r600_src);
1933	if (r)
1934		return r;
1935	r = tgsi_split_literal_constant(ctx, r600_src);
1936	if (r)
1937		return r;
1938	/* 1 - src0 */
1939	for (i = 0; i < 4; i++) {
1940		memset(&alu, 0, sizeof(struct r600_bc_alu));
1941		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1942		alu.src[0].sel = V_SQ_ALU_SRC_1;
1943		alu.src[0].chan = 0;
1944		alu.src[1] = r600_src[0];
1945		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1946		alu.src[1].neg = 1;
1947		alu.dst.sel = ctx->temp_reg;
1948		alu.dst.chan = i;
1949		if (i == 3) {
1950			alu.last = 1;
1951		}
1952		alu.dst.write = 1;
1953		r = r600_bc_add_alu(ctx->bc, &alu);
1954		if (r)
1955			return r;
1956	}
1957	r = r600_bc_add_literal(ctx->bc, ctx->value);
1958	if (r)
1959		return r;
1960
1961	/* (1 - src0) * src2 */
1962	for (i = 0; i < 4; i++) {
1963		memset(&alu, 0, sizeof(struct r600_bc_alu));
1964		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1965		alu.src[0].sel = ctx->temp_reg;
1966		alu.src[0].chan = i;
1967		alu.src[1] = r600_src[2];
1968		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1969		alu.dst.sel = ctx->temp_reg;
1970		alu.dst.chan = i;
1971		if (i == 3) {
1972			alu.last = 1;
1973		}
1974		alu.dst.write = 1;
1975		r = r600_bc_add_alu(ctx->bc, &alu);
1976		if (r)
1977			return r;
1978	}
1979	r = r600_bc_add_literal(ctx->bc, ctx->value);
1980	if (r)
1981		return r;
1982
1983	/* src0 * src1 + (1 - src0) * src2 */
1984	for (i = 0; i < 4; i++) {
1985		memset(&alu, 0, sizeof(struct r600_bc_alu));
1986		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1987		alu.is_op3 = 1;
1988		alu.src[0] = r600_src[0];
1989		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1990		alu.src[1] = r600_src[1];
1991		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1992		alu.src[2].sel = ctx->temp_reg;
1993		alu.src[2].chan = i;
1994		alu.dst.sel = ctx->temp_reg;
1995		alu.dst.chan = i;
1996		if (i == 3) {
1997			alu.last = 1;
1998		}
1999		r = r600_bc_add_alu(ctx->bc, &alu);
2000		if (r)
2001			return r;
2002	}
2003	return tgsi_helper_copy(ctx, inst);
2004}
2005
2006static int tgsi_cmp(struct r600_shader_ctx *ctx)
2007{
2008	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2009	struct r600_bc_alu_src r600_src[3];
2010	struct r600_bc_alu alu;
2011	int use_temp = 0;
2012	int i, r;
2013
2014	r = tgsi_split_constant(ctx, r600_src);
2015	if (r)
2016		return r;
2017	r = tgsi_split_literal_constant(ctx, r600_src);
2018	if (r)
2019		return r;
2020
2021	if (inst->Dst[0].Register.WriteMask != 0xf)
2022		use_temp = 1;
2023
2024	for (i = 0; i < 4; i++) {
2025		memset(&alu, 0, sizeof(struct r600_bc_alu));
2026		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2027		alu.src[0] = r600_src[0];
2028		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2029
2030		alu.src[1] = r600_src[2];
2031		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2032
2033		alu.src[2] = r600_src[1];
2034		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2035
2036		if (use_temp)
2037			alu.dst.sel = ctx->temp_reg;
2038		else {
2039			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2040			if (r)
2041				return r;
2042		}
2043		alu.dst.chan = i;
2044		alu.dst.write = 1;
2045		alu.is_op3 = 1;
2046		if (i == 3)
2047			alu.last = 1;
2048		r = r600_bc_add_alu(ctx->bc, &alu);
2049		if (r)
2050			return r;
2051	}
2052	if (use_temp)
2053		return tgsi_helper_copy(ctx, inst);
2054	return 0;
2055}
2056
2057static int tgsi_xpd(struct r600_shader_ctx *ctx)
2058{
2059	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2060	struct r600_bc_alu_src r600_src[3];
2061	struct r600_bc_alu alu;
2062	uint32_t use_temp = 0;
2063	int i, r;
2064
2065	if (inst->Dst[0].Register.WriteMask != 0xf)
2066		use_temp = 1;
2067
2068	r = tgsi_split_constant(ctx, r600_src);
2069	if (r)
2070		return r;
2071	r = tgsi_split_literal_constant(ctx, r600_src);
2072	if (r)
2073		return r;
2074
2075	for (i = 0; i < 4; i++) {
2076		memset(&alu, 0, sizeof(struct r600_bc_alu));
2077		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2078
2079		alu.src[0] = r600_src[0];
2080		switch (i) {
2081		case 0:
2082			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2083			break;
2084		case 1:
2085			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2086			break;
2087		case 2:
2088			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2089			break;
2090		case 3:
2091			alu.src[0].sel = V_SQ_ALU_SRC_0;
2092			alu.src[0].chan = i;
2093		}
2094
2095		alu.src[1] = r600_src[1];
2096		switch (i) {
2097		case 0:
2098			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2099			break;
2100		case 1:
2101			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2102			break;
2103		case 2:
2104			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2105			break;
2106		case 3:
2107			alu.src[1].sel = V_SQ_ALU_SRC_0;
2108			alu.src[1].chan = i;
2109		}
2110
2111		alu.dst.sel = ctx->temp_reg;
2112		alu.dst.chan = i;
2113		alu.dst.write = 1;
2114
2115		if (i == 3)
2116			alu.last = 1;
2117		r = r600_bc_add_alu(ctx->bc, &alu);
2118		if (r)
2119			return r;
2120
2121		r = r600_bc_add_literal(ctx->bc, ctx->value);
2122		if (r)
2123			return r;
2124	}
2125
2126	for (i = 0; i < 4; i++) {
2127		memset(&alu, 0, sizeof(struct r600_bc_alu));
2128		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2129
2130		alu.src[0] = r600_src[0];
2131		switch (i) {
2132		case 0:
2133			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2134			break;
2135		case 1:
2136			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2137			break;
2138		case 2:
2139			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2140			break;
2141		case 3:
2142			alu.src[0].sel = V_SQ_ALU_SRC_0;
2143			alu.src[0].chan = i;
2144		}
2145
2146		alu.src[1] = r600_src[1];
2147		switch (i) {
2148		case 0:
2149			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2150			break;
2151		case 1:
2152			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2153			break;
2154		case 2:
2155			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2156			break;
2157		case 3:
2158			alu.src[1].sel = V_SQ_ALU_SRC_0;
2159			alu.src[1].chan = i;
2160		}
2161
2162		alu.src[2].sel = ctx->temp_reg;
2163		alu.src[2].neg = 1;
2164		alu.src[2].chan = i;
2165
2166		if (use_temp)
2167			alu.dst.sel = ctx->temp_reg;
2168		else {
2169			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2170			if (r)
2171				return r;
2172		}
2173		alu.dst.chan = i;
2174		alu.dst.write = 1;
2175		alu.is_op3 = 1;
2176		if (i == 3)
2177			alu.last = 1;
2178		r = r600_bc_add_alu(ctx->bc, &alu);
2179		if (r)
2180			return r;
2181
2182		r = r600_bc_add_literal(ctx->bc, ctx->value);
2183		if (r)
2184			return r;
2185	}
2186	if (use_temp)
2187		return tgsi_helper_copy(ctx, inst);
2188	return 0;
2189}
2190
2191static int tgsi_exp(struct r600_shader_ctx *ctx)
2192{
2193	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2194	struct r600_bc_alu_src r600_src[3];
2195	struct r600_bc_alu alu;
2196	int r;
2197
2198	/* result.x = 2^floor(src); */
2199	if (inst->Dst[0].Register.WriteMask & 1) {
2200		memset(&alu, 0, sizeof(struct r600_bc_alu));
2201
2202		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2203		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2204		if (r)
2205			return r;
2206
2207		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2208
2209		alu.dst.sel = ctx->temp_reg;
2210		alu.dst.chan = 0;
2211		alu.dst.write = 1;
2212		alu.last = 1;
2213		r = r600_bc_add_alu(ctx->bc, &alu);
2214		if (r)
2215			return r;
2216
2217		r = r600_bc_add_literal(ctx->bc, ctx->value);
2218		if (r)
2219			return r;
2220
2221		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2222		alu.src[0].sel = ctx->temp_reg;
2223		alu.src[0].chan = 0;
2224
2225		alu.dst.sel = ctx->temp_reg;
2226		alu.dst.chan = 0;
2227		alu.dst.write = 1;
2228		alu.last = 1;
2229		r = r600_bc_add_alu(ctx->bc, &alu);
2230		if (r)
2231			return r;
2232
2233		r = r600_bc_add_literal(ctx->bc, ctx->value);
2234		if (r)
2235			return r;
2236	}
2237
2238	/* result.y = tmp - floor(tmp); */
2239	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2240		memset(&alu, 0, sizeof(struct r600_bc_alu));
2241
2242		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2243		alu.src[0] = r600_src[0];
2244		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2245		if (r)
2246			return r;
2247		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2248
2249		alu.dst.sel = ctx->temp_reg;
2250//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2251//		if (r)
2252//			return r;
2253		alu.dst.write = 1;
2254		alu.dst.chan = 1;
2255
2256		alu.last = 1;
2257
2258		r = r600_bc_add_alu(ctx->bc, &alu);
2259		if (r)
2260			return r;
2261		r = r600_bc_add_literal(ctx->bc, ctx->value);
2262		if (r)
2263			return r;
2264	}
2265
2266	/* result.z = RoughApprox2ToX(tmp);*/
2267	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2268		memset(&alu, 0, sizeof(struct r600_bc_alu));
2269		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2270		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2271		if (r)
2272			return r;
2273		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2274
2275		alu.dst.sel = ctx->temp_reg;
2276		alu.dst.write = 1;
2277		alu.dst.chan = 2;
2278
2279		alu.last = 1;
2280
2281		r = r600_bc_add_alu(ctx->bc, &alu);
2282		if (r)
2283			return r;
2284		r = r600_bc_add_literal(ctx->bc, ctx->value);
2285		if (r)
2286			return r;
2287	}
2288
2289	/* result.w = 1.0;*/
2290	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2291		memset(&alu, 0, sizeof(struct r600_bc_alu));
2292
2293		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2294		alu.src[0].sel = V_SQ_ALU_SRC_1;
2295		alu.src[0].chan = 0;
2296
2297		alu.dst.sel = ctx->temp_reg;
2298		alu.dst.chan = 3;
2299		alu.dst.write = 1;
2300		alu.last = 1;
2301		r = r600_bc_add_alu(ctx->bc, &alu);
2302		if (r)
2303			return r;
2304		r = r600_bc_add_literal(ctx->bc, ctx->value);
2305		if (r)
2306			return r;
2307	}
2308	return tgsi_helper_copy(ctx, inst);
2309}
2310
2311static int tgsi_log(struct r600_shader_ctx *ctx)
2312{
2313	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2314	struct r600_bc_alu alu;
2315	int r;
2316
2317	/* result.x = floor(log2(src)); */
2318	if (inst->Dst[0].Register.WriteMask & 1) {
2319		memset(&alu, 0, sizeof(struct r600_bc_alu));
2320
2321		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2322		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2323		if (r)
2324			return r;
2325
2326		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2327
2328		alu.dst.sel = ctx->temp_reg;
2329		alu.dst.chan = 0;
2330		alu.dst.write = 1;
2331		alu.last = 1;
2332		r = r600_bc_add_alu(ctx->bc, &alu);
2333		if (r)
2334			return r;
2335
2336		r = r600_bc_add_literal(ctx->bc, ctx->value);
2337		if (r)
2338			return r;
2339
2340		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2341		alu.src[0].sel = ctx->temp_reg;
2342		alu.src[0].chan = 0;
2343
2344		alu.dst.sel = ctx->temp_reg;
2345		alu.dst.chan = 0;
2346		alu.dst.write = 1;
2347		alu.last = 1;
2348
2349		r = r600_bc_add_alu(ctx->bc, &alu);
2350		if (r)
2351			return r;
2352
2353		r = r600_bc_add_literal(ctx->bc, ctx->value);
2354		if (r)
2355			return r;
2356	}
2357
2358	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2359	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2360		memset(&alu, 0, sizeof(struct r600_bc_alu));
2361
2362		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2363		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2364		if (r)
2365			return r;
2366
2367		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2368
2369		alu.dst.sel = ctx->temp_reg;
2370		alu.dst.chan = 1;
2371		alu.dst.write = 1;
2372		alu.last = 1;
2373
2374		r = r600_bc_add_alu(ctx->bc, &alu);
2375		if (r)
2376			return r;
2377
2378		r = r600_bc_add_literal(ctx->bc, ctx->value);
2379		if (r)
2380			return r;
2381
2382		memset(&alu, 0, sizeof(struct r600_bc_alu));
2383
2384		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2385		alu.src[0].sel = ctx->temp_reg;
2386		alu.src[0].chan = 1;
2387
2388		alu.dst.sel = ctx->temp_reg;
2389		alu.dst.chan = 1;
2390		alu.dst.write = 1;
2391		alu.last = 1;
2392
2393		r = r600_bc_add_alu(ctx->bc, &alu);
2394		if (r)
2395			return r;
2396
2397		r = r600_bc_add_literal(ctx->bc, ctx->value);
2398		if (r)
2399			return r;
2400
2401		memset(&alu, 0, sizeof(struct r600_bc_alu));
2402
2403		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2404		alu.src[0].sel = ctx->temp_reg;
2405		alu.src[0].chan = 1;
2406
2407		alu.dst.sel = ctx->temp_reg;
2408		alu.dst.chan = 1;
2409		alu.dst.write = 1;
2410		alu.last = 1;
2411
2412		r = r600_bc_add_alu(ctx->bc, &alu);
2413		if (r)
2414			return r;
2415
2416		r = r600_bc_add_literal(ctx->bc, ctx->value);
2417		if (r)
2418			return r;
2419
2420		memset(&alu, 0, sizeof(struct r600_bc_alu));
2421
2422		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2423		alu.src[0].sel = ctx->temp_reg;
2424		alu.src[0].chan = 1;
2425
2426		alu.dst.sel = ctx->temp_reg;
2427		alu.dst.chan = 1;
2428		alu.dst.write = 1;
2429		alu.last = 1;
2430
2431		r = r600_bc_add_alu(ctx->bc, &alu);
2432		if (r)
2433			return r;
2434
2435		r = r600_bc_add_literal(ctx->bc, ctx->value);
2436		if (r)
2437			return r;
2438
2439		memset(&alu, 0, sizeof(struct r600_bc_alu));
2440
2441		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2442
2443		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2444		if (r)
2445			return r;
2446
2447		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2448
2449		alu.src[1].sel = ctx->temp_reg;
2450		alu.src[1].chan = 1;
2451
2452		alu.dst.sel = ctx->temp_reg;
2453		alu.dst.chan = 1;
2454		alu.dst.write = 1;
2455		alu.last = 1;
2456
2457		r = r600_bc_add_alu(ctx->bc, &alu);
2458		if (r)
2459			return r;
2460
2461		r = r600_bc_add_literal(ctx->bc, ctx->value);
2462		if (r)
2463			return r;
2464	}
2465
2466	/* result.z = log2(src);*/
2467	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2468		memset(&alu, 0, sizeof(struct r600_bc_alu));
2469
2470		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2471		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2472		if (r)
2473			return r;
2474
2475		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2476
2477		alu.dst.sel = ctx->temp_reg;
2478		alu.dst.write = 1;
2479		alu.dst.chan = 2;
2480		alu.last = 1;
2481
2482		r = r600_bc_add_alu(ctx->bc, &alu);
2483		if (r)
2484			return r;
2485
2486		r = r600_bc_add_literal(ctx->bc, ctx->value);
2487		if (r)
2488			return r;
2489	}
2490
2491	/* result.w = 1.0; */
2492	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2493		memset(&alu, 0, sizeof(struct r600_bc_alu));
2494
2495		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2496		alu.src[0].sel = V_SQ_ALU_SRC_1;
2497		alu.src[0].chan = 0;
2498
2499		alu.dst.sel = ctx->temp_reg;
2500		alu.dst.chan = 3;
2501		alu.dst.write = 1;
2502		alu.last = 1;
2503
2504		r = r600_bc_add_alu(ctx->bc, &alu);
2505		if (r)
2506			return r;
2507
2508		r = r600_bc_add_literal(ctx->bc, ctx->value);
2509		if (r)
2510			return r;
2511	}
2512
2513	return tgsi_helper_copy(ctx, inst);
2514}
2515
2516/* r6/7 only for now */
2517static int tgsi_arl(struct r600_shader_ctx *ctx)
2518{
2519	/* TODO from r600c, ar values don't persist between clauses */
2520	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2521	struct r600_bc_alu alu;
2522	int r;
2523	memset(&alu, 0, sizeof(struct r600_bc_alu));
2524
2525	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2526
2527	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2528	if (r)
2529		return r;
2530	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2531
2532	alu.last = 1;
2533
2534	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2535	if (r)
2536		return r;
2537	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2538	return 0;
2539}
2540
2541static int tgsi_opdst(struct r600_shader_ctx *ctx)
2542{
2543	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2544	struct r600_bc_alu alu;
2545	int i, r = 0;
2546
2547	for (i = 0; i < 4; i++) {
2548		memset(&alu, 0, sizeof(struct r600_bc_alu));
2549
2550		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2551		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2552		if (r)
2553			return r;
2554
2555	        if (i == 0 || i == 3) {
2556			alu.src[0].sel = V_SQ_ALU_SRC_1;
2557		} else {
2558			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2559			if (r)
2560				return r;
2561			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2562		}
2563
2564	        if (i == 0 || i == 2) {
2565			alu.src[1].sel = V_SQ_ALU_SRC_1;
2566		} else {
2567			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2568			if (r)
2569				return r;
2570			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2571		}
2572		if (i == 3)
2573			alu.last = 1;
2574		r = r600_bc_add_alu(ctx->bc, &alu);
2575		if (r)
2576			return r;
2577	}
2578	return 0;
2579}
2580
2581static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2582{
2583	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2584	struct r600_bc_alu alu;
2585	int r;
2586
2587	memset(&alu, 0, sizeof(struct r600_bc_alu));
2588	alu.inst = opcode;
2589	alu.predicate = 1;
2590
2591	alu.dst.sel = ctx->temp_reg;
2592	alu.dst.write = 1;
2593	alu.dst.chan = 0;
2594
2595	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2596	if (r)
2597		return r;
2598	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2599	alu.src[1].sel = V_SQ_ALU_SRC_0;
2600	alu.src[1].chan = 0;
2601
2602	alu.last = 1;
2603
2604	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2605	if (r)
2606		return r;
2607	return 0;
2608}
2609
2610static int pops(struct r600_shader_ctx *ctx, int pops)
2611{
2612	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2613	ctx->bc->cf_last->pop_count = pops;
2614	return 0;
2615}
2616
2617static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2618{
2619	switch(reason) {
2620	case FC_PUSH_VPM:
2621		ctx->bc->callstack[ctx->bc->call_sp].current--;
2622		break;
2623	case FC_PUSH_WQM:
2624	case FC_LOOP:
2625		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2626		break;
2627	case FC_REP:
2628		/* TOODO : for 16 vp asic should -= 2; */
2629		ctx->bc->callstack[ctx->bc->call_sp].current --;
2630		break;
2631	}
2632}
2633
2634static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2635{
2636	if (check_max_only) {
2637		int diff;
2638		switch (reason) {
2639		case FC_PUSH_VPM:
2640			diff = 1;
2641			break;
2642		case FC_PUSH_WQM:
2643			diff = 4;
2644			break;
2645		default:
2646			assert(0);
2647			diff = 0;
2648		}
2649		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2650		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2651			ctx->bc->callstack[ctx->bc->call_sp].max =
2652				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2653		}
2654		return;
2655	}
2656	switch (reason) {
2657	case FC_PUSH_VPM:
2658		ctx->bc->callstack[ctx->bc->call_sp].current++;
2659		break;
2660	case FC_PUSH_WQM:
2661	case FC_LOOP:
2662		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2663		break;
2664	case FC_REP:
2665		ctx->bc->callstack[ctx->bc->call_sp].current++;
2666		break;
2667	}
2668
2669	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2670	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2671		ctx->bc->callstack[ctx->bc->call_sp].max =
2672			ctx->bc->callstack[ctx->bc->call_sp].current;
2673	}
2674}
2675
2676static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2677{
2678	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2679
2680	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2681						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2682	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2683	sp->num_mid++;
2684}
2685
2686static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2687{
2688	ctx->bc->fc_sp++;
2689	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2690	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2691}
2692
2693static void fc_poplevel(struct r600_shader_ctx *ctx)
2694{
2695	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2696	if (sp->mid) {
2697		free(sp->mid);
2698		sp->mid = NULL;
2699	}
2700	sp->num_mid = 0;
2701	sp->start = NULL;
2702	sp->type = 0;
2703	ctx->bc->fc_sp--;
2704}
2705
/*
 * Disabled (compiled out) sketches of subroutine-return / flag-based flow
 * control helpers.  Several of them are empty stubs.
 */
#if 0
/* Append a CF RETURN instruction; always reports success. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a CF JUMP with the given pop count; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Test the flag, jump, reset the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, then emit the current instruction's CF opcode with a pop
 * count of 1, record it in the mid list of level fc_sp and emit a POP.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2753
2754static int tgsi_if(struct r600_shader_ctx *ctx)
2755{
2756	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2757
2758	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2759
2760	fc_pushlevel(ctx, FC_IF);
2761
2762	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2763	return 0;
2764}
2765
2766static int tgsi_else(struct r600_shader_ctx *ctx)
2767{
2768	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2769	ctx->bc->cf_last->pop_count = 1;
2770
2771	fc_set_mid(ctx, ctx->bc->fc_sp);
2772	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2773	return 0;
2774}
2775
2776static int tgsi_endif(struct r600_shader_ctx *ctx)
2777{
2778	pops(ctx, 1);
2779	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2780		R600_ERR("if/endif unbalanced in shader\n");
2781		return -1;
2782	}
2783
2784	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2785		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2786		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2787	} else {
2788		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2789	}
2790	fc_poplevel(ctx);
2791
2792	callstack_decrease_current(ctx, FC_PUSH_VPM);
2793	return 0;
2794}
2795
2796static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2797{
2798	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2799
2800	fc_pushlevel(ctx, FC_LOOP);
2801
2802	/* check stack depth */
2803	callstack_check_depth(ctx, FC_LOOP, 0);
2804	return 0;
2805}
2806
2807static int tgsi_endloop(struct r600_shader_ctx *ctx)
2808{
2809	int i;
2810
2811	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2812
2813	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2814		R600_ERR("loop/endloop in shader code are not paired.\n");
2815		return -EINVAL;
2816	}
2817
2818	/* fixup loop pointers - from r600isa
2819	   LOOP END points to CF after LOOP START,
2820	   LOOP START point to CF after LOOP END
2821	   BRK/CONT point to LOOP END CF
2822	*/
2823	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2824
2825	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2826
2827	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2828		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2829	}
2830	/* TODO add LOOPRET support */
2831	fc_poplevel(ctx);
2832	callstack_decrease_current(ctx, FC_LOOP);
2833	return 0;
2834}
2835
2836static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2837{
2838	unsigned int fscp;
2839
2840	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2841	{
2842		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2843			break;
2844	}
2845
2846	if (fscp == 0) {
2847		R600_ERR("Break not inside loop/endloop pair\n");
2848		return -EINVAL;
2849	}
2850
2851	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2852	ctx->bc->cf_last->pop_count = 1;
2853
2854	fc_set_mid(ctx, fscp);
2855
2856	pops(ctx, 1);
2857	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2858	return 0;
2859}
2860
/*
 * TGSI -> r600 translation table.
 *
 * Each row lists, in order:
 *   - the TGSI opcode (a bare number where the TGSI numbering has a gap),
 *   - a flag that is 1 only on the OP3 MULADD row (presumably "is a
 *     three-operand ALU op" -- confirm against the struct definition),
 *   - the hardware opcode associated with the row (read elsewhere in this
 *     file as inst_info->r600_opcode),
 *   - the handler that emits the bytecode.
 *
 * Rows appear in ascending opcode order with placeholders for the gaps,
 * which suggests the table is indexed directly by TGSI opcode -- TODO
 * confirm at the lookup site.  Opcodes without an implementation point at
 * tgsi_unsupported.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/* SLT is emitted as SETGT through the operand-swapping handler */
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	/* SLE is emitted as SETGE through the operand-swapping handler */
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
3024
3025static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3026	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3028	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3029	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3030	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3031	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3032	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3034	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3035	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3036	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3037	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3038	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3039	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3040	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3041	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3042	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3043	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3044	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3045	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046	/* gap */
3047	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3048	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3049	/* gap */
3050	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3051	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3052	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3053	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3055	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3057	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3058	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3059	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3060	/* gap */
3061	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3063	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3065	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3066	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3067	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3068	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3069	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3075	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3077	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3078	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3079	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3080	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3082	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3084	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3095	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3096	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3097	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3098	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3101	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3102	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3103	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3104	/* gap */
3105	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3108	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3109	/* gap */
3110	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3117	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3118	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3119	/* gap */
3120	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3126	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3129	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3132	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3134	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135	/* gap */
3136	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3140	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141	/* gap */
3142	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3143	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3144	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3151	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3152	/* gap */
3153	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3164	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3165	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3171	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3172	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3175	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3176	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3177	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3178	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3181};
3182