r600_shader.c revision 833f3a488a7ba0fa59e25f1e518f6b4616270143
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	for (i = 0; i < 10; i++) {
48		spi_vs_out_id[i] = 0;
49	}
50	for (i = 0; i < 32; i++) {
51		tmp = i << ((i & 3) * 8);
52		spi_vs_out_id[i / 4] |= tmp;
53	}
54	for (i = 0; i < 10; i++) {
55		r600_pipe_state_add_reg(rstate,
56					R_028614_SPI_VS_OUT_ID_0 + i * 4,
57					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58	}
59
60	r600_pipe_state_add_reg(rstate,
61			R_0286C4_SPI_VS_OUT_CONFIG,
62			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63			0xFFFFFFFF, NULL);
64	r600_pipe_state_add_reg(rstate,
65			R_028868_SQ_PGM_RESOURCES_VS,
66			S_028868_NUM_GPRS(rshader->bc.ngpr) |
67			S_028868_STACK_SIZE(rshader->bc.nstack),
68			0xFFFFFFFF, NULL);
69	r600_pipe_state_add_reg(rstate,
70			R_0288A4_SQ_PGM_RESOURCES_FS,
71			0x00000000, 0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_0288DC_SQ_PGM_CF_OFFSET_FS,
77			0x00000000, 0xFFFFFFFF, NULL);
78	r600_pipe_state_add_reg(rstate,
79			R_028858_SQ_PGM_START_VS,
80			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
81	r600_pipe_state_add_reg(rstate,
82			R_028894_SQ_PGM_START_FS,
83			r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
84
85	r600_pipe_state_add_reg(rstate,
86				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
87				0xFFFFFFFF, NULL);
88
89}
90
91int r600_find_vs_semantic_index(struct r600_shader *vs,
92				struct r600_shader *ps, int id)
93{
94	struct r600_shader_io *input = &ps->input[id];
95
96	for (int i = 0; i < vs->noutput; i++) {
97		if (input->name == vs->output[i].name &&
98			input->sid == vs->output[i].sid) {
99			return i - 1;
100		}
101	}
102	return 0;
103}
104
105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
106{
107	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
108	struct r600_pipe_state *rstate = &shader->rstate;
109	struct r600_shader *rshader = &shader->shader;
110	unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
111	int pos_index = -1, face_index = -1;
112
113	/* clear previous register */
114	rstate->nregs = 0;
115
116	for (i = 0; i < rshader->ninput; i++) {
117		tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
118		if (rshader->input[i].centroid)
119			tmp |= S_028644_SEL_CENTROID(1);
120		if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
121			tmp |= S_028644_SEL_LINEAR(1);
122
123		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
124			pos_index = i;
125		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
126		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
127		    rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
128			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
129		}
130		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
131			face_index = i;
132		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
133			rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
134			tmp |= S_028644_PT_SPRITE_TEX(1);
135		}
136		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
137	}
138	for (i = 0; i < rshader->noutput; i++) {
139		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
140			r600_pipe_state_add_reg(rstate,
141						R_02880C_DB_SHADER_CONTROL,
142						S_02880C_Z_EXPORT_ENABLE(1),
143						S_02880C_Z_EXPORT_ENABLE(1), NULL);
144		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
145			r600_pipe_state_add_reg(rstate,
146						R_02880C_DB_SHADER_CONTROL,
147						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
148						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
149	}
150
151	exports_ps = 0;
152	num_cout = 0;
153	for (i = 0; i < rshader->noutput; i++) {
154		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
155			exports_ps |= 1;
156		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
157			num_cout++;
158		}
159	}
160	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
161	if (!exports_ps) {
162		/* always at least export 1 component per pixel */
163		exports_ps = 2;
164	}
165
166	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
167				S_0286CC_PERSP_GRADIENT_ENA(1);
168	spi_input_z = 0;
169	if (pos_index != -1) {
170		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
171					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
172					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
173					S_0286CC_BARYC_SAMPLE_CNTL(1));
174		spi_input_z |= 1;
175	}
176
177	spi_ps_in_control_1 = 0;
178	if (face_index != -1) {
179		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
180			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
181	}
182
183	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
184	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
185	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
186	r600_pipe_state_add_reg(rstate,
187				R_028840_SQ_PGM_START_PS,
188				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
189	r600_pipe_state_add_reg(rstate,
190				R_028850_SQ_PGM_RESOURCES_PS,
191				S_028868_NUM_GPRS(rshader->bc.ngpr) |
192				S_028868_STACK_SIZE(rshader->bc.nstack),
193				0xFFFFFFFF, NULL);
194	r600_pipe_state_add_reg(rstate,
195				R_028854_SQ_PGM_EXPORTS_PS,
196				exports_ps, 0xFFFFFFFF, NULL);
197	r600_pipe_state_add_reg(rstate,
198				R_0288CC_SQ_PGM_CF_OFFSET_PS,
199				0x00000000, 0xFFFFFFFF, NULL);
200
201	if (rshader->uses_kill) {
202		/* only set some bits here, the other bits are set in the dsa state */
203		r600_pipe_state_add_reg(rstate,
204					R_02880C_DB_SHADER_CONTROL,
205					S_02880C_KILL_ENABLE(1),
206					S_02880C_KILL_ENABLE(1), NULL);
207	}
208	r600_pipe_state_add_reg(rstate,
209				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
210				0xFFFFFFFF, NULL);
211}
212
213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
214{
215	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
216	struct r600_shader *rshader = &shader->shader;
217	void *ptr;
218
219	/* copy new shader */
220	if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
221		shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
222		if (shader->bo_fetch == NULL) {
223			return -ENOMEM;
224		}
225		ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
226		memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
227		r600_bo_unmap(rctx->radeon, shader->bo_fetch);
228	}
229	if (shader->bo == NULL) {
230		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
231		if (shader->bo == NULL) {
232			return -ENOMEM;
233		}
234		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
235		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
236		r600_bo_unmap(rctx->radeon, shader->bo);
237	}
238	/* build state */
239	rshader->flat_shade = rctx->flatshade;
240	switch (rshader->processor_type) {
241	case TGSI_PROCESSOR_VERTEX:
242		if (rshader->family >= CHIP_CEDAR) {
243			evergreen_pipe_shader_vs(ctx, shader);
244		} else {
245			r600_pipe_shader_vs(ctx, shader);
246		}
247		break;
248	case TGSI_PROCESSOR_FRAGMENT:
249		if (rshader->family >= CHIP_CEDAR) {
250			evergreen_pipe_shader_ps(ctx, shader);
251		} else {
252			r600_pipe_shader_ps(ctx, shader);
253		}
254		break;
255	default:
256		return -EINVAL;
257	}
258	r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
259	return 0;
260}
261
262static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
263{
264	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
265	struct r600_shader *shader = &rshader->shader;
266	const struct util_format_description *desc;
267	enum pipe_format resource_format[160];
268	unsigned i, nresources = 0;
269	struct r600_bc *bc = &shader->bc_fetch;
270	struct r600_bc_cf *cf;
271	struct r600_bc_vtx *vtx;
272
273	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
274		return 0;
275	/* doing a full memcmp fell over the refcount */
276	if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
277	    (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
278                     rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
279		return 0;
280	}
281	rshader->vertex_elements = *rctx->vertex_elements;
282	for (i = 0; i < rctx->vertex_elements->count; i++) {
283		resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
284	}
285	r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
286	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
287		switch (cf->inst) {
288		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
289		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
290			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
291				desc = util_format_description(resource_format[vtx->buffer_id]);
292				if (desc == NULL) {
293					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
294					return -EINVAL;
295				}
296				vtx->dst_sel_x = desc->swizzle[0];
297				vtx->dst_sel_y = desc->swizzle[1];
298				vtx->dst_sel_z = desc->swizzle[2];
299				vtx->dst_sel_w = desc->swizzle[3];
300			}
301			break;
302		default:
303			break;
304		}
305	}
306	return r600_bc_build(&shader->bc_fetch);
307}
308
309int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
310{
311	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
312	int r;
313
314	if (shader == NULL)
315		return -EINVAL;
316	/* there should be enough input */
317	if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
318		R600_ERR("%d resources provided, expecting %d\n",
319			rctx->vertex_elements->count, shader->shader.bc.nresource);
320		return -EINVAL;
321	}
322	r = r600_shader_update(ctx, shader);
323	if (r)
324		return r;
325	return r600_pipe_shader(ctx, shader);
326}
327
328int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
329int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
330{
331	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
332	int r;
333
334//fprintf(stderr, "--------------------------------------------------------------\n");
335//tgsi_dump(tokens, 0);
336	shader->shader.family = r600_get_family(rctx->radeon);
337	r = r600_shader_from_tgsi(tokens, &shader->shader);
338	if (r) {
339		R600_ERR("translation from TGSI failed !\n");
340		return r;
341	}
342	r = r600_bc_build(&shader->shader.bc);
343	if (r) {
344		R600_ERR("building bytecode failed !\n");
345		return r;
346	}
347	if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
348		r = r600_bc_build(&shader->shader.bc_fetch);
349		if (r) {
350			R600_ERR("building bytecode failed !\n");
351			return r;
352		}
353	}
354//r600_bc_dump(&shader->shader.bc);
355//fprintf(stderr, "______________________________________________________________\n");
356	return 0;
357}
358
359void
360r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
361{
362	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
363
364	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
365
366	r600_bc_clear(&shader->shader.bc);
367
368	/* FIXME: is there more stuff to free? */
369}
370
371/*
372 * tgsi -> r600 shader
373 */
374struct r600_shader_tgsi_instruction;
375
376struct r600_shader_ctx {
377	struct tgsi_shader_info			info;
378	struct tgsi_parse_context		parse;
379	const struct tgsi_token			*tokens;
380	unsigned				type;
381	unsigned				file_offset[TGSI_FILE_COUNT];
382	unsigned				temp_reg;
383	struct r600_shader_tgsi_instruction	*inst_info;
384	struct r600_bc				*bc;
385	struct r600_bc				*bc_fetch;
386	struct r600_shader			*shader;
387	u32					value[4];
388	u32					*literals;
389	u32					nliterals;
390	u32					max_driver_temp_used;
391	/* needed for evergreen interpolation */
392	boolean                                 input_centroid;
393	boolean                                 input_linear;
394	boolean                                 input_perspective;
395	int					num_interp_gpr;
396};
397
/* One entry of a TGSI opcode translation table. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	unsigned	is_op3;
	/* r600 opcode associated with the TGSI opcode */
	unsigned	r600_opcode;
	/* translation callback, returns 0 on success or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};

/* translation tables, indexed by TGSI opcode */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[];

static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
407
408static int tgsi_is_supported(struct r600_shader_ctx *ctx)
409{
410	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
411	int j;
412
413	if (i->Instruction.NumDstRegs > 1) {
414		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
415		return -EINVAL;
416	}
417	if (i->Instruction.Predicate) {
418		R600_ERR("predicate unsupported\n");
419		return -EINVAL;
420	}
421#if 0
422	if (i->Instruction.Label) {
423		R600_ERR("label unsupported\n");
424		return -EINVAL;
425	}
426#endif
427	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
428		if (i->Src[j].Register.Dimension) {
429			R600_ERR("unsupported src %d (dimension %d)\n", j,
430				 i->Src[j].Register.Dimension);
431			return -EINVAL;
432		}
433	}
434	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
435		if (i->Dst[j].Register.Dimension) {
436			R600_ERR("unsupported dst (dimension)\n");
437			return -EINVAL;
438		}
439	}
440	return 0;
441}
442
443static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
444{
445	int i, r;
446	struct r600_bc_alu alu;
447	int gpr = 0, base_chan = 0;
448	int ij_index = 0;
449
450	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
451		ij_index = 0;
452		if (ctx->shader->input[input].centroid)
453			ij_index++;
454	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
455		ij_index = 0;
456		/* if we have perspective add one */
457		if (ctx->input_perspective)  {
458			ij_index++;
459			/* if we have perspective centroid */
460			if (ctx->input_centroid)
461				ij_index++;
462		}
463		if (ctx->shader->input[input].centroid)
464			ij_index++;
465	}
466
467	/* work out gpr and base_chan from index */
468	gpr = ij_index / 2;
469	base_chan = (2 * (ij_index % 2)) + 1;
470
471	for (i = 0; i < 8; i++) {
472		memset(&alu, 0, sizeof(struct r600_bc_alu));
473
474		if (i < 4)
475			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
476		else
477			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
478
479		if ((i > 1) && (i < 6)) {
480			alu.dst.sel = ctx->shader->input[input].gpr;
481			alu.dst.write = 1;
482		}
483
484		alu.dst.chan = i % 4;
485
486		alu.src[0].sel = gpr;
487		alu.src[0].chan = (base_chan - (i % 2));
488
489		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
490
491		alu.bank_swizzle_force = SQ_ALU_VEC_210;
492		if ((i % 4) == 3)
493			alu.last = 1;
494		r = r600_bc_add_alu(ctx->bc, &alu);
495		if (r)
496			return r;
497	}
498	return 0;
499}
500
501
502static int tgsi_declaration(struct r600_shader_ctx *ctx)
503{
504	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
505	struct r600_bc_vtx vtx;
506	unsigned i;
507	int r;
508
509	switch (d->Declaration.File) {
510	case TGSI_FILE_INPUT:
511		i = ctx->shader->ninput++;
512		ctx->shader->input[i].name = d->Semantic.Name;
513		ctx->shader->input[i].sid = d->Semantic.Index;
514		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
515		ctx->shader->input[i].centroid = d->Declaration.Centroid;
516		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
517		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
518			/* turn input into fetch */
519			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
520			vtx.inst = 0;
521			vtx.fetch_type = 0;
522			vtx.buffer_id = i;
523			/* register containing the index into the buffer */
524			vtx.src_gpr = 0;
525			vtx.src_sel_x = 0;
526			vtx.mega_fetch_count = 0x1F;
527			vtx.dst_gpr = ctx->shader->input[i].gpr;
528			vtx.dst_sel_x = 0;
529			vtx.dst_sel_y = 1;
530			vtx.dst_sel_z = 2;
531			vtx.dst_sel_w = 3;
532			vtx.use_const_fields = 1;
533			r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
534			if (r)
535				return r;
536		}
537		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
538			/* turn input into interpolate on EG */
539			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
540				if (ctx->shader->input[i].interpolate > 0) {
541					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
542					evergreen_interp_alu(ctx, i);
543				}
544			}
545		}
546		break;
547	case TGSI_FILE_OUTPUT:
548		i = ctx->shader->noutput++;
549		ctx->shader->output[i].name = d->Semantic.Name;
550		ctx->shader->output[i].sid = d->Semantic.Index;
551		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
552		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
553		break;
554	case TGSI_FILE_CONSTANT:
555	case TGSI_FILE_TEMPORARY:
556	case TGSI_FILE_SAMPLER:
557	case TGSI_FILE_ADDRESS:
558		break;
559	default:
560		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
561		return -EINVAL;
562	}
563	return 0;
564}
565
566static int r600_get_temp(struct r600_shader_ctx *ctx)
567{
568	return ctx->temp_reg + ctx->max_driver_temp_used++;
569}
570
571/*
572 * for evergreen we need to scan the shader to find the number of GPRs we need to
573 * reserve for interpolation.
574 *
575 * we need to know if we are going to emit
576 * any centroid inputs
577 * if perspective and linear are required
578*/
579static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
580{
581	int i;
582	int num_baryc;
583
584	ctx->input_linear = FALSE;
585	ctx->input_perspective = FALSE;
586	ctx->input_centroid = FALSE;
587	ctx->num_interp_gpr = 1;
588
589	/* any centroid inputs */
590	for (i = 0; i < ctx->info.num_inputs; i++) {
591		/* skip position/face */
592		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
593		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
594			continue;
595		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
596			ctx->input_linear = TRUE;
597		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
598			ctx->input_perspective = TRUE;
599		if (ctx->info.input_centroid[i])
600			ctx->input_centroid = TRUE;
601	}
602
603	num_baryc = 0;
604	/* ignoring sample for now */
605	if (ctx->input_perspective)
606		num_baryc++;
607	if (ctx->input_linear)
608		num_baryc++;
609	if (ctx->input_centroid)
610		num_baryc *= 2;
611
612	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
613
614	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
615	return ctx->num_interp_gpr;
616}
617
618int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
619{
620	struct tgsi_full_immediate *immediate;
621	struct r600_shader_ctx ctx;
622	struct r600_bc_output output[32];
623	unsigned output_done, noutput;
624	unsigned opcode;
625	int i, r = 0, pos0;
626
627	ctx.bc = &shader->bc;
628	ctx.bc_fetch = &shader->bc_fetch;
629	ctx.shader = shader;
630	r = r600_bc_init(ctx.bc, shader->family);
631	if (r)
632		return r;
633	ctx.tokens = tokens;
634	tgsi_scan_shader(tokens, &ctx.info);
635	tgsi_parse_init(&ctx.parse, tokens);
636	ctx.type = ctx.parse.FullHeader.Processor.Processor;
637	shader->processor_type = ctx.type;
638	if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
639		r = r600_bc_init(ctx.bc_fetch, shader->family);
640		if (r)
641			return r;
642		ctx.bc_fetch->type = -1;
643	}
644	ctx.bc->type = shader->processor_type;
645
646	/* register allocations */
647	/* Values [0,127] correspond to GPR[0..127].
648	 * Values [128,159] correspond to constant buffer bank 0
649	 * Values [160,191] correspond to constant buffer bank 1
650	 * Values [256,511] correspond to cfile constants c[0..255].
651	 * Other special values are shown in the list below.
652	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
653	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
654	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
655	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
656	 * 248	SQ_ALU_SRC_0: special constant 0.0.
657	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
658	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
659	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
660	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
661	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
662	 * 254	SQ_ALU_SRC_PV: previous vector result.
663	 * 255	SQ_ALU_SRC_PS: previous scalar result.
664	 */
665	for (i = 0; i < TGSI_FILE_COUNT; i++) {
666		ctx.file_offset[i] = 0;
667	}
668	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
669		ctx.file_offset[TGSI_FILE_INPUT] = 1;
670		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
671			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
672		} else {
673			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
674		}
675	}
676	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
677		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
678	}
679	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
680						ctx.info.file_count[TGSI_FILE_INPUT];
681	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
682						ctx.info.file_count[TGSI_FILE_OUTPUT];
683
684	ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
685
686	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
687	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
688			ctx.info.file_count[TGSI_FILE_TEMPORARY];
689
690	ctx.nliterals = 0;
691	ctx.literals = NULL;
692
693	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
694		tgsi_parse_token(&ctx.parse);
695		switch (ctx.parse.FullToken.Token.Type) {
696		case TGSI_TOKEN_TYPE_IMMEDIATE:
697			immediate = &ctx.parse.FullToken.FullImmediate;
698			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
699			if(ctx.literals == NULL) {
700				r = -ENOMEM;
701				goto out_err;
702			}
703			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
704			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
705			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
706			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
707			ctx.nliterals++;
708			break;
709		case TGSI_TOKEN_TYPE_DECLARATION:
710			r = tgsi_declaration(&ctx);
711			if (r)
712				goto out_err;
713			break;
714		case TGSI_TOKEN_TYPE_INSTRUCTION:
715			r = tgsi_is_supported(&ctx);
716			if (r)
717				goto out_err;
718			ctx.max_driver_temp_used = 0;
719			/* reserve first tmp for everyone */
720			r600_get_temp(&ctx);
721			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
722			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
723				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
724			else
725				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
726			r = ctx.inst_info->process(&ctx);
727			if (r)
728				goto out_err;
729			r = r600_bc_add_literal(ctx.bc, ctx.value);
730			if (r)
731				goto out_err;
732			break;
733		default:
734			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
735			r = -EINVAL;
736			goto out_err;
737		}
738	}
739	/* export output */
740	noutput = shader->noutput;
741	for (i = 0, pos0 = 0; i < noutput; i++) {
742		memset(&output[i], 0, sizeof(struct r600_bc_output));
743		output[i].gpr = shader->output[i].gpr;
744		output[i].elem_size = 3;
745		output[i].swizzle_x = 0;
746		output[i].swizzle_y = 1;
747		output[i].swizzle_z = 2;
748		output[i].swizzle_w = 3;
749		output[i].barrier = 1;
750		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
751		output[i].array_base = i - pos0;
752		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
753		switch (ctx.type) {
754		case TGSI_PROCESSOR_VERTEX:
755			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
756				output[i].array_base = 60;
757				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
758				/* position doesn't count in array_base */
759				pos0++;
760			}
761			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
762				output[i].array_base = 61;
763				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
764				/* position doesn't count in array_base */
765				pos0++;
766			}
767			break;
768		case TGSI_PROCESSOR_FRAGMENT:
769			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
770				output[i].array_base = shader->output[i].sid;
771				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
772			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
773				output[i].array_base = 61;
774				output[i].swizzle_x = 2;
775				output[i].swizzle_y = 7;
776				output[i].swizzle_z = output[i].swizzle_w = 7;
777				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
778			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
779				output[i].array_base = 61;
780				output[i].swizzle_x = 7;
781				output[i].swizzle_y = 1;
782				output[i].swizzle_z = output[i].swizzle_w = 7;
783				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
784			} else {
785				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
786				r = -EINVAL;
787				goto out_err;
788			}
789			break;
790		default:
791			R600_ERR("unsupported processor type %d\n", ctx.type);
792			r = -EINVAL;
793			goto out_err;
794		}
795	}
796	/* add fake param output for vertex shader if no param is exported */
797	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
798		for (i = 0, pos0 = 0; i < noutput; i++) {
799			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
800				pos0 = 1;
801				break;
802			}
803		}
804		if (!pos0) {
805			memset(&output[i], 0, sizeof(struct r600_bc_output));
806			output[i].gpr = 0;
807			output[i].elem_size = 3;
808			output[i].swizzle_x = 0;
809			output[i].swizzle_y = 1;
810			output[i].swizzle_z = 2;
811			output[i].swizzle_w = 3;
812			output[i].barrier = 1;
813			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
814			output[i].array_base = 0;
815			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
816			noutput++;
817		}
818	}
819	/* add fake pixel export */
820	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
821		memset(&output[0], 0, sizeof(struct r600_bc_output));
822		output[0].gpr = 0;
823		output[0].elem_size = 3;
824		output[0].swizzle_x = 7;
825		output[0].swizzle_y = 7;
826		output[0].swizzle_z = 7;
827		output[0].swizzle_w = 7;
828		output[0].barrier = 1;
829		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
830		output[0].array_base = 0;
831		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
832		noutput++;
833	}
834	/* set export done on last export of each type */
835	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
836		if (i == (noutput - 1)) {
837			output[i].end_of_program = 1;
838		}
839		if (!(output_done & (1 << output[i].type))) {
840			output_done |= (1 << output[i].type);
841			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
842		}
843	}
844	/* add return to fetch shader */
845	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
846		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
847			r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
848		} else {
849			r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
850		}
851	}
852	/* add output to bytecode */
853	for (i = 0; i < noutput; i++) {
854		r = r600_bc_add_output(ctx.bc, &output[i]);
855		if (r)
856			goto out_err;
857	}
858	free(ctx.literals);
859	tgsi_parse_free(&ctx.parse);
860	return 0;
861out_err:
862	free(ctx.literals);
863	tgsi_parse_free(&ctx.parse);
864	return r;
865}
866
867static int tgsi_unsupported(struct r600_shader_ctx *ctx)
868{
869	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
870	return -EINVAL;
871}
872
/*
 * Translation callback that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to read from the context */
	return 0;
}
877
878static int tgsi_src(struct r600_shader_ctx *ctx,
879			const struct tgsi_full_src_register *tgsi_src,
880			struct r600_bc_alu_src *r600_src)
881{
882	int index;
883	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
884	r600_src->sel = tgsi_src->Register.Index;
885	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
886		r600_src->sel = 0;
887		index = tgsi_src->Register.Index;
888		ctx->value[0] = ctx->literals[index * 4 + 0];
889		ctx->value[1] = ctx->literals[index * 4 + 1];
890		ctx->value[2] = ctx->literals[index * 4 + 2];
891		ctx->value[3] = ctx->literals[index * 4 + 3];
892	}
893	if (tgsi_src->Register.Indirect)
894		r600_src->rel = V_SQ_REL_RELATIVE;
895	r600_src->neg = tgsi_src->Register.Negate;
896	r600_src->abs = tgsi_src->Register.Absolute;
897	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
898	return 0;
899}
900
901static int tgsi_dst(struct r600_shader_ctx *ctx,
902			const struct tgsi_full_dst_register *tgsi_dst,
903			unsigned swizzle,
904			struct r600_bc_alu_dst *r600_dst)
905{
906	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
907
908	r600_dst->sel = tgsi_dst->Register.Index;
909	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
910	r600_dst->chan = swizzle;
911	r600_dst->write = 1;
912	if (tgsi_dst->Register.Indirect)
913		r600_dst->rel = V_SQ_REL_RELATIVE;
914	if (inst->Instruction.Saturate) {
915		r600_dst->clamp = 1;
916	}
917	return 0;
918}
919
920static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
921{
922	switch (swizzle) {
923	case 0:
924		return tgsi_src->Register.SwizzleX;
925	case 1:
926		return tgsi_src->Register.SwizzleY;
927	case 2:
928		return tgsi_src->Register.SwizzleZ;
929	case 3:
930		return tgsi_src->Register.SwizzleW;
931	default:
932		return 0;
933	}
934}
935
936static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
937{
938	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
939	struct r600_bc_alu alu;
940	int i, j, k, nconst, r;
941
942	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
943		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
944			nconst++;
945		}
946		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
947		if (r) {
948			return r;
949		}
950	}
951	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
952		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
953			int treg = r600_get_temp(ctx);
954			for (k = 0; k < 4; k++) {
955				memset(&alu, 0, sizeof(struct r600_bc_alu));
956				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
957				alu.src[0].sel = r600_src[i].sel;
958				alu.src[0].chan = k;
959				alu.src[0].rel = r600_src[i].rel;
960				alu.dst.sel = treg;
961				alu.dst.chan = k;
962				alu.dst.write = 1;
963				if (k == 3)
964					alu.last = 1;
965				r = r600_bc_add_alu(ctx->bc, &alu);
966				if (r)
967					return r;
968			}
969			r600_src[i].sel = treg;
970			r600_src[i].rel =0;
971			j--;
972		}
973	}
974	return 0;
975}
976
977/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
978static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
979{
980	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
981	struct r600_bc_alu alu;
982	int i, j, k, nliteral, r;
983
984	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
985		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
986			nliteral++;
987		}
988	}
989	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
990		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
991			int treg = r600_get_temp(ctx);
992			for (k = 0; k < 4; k++) {
993				memset(&alu, 0, sizeof(struct r600_bc_alu));
994				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
995				alu.src[0].sel = r600_src[i].sel;
996				alu.src[0].chan = k;
997				alu.dst.sel = treg;
998				alu.dst.chan = k;
999				alu.dst.write = 1;
1000				if (k == 3)
1001					alu.last = 1;
1002				r = r600_bc_add_alu(ctx->bc, &alu);
1003				if (r)
1004					return r;
1005			}
1006			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
1007			if (r)
1008				return r;
1009			r600_src[i].sel = treg;
1010			j--;
1011		}
1012	}
1013	return 0;
1014}
1015
1016static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1017{
1018	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1019	struct r600_bc_alu_src r600_src[3];
1020	struct r600_bc_alu alu;
1021	int i, j, r;
1022	int lasti = 0;
1023
1024	for (i = 0; i < 4; i++) {
1025		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1026			lasti = i;
1027		}
1028	}
1029
1030	r = tgsi_split_constant(ctx, r600_src);
1031	if (r)
1032		return r;
1033	r = tgsi_split_literal_constant(ctx, r600_src);
1034	if (r)
1035		return r;
1036	for (i = 0; i < lasti + 1; i++) {
1037		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1038			continue;
1039
1040		memset(&alu, 0, sizeof(struct r600_bc_alu));
1041		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1042		if (r)
1043			return r;
1044
1045		alu.inst = ctx->inst_info->r600_opcode;
1046		if (!swap) {
1047			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1048				alu.src[j] = r600_src[j];
1049				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1050			}
1051		} else {
1052			alu.src[0] = r600_src[1];
1053			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1054
1055			alu.src[1] = r600_src[0];
1056			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1057		}
1058		/* handle some special cases */
1059		switch (ctx->inst_info->tgsi_opcode) {
1060		case TGSI_OPCODE_SUB:
1061			alu.src[1].neg = 1;
1062			break;
1063		case TGSI_OPCODE_ABS:
1064			alu.src[0].abs = 1;
1065			break;
1066		default:
1067			break;
1068		}
1069		if (i == lasti) {
1070			alu.last = 1;
1071		}
1072		r = r600_bc_add_alu(ctx->bc, &alu);
1073		if (r)
1074			return r;
1075	}
1076	return 0;
1077}
1078
/* Per-channel ALU instruction with the source operands in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1083
/* Per-channel ALU instruction with the first two source operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1088
1089/*
1090 * r600 - trunc to -PI..PI range
1091 * r700 - normalize by dividing by 2PI
1092 * see fdo bug 27901
1093 */
1094static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
1095			   struct r600_bc_alu_src r600_src[3])
1096{
1097	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1098	int r;
1099	uint32_t lit_vals[4];
1100	struct r600_bc_alu alu;
1101
1102	memset(lit_vals, 0, 4*4);
1103	r = tgsi_split_constant(ctx, r600_src);
1104	if (r)
1105		return r;
1106	r = tgsi_split_literal_constant(ctx, r600_src);
1107	if (r)
1108		return r;
1109
1110	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
1111	lit_vals[1] = fui(0.5f);
1112
1113	memset(&alu, 0, sizeof(struct r600_bc_alu));
1114	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1115	alu.is_op3 = 1;
1116
1117	alu.dst.chan = 0;
1118	alu.dst.sel = ctx->temp_reg;
1119	alu.dst.write = 1;
1120
1121	alu.src[0] = r600_src[0];
1122	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1123
1124	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1125	alu.src[1].chan = 0;
1126	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1127	alu.src[2].chan = 1;
1128	alu.last = 1;
1129	r = r600_bc_add_alu(ctx->bc, &alu);
1130	if (r)
1131		return r;
1132	r = r600_bc_add_literal(ctx->bc, lit_vals);
1133	if (r)
1134		return r;
1135
1136	memset(&alu, 0, sizeof(struct r600_bc_alu));
1137	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1138
1139	alu.dst.chan = 0;
1140	alu.dst.sel = ctx->temp_reg;
1141	alu.dst.write = 1;
1142
1143	alu.src[0].sel = ctx->temp_reg;
1144	alu.src[0].chan = 0;
1145	alu.last = 1;
1146	r = r600_bc_add_alu(ctx->bc, &alu);
1147	if (r)
1148		return r;
1149
1150	if (ctx->bc->chiprev == CHIPREV_R600) {
1151		lit_vals[0] = fui(3.1415926535897f * 2.0f);
1152		lit_vals[1] = fui(-3.1415926535897f);
1153	} else {
1154		lit_vals[0] = fui(1.0f);
1155		lit_vals[1] = fui(-0.5f);
1156	}
1157
1158	memset(&alu, 0, sizeof(struct r600_bc_alu));
1159	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1160	alu.is_op3 = 1;
1161
1162	alu.dst.chan = 0;
1163	alu.dst.sel = ctx->temp_reg;
1164	alu.dst.write = 1;
1165
1166	alu.src[0].sel = ctx->temp_reg;
1167	alu.src[0].chan = 0;
1168
1169	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1170	alu.src[1].chan = 0;
1171	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1172	alu.src[2].chan = 1;
1173	alu.last = 1;
1174	r = r600_bc_add_alu(ctx->bc, &alu);
1175	if (r)
1176		return r;
1177	r = r600_bc_add_literal(ctx->bc, lit_vals);
1178	if (r)
1179		return r;
1180	return 0;
1181}
1182
1183static int tgsi_trig(struct r600_shader_ctx *ctx)
1184{
1185	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1186	struct r600_bc_alu_src r600_src[3];
1187	struct r600_bc_alu alu;
1188	int i, r;
1189	int lasti = 0;
1190
1191	r = tgsi_setup_trig(ctx, r600_src);
1192	if (r)
1193		return r;
1194
1195	memset(&alu, 0, sizeof(struct r600_bc_alu));
1196	alu.inst = ctx->inst_info->r600_opcode;
1197	alu.dst.chan = 0;
1198	alu.dst.sel = ctx->temp_reg;
1199	alu.dst.write = 1;
1200
1201	alu.src[0].sel = ctx->temp_reg;
1202	alu.src[0].chan = 0;
1203	alu.last = 1;
1204	r = r600_bc_add_alu(ctx->bc, &alu);
1205	if (r)
1206		return r;
1207
1208	/* replicate result */
1209	for (i = 0; i < 4; i++) {
1210		if (inst->Dst[0].Register.WriteMask & (1 << i))
1211			lasti = i;
1212	}
1213	for (i = 0; i < lasti + 1; i++) {
1214		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1215			continue;
1216
1217		memset(&alu, 0, sizeof(struct r600_bc_alu));
1218		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1219
1220		alu.src[0].sel = ctx->temp_reg;
1221		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1222		if (r)
1223			return r;
1224		if (i == lasti)
1225			alu.last = 1;
1226		r = r600_bc_add_alu(ctx->bc, &alu);
1227		if (r)
1228			return r;
1229	}
1230	return 0;
1231}
1232
1233static int tgsi_scs(struct r600_shader_ctx *ctx)
1234{
1235	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1236	struct r600_bc_alu_src r600_src[3];
1237	struct r600_bc_alu alu;
1238	int r;
1239
1240	/* We'll only need the trig stuff if we are going to write to the
1241	 * X or Y components of the destination vector.
1242	 */
1243	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1244		r = tgsi_setup_trig(ctx, r600_src);
1245		if (r)
1246			return r;
1247	}
1248
1249	/* dst.x = COS */
1250	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1251		memset(&alu, 0, sizeof(struct r600_bc_alu));
1252		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1253		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1254		if (r)
1255			return r;
1256
1257		alu.src[0].sel = ctx->temp_reg;
1258		alu.src[0].chan = 0;
1259		alu.last = 1;
1260		r = r600_bc_add_alu(ctx->bc, &alu);
1261		if (r)
1262			return r;
1263	}
1264
1265	/* dst.y = SIN */
1266	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1267		memset(&alu, 0, sizeof(struct r600_bc_alu));
1268		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1269		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1270		if (r)
1271			return r;
1272
1273		alu.src[0].sel = ctx->temp_reg;
1274		alu.src[0].chan = 0;
1275		alu.last = 1;
1276		r = r600_bc_add_alu(ctx->bc, &alu);
1277		if (r)
1278			return r;
1279	}
1280
1281	/* dst.z = 0.0; */
1282	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1283		memset(&alu, 0, sizeof(struct r600_bc_alu));
1284
1285		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1286
1287		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1288		if (r)
1289			return r;
1290
1291		alu.src[0].sel = V_SQ_ALU_SRC_0;
1292		alu.src[0].chan = 0;
1293
1294		alu.last = 1;
1295
1296		r = r600_bc_add_alu(ctx->bc, &alu);
1297		if (r)
1298			return r;
1299
1300		r = r600_bc_add_literal(ctx->bc, ctx->value);
1301		if (r)
1302			return r;
1303	}
1304
1305	/* dst.w = 1.0; */
1306	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1307		memset(&alu, 0, sizeof(struct r600_bc_alu));
1308
1309		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1310
1311		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1312		if (r)
1313			return r;
1314
1315		alu.src[0].sel = V_SQ_ALU_SRC_1;
1316		alu.src[0].chan = 0;
1317
1318		alu.last = 1;
1319
1320		r = r600_bc_add_alu(ctx->bc, &alu);
1321		if (r)
1322			return r;
1323
1324		r = r600_bc_add_literal(ctx->bc, ctx->value);
1325		if (r)
1326			return r;
1327	}
1328
1329	return 0;
1330}
1331
1332static int tgsi_kill(struct r600_shader_ctx *ctx)
1333{
1334	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1335	struct r600_bc_alu alu;
1336	int i, r;
1337
1338	for (i = 0; i < 4; i++) {
1339		memset(&alu, 0, sizeof(struct r600_bc_alu));
1340		alu.inst = ctx->inst_info->r600_opcode;
1341
1342		alu.dst.chan = i;
1343
1344		alu.src[0].sel = V_SQ_ALU_SRC_0;
1345
1346		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1347			alu.src[1].sel = V_SQ_ALU_SRC_1;
1348			alu.src[1].neg = 1;
1349		} else {
1350			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1351			if (r)
1352				return r;
1353			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1354		}
1355		if (i == 3) {
1356			alu.last = 1;
1357		}
1358		r = r600_bc_add_alu(ctx->bc, &alu);
1359		if (r)
1360			return r;
1361	}
1362	r = r600_bc_add_literal(ctx->bc, ctx->value);
1363	if (r)
1364		return r;
1365
1366	/* kill must be last in ALU */
1367	ctx->bc->force_add_cf = 1;
1368	ctx->shader->uses_kill = TRUE;
1369	return 0;
1370}
1371
1372static int tgsi_lit(struct r600_shader_ctx *ctx)
1373{
1374	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1375	struct r600_bc_alu alu;
1376	struct r600_bc_alu_src r600_src[3];
1377	int r;
1378
1379	r = tgsi_split_constant(ctx, r600_src);
1380	if (r)
1381		return r;
1382	r = tgsi_split_literal_constant(ctx, r600_src);
1383	if (r)
1384		return r;
1385
1386	/* dst.x, <- 1.0  */
1387	memset(&alu, 0, sizeof(struct r600_bc_alu));
1388	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1389	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1390	alu.src[0].chan = 0;
1391	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1392	if (r)
1393		return r;
1394	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1395	r = r600_bc_add_alu(ctx->bc, &alu);
1396	if (r)
1397		return r;
1398
1399	/* dst.y = max(src.x, 0.0) */
1400	memset(&alu, 0, sizeof(struct r600_bc_alu));
1401	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1402	alu.src[0] = r600_src[0];
1403	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1404	alu.src[1].chan = 0;
1405	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1406	if (r)
1407		return r;
1408	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1409	r = r600_bc_add_alu(ctx->bc, &alu);
1410	if (r)
1411		return r;
1412
1413	/* dst.w, <- 1.0  */
1414	memset(&alu, 0, sizeof(struct r600_bc_alu));
1415	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1416	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1417	alu.src[0].chan = 0;
1418	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1419	if (r)
1420		return r;
1421	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1422	alu.last = 1;
1423	r = r600_bc_add_alu(ctx->bc, &alu);
1424	if (r)
1425		return r;
1426
1427	r = r600_bc_add_literal(ctx->bc, ctx->value);
1428	if (r)
1429		return r;
1430
1431	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1432	{
1433		int chan;
1434		int sel;
1435
1436		/* dst.z = log(src.y) */
1437		memset(&alu, 0, sizeof(struct r600_bc_alu));
1438		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1439		alu.src[0] = r600_src[0];
1440		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1441		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1442		if (r)
1443			return r;
1444		alu.last = 1;
1445		r = r600_bc_add_alu(ctx->bc, &alu);
1446		if (r)
1447			return r;
1448
1449		r = r600_bc_add_literal(ctx->bc, ctx->value);
1450		if (r)
1451			return r;
1452
1453		chan = alu.dst.chan;
1454		sel = alu.dst.sel;
1455
1456		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1457		memset(&alu, 0, sizeof(struct r600_bc_alu));
1458		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1459		alu.src[0] = r600_src[0];
1460		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1461		alu.src[1].sel  = sel;
1462		alu.src[1].chan = chan;
1463
1464		alu.src[2] = r600_src[0];
1465		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1466		alu.dst.sel = ctx->temp_reg;
1467		alu.dst.chan = 0;
1468		alu.dst.write = 1;
1469		alu.is_op3 = 1;
1470		alu.last = 1;
1471		r = r600_bc_add_alu(ctx->bc, &alu);
1472		if (r)
1473			return r;
1474
1475		r = r600_bc_add_literal(ctx->bc, ctx->value);
1476		if (r)
1477			return r;
1478		/* dst.z = exp(tmp.x) */
1479		memset(&alu, 0, sizeof(struct r600_bc_alu));
1480		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1481		alu.src[0].sel = ctx->temp_reg;
1482		alu.src[0].chan = 0;
1483		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1484		if (r)
1485			return r;
1486		alu.last = 1;
1487		r = r600_bc_add_alu(ctx->bc, &alu);
1488		if (r)
1489			return r;
1490	}
1491	return 0;
1492}
1493
1494static int tgsi_rsq(struct r600_shader_ctx *ctx)
1495{
1496	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1497	struct r600_bc_alu alu;
1498	int i, r;
1499
1500	memset(&alu, 0, sizeof(struct r600_bc_alu));
1501
1502	/* FIXME:
1503	 * For state trackers other than OpenGL, we'll want to use
1504	 * _RECIPSQRT_IEEE instead.
1505	 */
1506	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1507
1508	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1509		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1510		if (r)
1511			return r;
1512		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1513		alu.src[i].abs = 1;
1514	}
1515	alu.dst.sel = ctx->temp_reg;
1516	alu.dst.write = 1;
1517	alu.last = 1;
1518	r = r600_bc_add_alu(ctx->bc, &alu);
1519	if (r)
1520		return r;
1521	r = r600_bc_add_literal(ctx->bc, ctx->value);
1522	if (r)
1523		return r;
1524	/* replicate result */
1525	return tgsi_helper_tempx_replicate(ctx);
1526}
1527
1528static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1529{
1530	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1531	struct r600_bc_alu alu;
1532	int i, r;
1533
1534	for (i = 0; i < 4; i++) {
1535		memset(&alu, 0, sizeof(struct r600_bc_alu));
1536		alu.src[0].sel = ctx->temp_reg;
1537		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1538		alu.dst.chan = i;
1539		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1540		if (r)
1541			return r;
1542		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1543		if (i == 3)
1544			alu.last = 1;
1545		r = r600_bc_add_alu(ctx->bc, &alu);
1546		if (r)
1547			return r;
1548	}
1549	return 0;
1550}
1551
1552static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1553{
1554	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1555	struct r600_bc_alu alu;
1556	int i, r;
1557
1558	memset(&alu, 0, sizeof(struct r600_bc_alu));
1559	alu.inst = ctx->inst_info->r600_opcode;
1560	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1561		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1562		if (r)
1563			return r;
1564		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1565	}
1566	alu.dst.sel = ctx->temp_reg;
1567	alu.dst.write = 1;
1568	alu.last = 1;
1569	r = r600_bc_add_alu(ctx->bc, &alu);
1570	if (r)
1571		return r;
1572	r = r600_bc_add_literal(ctx->bc, ctx->value);
1573	if (r)
1574		return r;
1575	/* replicate result */
1576	return tgsi_helper_tempx_replicate(ctx);
1577}
1578
1579static int tgsi_pow(struct r600_shader_ctx *ctx)
1580{
1581	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1582	struct r600_bc_alu alu;
1583	int r;
1584
1585	/* LOG2(a) */
1586	memset(&alu, 0, sizeof(struct r600_bc_alu));
1587	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1588	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1589	if (r)
1590		return r;
1591	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1592	alu.dst.sel = ctx->temp_reg;
1593	alu.dst.write = 1;
1594	alu.last = 1;
1595	r = r600_bc_add_alu(ctx->bc, &alu);
1596	if (r)
1597		return r;
1598	r = r600_bc_add_literal(ctx->bc,ctx->value);
1599	if (r)
1600		return r;
1601	/* b * LOG2(a) */
1602	memset(&alu, 0, sizeof(struct r600_bc_alu));
1603	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1604	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1605	if (r)
1606		return r;
1607	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1608	alu.src[1].sel = ctx->temp_reg;
1609	alu.dst.sel = ctx->temp_reg;
1610	alu.dst.write = 1;
1611	alu.last = 1;
1612	r = r600_bc_add_alu(ctx->bc, &alu);
1613	if (r)
1614		return r;
1615	r = r600_bc_add_literal(ctx->bc,ctx->value);
1616	if (r)
1617		return r;
1618	/* POW(a,b) = EXP2(b * LOG2(a))*/
1619	memset(&alu, 0, sizeof(struct r600_bc_alu));
1620	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1621	alu.src[0].sel = ctx->temp_reg;
1622	alu.dst.sel = ctx->temp_reg;
1623	alu.dst.write = 1;
1624	alu.last = 1;
1625	r = r600_bc_add_alu(ctx->bc, &alu);
1626	if (r)
1627		return r;
1628	r = r600_bc_add_literal(ctx->bc,ctx->value);
1629	if (r)
1630		return r;
1631	return tgsi_helper_tempx_replicate(ctx);
1632}
1633
1634static int tgsi_ssg(struct r600_shader_ctx *ctx)
1635{
1636	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1637	struct r600_bc_alu alu;
1638	struct r600_bc_alu_src r600_src[3];
1639	int i, r;
1640
1641	r = tgsi_split_constant(ctx, r600_src);
1642	if (r)
1643		return r;
1644	r = tgsi_split_literal_constant(ctx, r600_src);
1645	if (r)
1646		return r;
1647
1648	/* tmp = (src > 0 ? 1 : src) */
1649	for (i = 0; i < 4; i++) {
1650		memset(&alu, 0, sizeof(struct r600_bc_alu));
1651		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1652		alu.is_op3 = 1;
1653
1654		alu.dst.sel = ctx->temp_reg;
1655		alu.dst.chan = i;
1656
1657		alu.src[0] = r600_src[0];
1658		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1659
1660		alu.src[1].sel = V_SQ_ALU_SRC_1;
1661
1662		alu.src[2] = r600_src[0];
1663		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1664		if (i == 3)
1665			alu.last = 1;
1666		r = r600_bc_add_alu(ctx->bc, &alu);
1667		if (r)
1668			return r;
1669	}
1670	r = r600_bc_add_literal(ctx->bc, ctx->value);
1671	if (r)
1672		return r;
1673
1674	/* dst = (-tmp > 0 ? -1 : tmp) */
1675	for (i = 0; i < 4; i++) {
1676		memset(&alu, 0, sizeof(struct r600_bc_alu));
1677		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1678		alu.is_op3 = 1;
1679		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1680		if (r)
1681			return r;
1682
1683		alu.src[0].sel = ctx->temp_reg;
1684		alu.src[0].chan = i;
1685		alu.src[0].neg = 1;
1686
1687		alu.src[1].sel = V_SQ_ALU_SRC_1;
1688		alu.src[1].neg = 1;
1689
1690		alu.src[2].sel = ctx->temp_reg;
1691		alu.src[2].chan = i;
1692
1693		if (i == 3)
1694			alu.last = 1;
1695		r = r600_bc_add_alu(ctx->bc, &alu);
1696		if (r)
1697			return r;
1698	}
1699	return 0;
1700}
1701
1702static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1703{
1704	struct r600_bc_alu alu;
1705	int i, r;
1706
1707	r = r600_bc_add_literal(ctx->bc, ctx->value);
1708	if (r)
1709		return r;
1710	for (i = 0; i < 4; i++) {
1711		memset(&alu, 0, sizeof(struct r600_bc_alu));
1712		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1713			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1714			alu.dst.chan = i;
1715		} else {
1716			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1717			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1718			if (r)
1719				return r;
1720			alu.src[0].sel = ctx->temp_reg;
1721			alu.src[0].chan = i;
1722		}
1723		if (i == 3) {
1724			alu.last = 1;
1725		}
1726		r = r600_bc_add_alu(ctx->bc, &alu);
1727		if (r)
1728			return r;
1729	}
1730	return 0;
1731}
1732
1733static int tgsi_op3(struct r600_shader_ctx *ctx)
1734{
1735	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1736	struct r600_bc_alu_src r600_src[3];
1737	struct r600_bc_alu alu;
1738	int i, j, r;
1739
1740	r = tgsi_split_constant(ctx, r600_src);
1741	if (r)
1742		return r;
1743	r = tgsi_split_literal_constant(ctx, r600_src);
1744	if (r)
1745		return r;
1746	/* do it in 2 step as op3 doesn't support writemask */
1747	for (i = 0; i < 4; i++) {
1748		memset(&alu, 0, sizeof(struct r600_bc_alu));
1749		alu.inst = ctx->inst_info->r600_opcode;
1750		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1751			alu.src[j] = r600_src[j];
1752			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1753		}
1754		alu.dst.sel = ctx->temp_reg;
1755		alu.dst.chan = i;
1756		alu.dst.write = 1;
1757		alu.is_op3 = 1;
1758		if (i == 3) {
1759			alu.last = 1;
1760		}
1761		r = r600_bc_add_alu(ctx->bc, &alu);
1762		if (r)
1763			return r;
1764	}
1765	return tgsi_helper_copy(ctx, inst);
1766}
1767
1768static int tgsi_dp(struct r600_shader_ctx *ctx)
1769{
1770	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1771	struct r600_bc_alu_src r600_src[3];
1772	struct r600_bc_alu alu;
1773	int i, j, r;
1774
1775	r = tgsi_split_constant(ctx, r600_src);
1776	if (r)
1777		return r;
1778	r = tgsi_split_literal_constant(ctx, r600_src);
1779	if (r)
1780		return r;
1781	for (i = 0; i < 4; i++) {
1782		memset(&alu, 0, sizeof(struct r600_bc_alu));
1783		alu.inst = ctx->inst_info->r600_opcode;
1784		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1785			alu.src[j] = r600_src[j];
1786			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1787		}
1788		alu.dst.sel = ctx->temp_reg;
1789		alu.dst.chan = i;
1790		alu.dst.write = 1;
1791		/* handle some special cases */
1792		switch (ctx->inst_info->tgsi_opcode) {
1793		case TGSI_OPCODE_DP2:
1794			if (i > 1) {
1795				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1796				alu.src[0].chan = alu.src[1].chan = 0;
1797			}
1798			break;
1799		case TGSI_OPCODE_DP3:
1800			if (i > 2) {
1801				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1802				alu.src[0].chan = alu.src[1].chan = 0;
1803			}
1804			break;
1805		case TGSI_OPCODE_DPH:
1806			if (i == 3) {
1807				alu.src[0].sel = V_SQ_ALU_SRC_1;
1808				alu.src[0].chan = 0;
1809				alu.src[0].neg = 0;
1810			}
1811			break;
1812		default:
1813			break;
1814		}
1815		if (i == 3) {
1816			alu.last = 1;
1817		}
1818		r = r600_bc_add_alu(ctx->bc, &alu);
1819		if (r)
1820			return r;
1821	}
1822	return tgsi_helper_copy(ctx, inst);
1823}
1824
1825static int tgsi_tex(struct r600_shader_ctx *ctx)
1826{
1827	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1828	struct r600_bc_tex tex;
1829	struct r600_bc_alu alu;
1830	unsigned src_gpr;
1831	int r, i;
1832	int opcode;
1833	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1834	uint32_t lit_vals[4];
1835
1836	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1837
1838	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1839		/* Add perspective divide */
1840		memset(&alu, 0, sizeof(struct r600_bc_alu));
1841		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1842		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1843		if (r)
1844			return r;
1845
1846		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1847		alu.dst.sel = ctx->temp_reg;
1848		alu.dst.chan = 3;
1849		alu.last = 1;
1850		alu.dst.write = 1;
1851		r = r600_bc_add_alu(ctx->bc, &alu);
1852		if (r)
1853			return r;
1854
1855		for (i = 0; i < 3; i++) {
1856			memset(&alu, 0, sizeof(struct r600_bc_alu));
1857			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1858			alu.src[0].sel = ctx->temp_reg;
1859			alu.src[0].chan = 3;
1860			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1861			if (r)
1862				return r;
1863			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1864			alu.dst.sel = ctx->temp_reg;
1865			alu.dst.chan = i;
1866			alu.dst.write = 1;
1867			r = r600_bc_add_alu(ctx->bc, &alu);
1868			if (r)
1869				return r;
1870		}
1871		memset(&alu, 0, sizeof(struct r600_bc_alu));
1872		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1873		alu.src[0].sel = V_SQ_ALU_SRC_1;
1874		alu.src[0].chan = 0;
1875		alu.dst.sel = ctx->temp_reg;
1876		alu.dst.chan = 3;
1877		alu.last = 1;
1878		alu.dst.write = 1;
1879		r = r600_bc_add_alu(ctx->bc, &alu);
1880		if (r)
1881			return r;
1882		src_not_temp = FALSE;
1883		src_gpr = ctx->temp_reg;
1884	}
1885
1886	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1887		int src_chan, src2_chan;
1888
1889		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1890		for (i = 0; i < 4; i++) {
1891			memset(&alu, 0, sizeof(struct r600_bc_alu));
1892			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1893			switch (i) {
1894			case 0:
1895				src_chan = 2;
1896				src2_chan = 1;
1897				break;
1898			case 1:
1899				src_chan = 2;
1900				src2_chan = 0;
1901				break;
1902			case 2:
1903				src_chan = 0;
1904				src2_chan = 2;
1905				break;
1906			case 3:
1907				src_chan = 1;
1908				src2_chan = 2;
1909				break;
1910			default:
1911				assert(0);
1912				src_chan = 0;
1913				src2_chan = 0;
1914				break;
1915			}
1916			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1917			if (r)
1918				return r;
1919			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1920			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1921			if (r)
1922				return r;
1923			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1924			alu.dst.sel = ctx->temp_reg;
1925			alu.dst.chan = i;
1926			if (i == 3)
1927				alu.last = 1;
1928			alu.dst.write = 1;
1929			r = r600_bc_add_alu(ctx->bc, &alu);
1930			if (r)
1931				return r;
1932		}
1933
1934		/* tmp1.z = RCP_e(|tmp1.z|) */
1935		memset(&alu, 0, sizeof(struct r600_bc_alu));
1936		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1937		alu.src[0].sel = ctx->temp_reg;
1938		alu.src[0].chan = 2;
1939		alu.src[0].abs = 1;
1940		alu.dst.sel = ctx->temp_reg;
1941		alu.dst.chan = 2;
1942		alu.dst.write = 1;
1943		alu.last = 1;
1944		r = r600_bc_add_alu(ctx->bc, &alu);
1945		if (r)
1946			return r;
1947
1948		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1949		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1950		 * muladd has no writemask, have to use another temp
1951		 */
1952		memset(&alu, 0, sizeof(struct r600_bc_alu));
1953		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1954		alu.is_op3 = 1;
1955
1956		alu.src[0].sel = ctx->temp_reg;
1957		alu.src[0].chan = 0;
1958		alu.src[1].sel = ctx->temp_reg;
1959		alu.src[1].chan = 2;
1960
1961		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1962		alu.src[2].chan = 0;
1963
1964		alu.dst.sel = ctx->temp_reg;
1965		alu.dst.chan = 0;
1966		alu.dst.write = 1;
1967
1968		r = r600_bc_add_alu(ctx->bc, &alu);
1969		if (r)
1970			return r;
1971
1972		memset(&alu, 0, sizeof(struct r600_bc_alu));
1973		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1974		alu.is_op3 = 1;
1975
1976		alu.src[0].sel = ctx->temp_reg;
1977		alu.src[0].chan = 1;
1978		alu.src[1].sel = ctx->temp_reg;
1979		alu.src[1].chan = 2;
1980
1981		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1982		alu.src[2].chan = 0;
1983
1984		alu.dst.sel = ctx->temp_reg;
1985		alu.dst.chan = 1;
1986		alu.dst.write = 1;
1987
1988		alu.last = 1;
1989		r = r600_bc_add_alu(ctx->bc, &alu);
1990		if (r)
1991			return r;
1992
1993		lit_vals[0] = fui(1.5f);
1994
1995		r = r600_bc_add_literal(ctx->bc, lit_vals);
1996		if (r)
1997			return r;
1998		src_not_temp = FALSE;
1999		src_gpr = ctx->temp_reg;
2000	}
2001
2002	if (src_not_temp) {
2003		for (i = 0; i < 4; i++) {
2004			memset(&alu, 0, sizeof(struct r600_bc_alu));
2005			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2006			alu.src[0].sel = src_gpr;
2007			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2008			alu.dst.sel = ctx->temp_reg;
2009			alu.dst.chan = i;
2010			if (i == 3)
2011				alu.last = 1;
2012			alu.dst.write = 1;
2013			r = r600_bc_add_alu(ctx->bc, &alu);
2014			if (r)
2015				return r;
2016		}
2017		src_gpr = ctx->temp_reg;
2018	}
2019
2020	opcode = ctx->inst_info->r600_opcode;
2021	if (opcode == SQ_TEX_INST_SAMPLE &&
2022	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
2023		opcode = SQ_TEX_INST_SAMPLE_C;
2024
2025	memset(&tex, 0, sizeof(struct r600_bc_tex));
2026	tex.inst = opcode;
2027	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
2028	tex.resource_id = tex.sampler_id;
2029	tex.src_gpr = src_gpr;
2030	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2031	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2032	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2033	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2034	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2035	tex.src_sel_x = 0;
2036	tex.src_sel_y = 1;
2037	tex.src_sel_z = 2;
2038	tex.src_sel_w = 3;
2039
2040	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2041		tex.src_sel_x = 1;
2042		tex.src_sel_y = 0;
2043		tex.src_sel_z = 3;
2044		tex.src_sel_w = 1;
2045	}
2046
2047	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2048		tex.coord_type_x = 1;
2049		tex.coord_type_y = 1;
2050		tex.coord_type_z = 1;
2051		tex.coord_type_w = 1;
2052	}
2053
2054	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2055		tex.src_sel_w = 2;
2056
2057	r = r600_bc_add_tex(ctx->bc, &tex);
2058	if (r)
2059		return r;
2060
2061	/* add shadow ambient support  - gallium doesn't do it yet */
2062	return 0;
2063}
2064
2065static int tgsi_lrp(struct r600_shader_ctx *ctx)
2066{
2067	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2068	struct r600_bc_alu_src r600_src[3];
2069	struct r600_bc_alu alu;
2070	unsigned i;
2071	int r;
2072
2073	r = tgsi_split_constant(ctx, r600_src);
2074	if (r)
2075		return r;
2076	r = tgsi_split_literal_constant(ctx, r600_src);
2077	if (r)
2078		return r;
2079	/* 1 - src0 */
2080	for (i = 0; i < 4; i++) {
2081		memset(&alu, 0, sizeof(struct r600_bc_alu));
2082		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2083		alu.src[0].sel = V_SQ_ALU_SRC_1;
2084		alu.src[0].chan = 0;
2085		alu.src[1] = r600_src[0];
2086		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
2087		alu.src[1].neg = 1;
2088		alu.dst.sel = ctx->temp_reg;
2089		alu.dst.chan = i;
2090		if (i == 3) {
2091			alu.last = 1;
2092		}
2093		alu.dst.write = 1;
2094		r = r600_bc_add_alu(ctx->bc, &alu);
2095		if (r)
2096			return r;
2097	}
2098	r = r600_bc_add_literal(ctx->bc, ctx->value);
2099	if (r)
2100		return r;
2101
2102	/* (1 - src0) * src2 */
2103	for (i = 0; i < 4; i++) {
2104		memset(&alu, 0, sizeof(struct r600_bc_alu));
2105		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2106		alu.src[0].sel = ctx->temp_reg;
2107		alu.src[0].chan = i;
2108		alu.src[1] = r600_src[2];
2109		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2110		alu.dst.sel = ctx->temp_reg;
2111		alu.dst.chan = i;
2112		if (i == 3) {
2113			alu.last = 1;
2114		}
2115		alu.dst.write = 1;
2116		r = r600_bc_add_alu(ctx->bc, &alu);
2117		if (r)
2118			return r;
2119	}
2120	r = r600_bc_add_literal(ctx->bc, ctx->value);
2121	if (r)
2122		return r;
2123
2124	/* src0 * src1 + (1 - src0) * src2 */
2125	for (i = 0; i < 4; i++) {
2126		memset(&alu, 0, sizeof(struct r600_bc_alu));
2127		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2128		alu.is_op3 = 1;
2129		alu.src[0] = r600_src[0];
2130		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2131		alu.src[1] = r600_src[1];
2132		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2133		alu.src[2].sel = ctx->temp_reg;
2134		alu.src[2].chan = i;
2135		alu.dst.sel = ctx->temp_reg;
2136		alu.dst.chan = i;
2137		if (i == 3) {
2138			alu.last = 1;
2139		}
2140		r = r600_bc_add_alu(ctx->bc, &alu);
2141		if (r)
2142			return r;
2143	}
2144	return tgsi_helper_copy(ctx, inst);
2145}
2146
2147static int tgsi_cmp(struct r600_shader_ctx *ctx)
2148{
2149	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2150	struct r600_bc_alu_src r600_src[3];
2151	struct r600_bc_alu alu;
2152	int use_temp = 0;
2153	int i, r;
2154
2155	r = tgsi_split_constant(ctx, r600_src);
2156	if (r)
2157		return r;
2158	r = tgsi_split_literal_constant(ctx, r600_src);
2159	if (r)
2160		return r;
2161
2162	if (inst->Dst[0].Register.WriteMask != 0xf)
2163		use_temp = 1;
2164
2165	for (i = 0; i < 4; i++) {
2166		memset(&alu, 0, sizeof(struct r600_bc_alu));
2167		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2168		alu.src[0] = r600_src[0];
2169		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2170
2171		alu.src[1] = r600_src[2];
2172		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2173
2174		alu.src[2] = r600_src[1];
2175		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2176
2177		if (use_temp)
2178			alu.dst.sel = ctx->temp_reg;
2179		else {
2180			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2181			if (r)
2182				return r;
2183		}
2184		alu.dst.chan = i;
2185		alu.dst.write = 1;
2186		alu.is_op3 = 1;
2187		if (i == 3)
2188			alu.last = 1;
2189		r = r600_bc_add_alu(ctx->bc, &alu);
2190		if (r)
2191			return r;
2192	}
2193	if (use_temp)
2194		return tgsi_helper_copy(ctx, inst);
2195	return 0;
2196}
2197
2198static int tgsi_xpd(struct r600_shader_ctx *ctx)
2199{
2200	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2201	struct r600_bc_alu_src r600_src[3];
2202	struct r600_bc_alu alu;
2203	uint32_t use_temp = 0;
2204	int i, r;
2205
2206	if (inst->Dst[0].Register.WriteMask != 0xf)
2207		use_temp = 1;
2208
2209	r = tgsi_split_constant(ctx, r600_src);
2210	if (r)
2211		return r;
2212	r = tgsi_split_literal_constant(ctx, r600_src);
2213	if (r)
2214		return r;
2215
2216	for (i = 0; i < 4; i++) {
2217		memset(&alu, 0, sizeof(struct r600_bc_alu));
2218		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2219
2220		alu.src[0] = r600_src[0];
2221		switch (i) {
2222		case 0:
2223			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2224			break;
2225		case 1:
2226			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2227			break;
2228		case 2:
2229			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2230			break;
2231		case 3:
2232			alu.src[0].sel = V_SQ_ALU_SRC_0;
2233			alu.src[0].chan = i;
2234		}
2235
2236		alu.src[1] = r600_src[1];
2237		switch (i) {
2238		case 0:
2239			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2240			break;
2241		case 1:
2242			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2243			break;
2244		case 2:
2245			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2246			break;
2247		case 3:
2248			alu.src[1].sel = V_SQ_ALU_SRC_0;
2249			alu.src[1].chan = i;
2250		}
2251
2252		alu.dst.sel = ctx->temp_reg;
2253		alu.dst.chan = i;
2254		alu.dst.write = 1;
2255
2256		if (i == 3)
2257			alu.last = 1;
2258		r = r600_bc_add_alu(ctx->bc, &alu);
2259		if (r)
2260			return r;
2261
2262		r = r600_bc_add_literal(ctx->bc, ctx->value);
2263		if (r)
2264			return r;
2265	}
2266
2267	for (i = 0; i < 4; i++) {
2268		memset(&alu, 0, sizeof(struct r600_bc_alu));
2269		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2270
2271		alu.src[0] = r600_src[0];
2272		switch (i) {
2273		case 0:
2274			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2275			break;
2276		case 1:
2277			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2278			break;
2279		case 2:
2280			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2281			break;
2282		case 3:
2283			alu.src[0].sel = V_SQ_ALU_SRC_0;
2284			alu.src[0].chan = i;
2285		}
2286
2287		alu.src[1] = r600_src[1];
2288		switch (i) {
2289		case 0:
2290			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2291			break;
2292		case 1:
2293			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2294			break;
2295		case 2:
2296			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2297			break;
2298		case 3:
2299			alu.src[1].sel = V_SQ_ALU_SRC_0;
2300			alu.src[1].chan = i;
2301		}
2302
2303		alu.src[2].sel = ctx->temp_reg;
2304		alu.src[2].neg = 1;
2305		alu.src[2].chan = i;
2306
2307		if (use_temp)
2308			alu.dst.sel = ctx->temp_reg;
2309		else {
2310			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2311			if (r)
2312				return r;
2313		}
2314		alu.dst.chan = i;
2315		alu.dst.write = 1;
2316		alu.is_op3 = 1;
2317		if (i == 3)
2318			alu.last = 1;
2319		r = r600_bc_add_alu(ctx->bc, &alu);
2320		if (r)
2321			return r;
2322
2323		r = r600_bc_add_literal(ctx->bc, ctx->value);
2324		if (r)
2325			return r;
2326	}
2327	if (use_temp)
2328		return tgsi_helper_copy(ctx, inst);
2329	return 0;
2330}
2331
2332static int tgsi_exp(struct r600_shader_ctx *ctx)
2333{
2334	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2335	struct r600_bc_alu_src r600_src[3] = { { 0 } };
2336	struct r600_bc_alu alu;
2337	int r;
2338
2339	/* result.x = 2^floor(src); */
2340	if (inst->Dst[0].Register.WriteMask & 1) {
2341		memset(&alu, 0, sizeof(struct r600_bc_alu));
2342
2343		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2344		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2345		if (r)
2346			return r;
2347
2348		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2349
2350		alu.dst.sel = ctx->temp_reg;
2351		alu.dst.chan = 0;
2352		alu.dst.write = 1;
2353		alu.last = 1;
2354		r = r600_bc_add_alu(ctx->bc, &alu);
2355		if (r)
2356			return r;
2357
2358		r = r600_bc_add_literal(ctx->bc, ctx->value);
2359		if (r)
2360			return r;
2361
2362		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2363		alu.src[0].sel = ctx->temp_reg;
2364		alu.src[0].chan = 0;
2365
2366		alu.dst.sel = ctx->temp_reg;
2367		alu.dst.chan = 0;
2368		alu.dst.write = 1;
2369		alu.last = 1;
2370		r = r600_bc_add_alu(ctx->bc, &alu);
2371		if (r)
2372			return r;
2373
2374		r = r600_bc_add_literal(ctx->bc, ctx->value);
2375		if (r)
2376			return r;
2377	}
2378
2379	/* result.y = tmp - floor(tmp); */
2380	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2381		memset(&alu, 0, sizeof(struct r600_bc_alu));
2382
2383		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2384		alu.src[0] = r600_src[0];
2385		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2386		if (r)
2387			return r;
2388		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2389
2390		alu.dst.sel = ctx->temp_reg;
2391//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2392//		if (r)
2393//			return r;
2394		alu.dst.write = 1;
2395		alu.dst.chan = 1;
2396
2397		alu.last = 1;
2398
2399		r = r600_bc_add_alu(ctx->bc, &alu);
2400		if (r)
2401			return r;
2402		r = r600_bc_add_literal(ctx->bc, ctx->value);
2403		if (r)
2404			return r;
2405	}
2406
2407	/* result.z = RoughApprox2ToX(tmp);*/
2408	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2409		memset(&alu, 0, sizeof(struct r600_bc_alu));
2410		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2411		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2412		if (r)
2413			return r;
2414		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2415
2416		alu.dst.sel = ctx->temp_reg;
2417		alu.dst.write = 1;
2418		alu.dst.chan = 2;
2419
2420		alu.last = 1;
2421
2422		r = r600_bc_add_alu(ctx->bc, &alu);
2423		if (r)
2424			return r;
2425		r = r600_bc_add_literal(ctx->bc, ctx->value);
2426		if (r)
2427			return r;
2428	}
2429
2430	/* result.w = 1.0;*/
2431	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2432		memset(&alu, 0, sizeof(struct r600_bc_alu));
2433
2434		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2435		alu.src[0].sel = V_SQ_ALU_SRC_1;
2436		alu.src[0].chan = 0;
2437
2438		alu.dst.sel = ctx->temp_reg;
2439		alu.dst.chan = 3;
2440		alu.dst.write = 1;
2441		alu.last = 1;
2442		r = r600_bc_add_alu(ctx->bc, &alu);
2443		if (r)
2444			return r;
2445		r = r600_bc_add_literal(ctx->bc, ctx->value);
2446		if (r)
2447			return r;
2448	}
2449	return tgsi_helper_copy(ctx, inst);
2450}
2451
2452static int tgsi_log(struct r600_shader_ctx *ctx)
2453{
2454	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2455	struct r600_bc_alu alu;
2456	int r;
2457
2458	/* result.x = floor(log2(src)); */
2459	if (inst->Dst[0].Register.WriteMask & 1) {
2460		memset(&alu, 0, sizeof(struct r600_bc_alu));
2461
2462		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2463		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2464		if (r)
2465			return r;
2466
2467		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2468
2469		alu.dst.sel = ctx->temp_reg;
2470		alu.dst.chan = 0;
2471		alu.dst.write = 1;
2472		alu.last = 1;
2473		r = r600_bc_add_alu(ctx->bc, &alu);
2474		if (r)
2475			return r;
2476
2477		r = r600_bc_add_literal(ctx->bc, ctx->value);
2478		if (r)
2479			return r;
2480
2481		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2482		alu.src[0].sel = ctx->temp_reg;
2483		alu.src[0].chan = 0;
2484
2485		alu.dst.sel = ctx->temp_reg;
2486		alu.dst.chan = 0;
2487		alu.dst.write = 1;
2488		alu.last = 1;
2489
2490		r = r600_bc_add_alu(ctx->bc, &alu);
2491		if (r)
2492			return r;
2493
2494		r = r600_bc_add_literal(ctx->bc, ctx->value);
2495		if (r)
2496			return r;
2497	}
2498
2499	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2500	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2501		memset(&alu, 0, sizeof(struct r600_bc_alu));
2502
2503		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2504		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2505		if (r)
2506			return r;
2507
2508		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2509
2510		alu.dst.sel = ctx->temp_reg;
2511		alu.dst.chan = 1;
2512		alu.dst.write = 1;
2513		alu.last = 1;
2514
2515		r = r600_bc_add_alu(ctx->bc, &alu);
2516		if (r)
2517			return r;
2518
2519		r = r600_bc_add_literal(ctx->bc, ctx->value);
2520		if (r)
2521			return r;
2522
2523		memset(&alu, 0, sizeof(struct r600_bc_alu));
2524
2525		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2526		alu.src[0].sel = ctx->temp_reg;
2527		alu.src[0].chan = 1;
2528
2529		alu.dst.sel = ctx->temp_reg;
2530		alu.dst.chan = 1;
2531		alu.dst.write = 1;
2532		alu.last = 1;
2533
2534		r = r600_bc_add_alu(ctx->bc, &alu);
2535		if (r)
2536			return r;
2537
2538		r = r600_bc_add_literal(ctx->bc, ctx->value);
2539		if (r)
2540			return r;
2541
2542		memset(&alu, 0, sizeof(struct r600_bc_alu));
2543
2544		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2545		alu.src[0].sel = ctx->temp_reg;
2546		alu.src[0].chan = 1;
2547
2548		alu.dst.sel = ctx->temp_reg;
2549		alu.dst.chan = 1;
2550		alu.dst.write = 1;
2551		alu.last = 1;
2552
2553		r = r600_bc_add_alu(ctx->bc, &alu);
2554		if (r)
2555			return r;
2556
2557		r = r600_bc_add_literal(ctx->bc, ctx->value);
2558		if (r)
2559			return r;
2560
2561		memset(&alu, 0, sizeof(struct r600_bc_alu));
2562
2563		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2564		alu.src[0].sel = ctx->temp_reg;
2565		alu.src[0].chan = 1;
2566
2567		alu.dst.sel = ctx->temp_reg;
2568		alu.dst.chan = 1;
2569		alu.dst.write = 1;
2570		alu.last = 1;
2571
2572		r = r600_bc_add_alu(ctx->bc, &alu);
2573		if (r)
2574			return r;
2575
2576		r = r600_bc_add_literal(ctx->bc, ctx->value);
2577		if (r)
2578			return r;
2579
2580		memset(&alu, 0, sizeof(struct r600_bc_alu));
2581
2582		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2583
2584		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2585		if (r)
2586			return r;
2587
2588		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2589
2590		alu.src[1].sel = ctx->temp_reg;
2591		alu.src[1].chan = 1;
2592
2593		alu.dst.sel = ctx->temp_reg;
2594		alu.dst.chan = 1;
2595		alu.dst.write = 1;
2596		alu.last = 1;
2597
2598		r = r600_bc_add_alu(ctx->bc, &alu);
2599		if (r)
2600			return r;
2601
2602		r = r600_bc_add_literal(ctx->bc, ctx->value);
2603		if (r)
2604			return r;
2605	}
2606
2607	/* result.z = log2(src);*/
2608	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2609		memset(&alu, 0, sizeof(struct r600_bc_alu));
2610
2611		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2612		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2613		if (r)
2614			return r;
2615
2616		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2617
2618		alu.dst.sel = ctx->temp_reg;
2619		alu.dst.write = 1;
2620		alu.dst.chan = 2;
2621		alu.last = 1;
2622
2623		r = r600_bc_add_alu(ctx->bc, &alu);
2624		if (r)
2625			return r;
2626
2627		r = r600_bc_add_literal(ctx->bc, ctx->value);
2628		if (r)
2629			return r;
2630	}
2631
2632	/* result.w = 1.0; */
2633	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2634		memset(&alu, 0, sizeof(struct r600_bc_alu));
2635
2636		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2637		alu.src[0].sel = V_SQ_ALU_SRC_1;
2638		alu.src[0].chan = 0;
2639
2640		alu.dst.sel = ctx->temp_reg;
2641		alu.dst.chan = 3;
2642		alu.dst.write = 1;
2643		alu.last = 1;
2644
2645		r = r600_bc_add_alu(ctx->bc, &alu);
2646		if (r)
2647			return r;
2648
2649		r = r600_bc_add_literal(ctx->bc, ctx->value);
2650		if (r)
2651			return r;
2652	}
2653
2654	return tgsi_helper_copy(ctx, inst);
2655}
2656
2657static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2658{
2659	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2660	struct r600_bc_alu alu;
2661	int r;
2662	memset(&alu, 0, sizeof(struct r600_bc_alu));
2663
2664	switch (inst->Instruction.Opcode) {
2665	case TGSI_OPCODE_ARL:
2666		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2667		break;
2668	case TGSI_OPCODE_ARR:
2669		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2670		break;
2671	default:
2672		assert(0);
2673		return -1;
2674	}
2675
2676	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2677	if (r)
2678		return r;
2679	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2680	alu.last = 1;
2681	alu.dst.chan = 0;
2682	alu.dst.sel = ctx->temp_reg;
2683	alu.dst.write = 1;
2684	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2685	if (r)
2686		return r;
2687	memset(&alu, 0, sizeof(struct r600_bc_alu));
2688	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2689	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2690	if (r)
2691		return r;
2692	alu.src[0].sel = ctx->temp_reg;
2693	alu.src[0].chan = 0;
2694	alu.last = 1;
2695	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2696	if (r)
2697		return r;
2698	return 0;
2699}
2700static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2701{
2702	/* TODO from r600c, ar values don't persist between clauses */
2703	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2704	struct r600_bc_alu alu;
2705	int r;
2706	memset(&alu, 0, sizeof(struct r600_bc_alu));
2707
2708	switch (inst->Instruction.Opcode) {
2709	case TGSI_OPCODE_ARL:
2710		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2711		break;
2712	case TGSI_OPCODE_ARR:
2713		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2714		break;
2715	default:
2716		assert(0);
2717		return -1;
2718	}
2719
2720
2721	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2722	if (r)
2723		return r;
2724	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2725
2726	alu.last = 1;
2727
2728	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2729	if (r)
2730		return r;
2731	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2732	return 0;
2733}
2734
2735static int tgsi_opdst(struct r600_shader_ctx *ctx)
2736{
2737	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2738	struct r600_bc_alu alu;
2739	int i, r = 0;
2740
2741	for (i = 0; i < 4; i++) {
2742		memset(&alu, 0, sizeof(struct r600_bc_alu));
2743
2744		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2745		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2746		if (r)
2747			return r;
2748
2749		if (i == 0 || i == 3) {
2750			alu.src[0].sel = V_SQ_ALU_SRC_1;
2751		} else {
2752			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2753			if (r)
2754				return r;
2755			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2756		}
2757
2758	        if (i == 0 || i == 2) {
2759			alu.src[1].sel = V_SQ_ALU_SRC_1;
2760		} else {
2761			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2762			if (r)
2763				return r;
2764			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2765		}
2766		if (i == 3)
2767			alu.last = 1;
2768		r = r600_bc_add_alu(ctx->bc, &alu);
2769		if (r)
2770			return r;
2771	}
2772	return 0;
2773}
2774
2775static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2776{
2777	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2778	struct r600_bc_alu alu;
2779	int r;
2780
2781	memset(&alu, 0, sizeof(struct r600_bc_alu));
2782	alu.inst = opcode;
2783	alu.predicate = 1;
2784
2785	alu.dst.sel = ctx->temp_reg;
2786	alu.dst.write = 1;
2787	alu.dst.chan = 0;
2788
2789	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2790	if (r)
2791		return r;
2792	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2793	alu.src[1].sel = V_SQ_ALU_SRC_0;
2794	alu.src[1].chan = 0;
2795
2796	alu.last = 1;
2797
2798	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2799	if (r)
2800		return r;
2801	return 0;
2802}
2803
2804static int pops(struct r600_shader_ctx *ctx, int pops)
2805{
2806	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2807	ctx->bc->cf_last->pop_count = pops;
2808	return 0;
2809}
2810
2811static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2812{
2813	switch(reason) {
2814	case FC_PUSH_VPM:
2815		ctx->bc->callstack[ctx->bc->call_sp].current--;
2816		break;
2817	case FC_PUSH_WQM:
2818	case FC_LOOP:
2819		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2820		break;
2821	case FC_REP:
2822		/* TOODO : for 16 vp asic should -= 2; */
2823		ctx->bc->callstack[ctx->bc->call_sp].current --;
2824		break;
2825	}
2826}
2827
2828static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2829{
2830	if (check_max_only) {
2831		int diff;
2832		switch (reason) {
2833		case FC_PUSH_VPM:
2834			diff = 1;
2835			break;
2836		case FC_PUSH_WQM:
2837			diff = 4;
2838			break;
2839		default:
2840			assert(0);
2841			diff = 0;
2842		}
2843		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2844		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2845			ctx->bc->callstack[ctx->bc->call_sp].max =
2846				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2847		}
2848		return;
2849	}
2850	switch (reason) {
2851	case FC_PUSH_VPM:
2852		ctx->bc->callstack[ctx->bc->call_sp].current++;
2853		break;
2854	case FC_PUSH_WQM:
2855	case FC_LOOP:
2856		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2857		break;
2858	case FC_REP:
2859		ctx->bc->callstack[ctx->bc->call_sp].current++;
2860		break;
2861	}
2862
2863	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2864	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2865		ctx->bc->callstack[ctx->bc->call_sp].max =
2866			ctx->bc->callstack[ctx->bc->call_sp].current;
2867	}
2868}
2869
2870static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2871{
2872	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2873
2874	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2875						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2876	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2877	sp->num_mid++;
2878}
2879
2880static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2881{
2882	ctx->bc->fc_sp++;
2883	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2884	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2885}
2886
2887static void fc_poplevel(struct r600_shader_ctx *ctx)
2888{
2889	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2890	if (sp->mid) {
2891		free(sp->mid);
2892		sp->mid = NULL;
2893	}
2894	sp->num_mid = 0;
2895	sp->start = NULL;
2896	sp->type = 0;
2897	ctx->bc->fc_sp--;
2898}
2899
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It holds unfinished helpers for return / flag-based flow control; several
 * of them are empty stubs.
 */
#if 0
/* Append a RETURN control-flow instruction; always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Append a JUMP control-flow instruction with the given pop count.
 * `offset` is currently unused (see the TODO below); always returns 0.
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Sequence the helpers above: test flag, jump, set the in-loop return flag,
 * pop `ifidx + 1` levels and return.  Helper return values are ignored.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test flag, then append the CF instruction named by
 * ctx->inst_info->r600_opcode with pop count 1, register it in the `mid`
 * list of flow-control level `fc_sp`, and emit a POP of one level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2947
2948static int tgsi_if(struct r600_shader_ctx *ctx)
2949{
2950	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2951
2952	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2953
2954	fc_pushlevel(ctx, FC_IF);
2955
2956	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2957	return 0;
2958}
2959
2960static int tgsi_else(struct r600_shader_ctx *ctx)
2961{
2962	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2963	ctx->bc->cf_last->pop_count = 1;
2964
2965	fc_set_mid(ctx, ctx->bc->fc_sp);
2966	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2967	return 0;
2968}
2969
2970static int tgsi_endif(struct r600_shader_ctx *ctx)
2971{
2972	pops(ctx, 1);
2973	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2974		R600_ERR("if/endif unbalanced in shader\n");
2975		return -1;
2976	}
2977
2978	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2979		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2980		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2981	} else {
2982		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2983	}
2984	fc_poplevel(ctx);
2985
2986	callstack_decrease_current(ctx, FC_PUSH_VPM);
2987	return 0;
2988}
2989
2990static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2991{
2992	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2993
2994	fc_pushlevel(ctx, FC_LOOP);
2995
2996	/* check stack depth */
2997	callstack_check_depth(ctx, FC_LOOP, 0);
2998	return 0;
2999}
3000
3001static int tgsi_endloop(struct r600_shader_ctx *ctx)
3002{
3003	int i;
3004
3005	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3006
3007	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3008		R600_ERR("loop/endloop in shader code are not paired.\n");
3009		return -EINVAL;
3010	}
3011
3012	/* fixup loop pointers - from r600isa
3013	   LOOP END points to CF after LOOP START,
3014	   LOOP START point to CF after LOOP END
3015	   BRK/CONT point to LOOP END CF
3016	*/
3017	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3018
3019	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3020
3021	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3022		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3023	}
3024	/* TODO add LOOPRET support */
3025	fc_poplevel(ctx);
3026	callstack_decrease_current(ctx, FC_LOOP);
3027	return 0;
3028}
3029
3030static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3031{
3032	unsigned int fscp;
3033
3034	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3035	{
3036		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3037			break;
3038	}
3039
3040	if (fscp == 0) {
3041		R600_ERR("Break not inside loop/endloop pair\n");
3042		return -EINVAL;
3043	}
3044
3045	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3046	ctx->bc->cf_last->pop_count = 1;
3047
3048	fc_set_mid(ctx, fscp);
3049
3050	pops(ctx, 1);
3051	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3052	return 0;
3053}
3054
3055static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3056	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3057	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3058	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3059
3060	/* FIXME:
3061	 * For state trackers other than OpenGL, we'll want to use
3062	 * _RECIP_IEEE instead.
3063	 */
3064	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3065
3066	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3067	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3068	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3069	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3070	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3071	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3072	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3073	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3074	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3075	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3076	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3077	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3078	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3079	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3080	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3081	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082	/* gap */
3083	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085	/* gap */
3086	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3089	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3091	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3093	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3094	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3095	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3096	/* gap */
3097	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3099	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3101	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3102	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3103	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3104	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3105	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3111	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3113	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3114	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3115	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3116	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3117	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3118	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3119	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3120	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3126	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3127	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3131	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3132	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3133	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3134	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3137	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3138	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3139	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3140	/* gap */
3141	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3142	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3143	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3144	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3145	/* gap */
3146	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3154	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155	/* gap */
3156	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3164	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3165	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3168	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3170	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3171	/* gap */
3172	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3175	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3176	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3177	/* gap */
3178	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3181	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3182	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3183	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3184	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3186	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3187	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3188	/* gap */
3189	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3190	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3193	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3194	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3195	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3199	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3205	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3206	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3210	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3217};
3218
3219static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3220	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3221	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3222	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3223	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3224	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3225	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3226	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3227	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3228	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3229	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3230	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3231	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3232	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3233	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3234	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3235	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3236	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3237	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3238	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3239	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240	/* gap */
3241	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3243	/* gap */
3244	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3247	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3249	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3250	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3251	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3252	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3253	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3254	/* gap */
3255	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3257	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3258	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3259	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3260	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3261	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3262	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3263	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3268	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3269	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3271	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3272	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3273	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3274	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3275	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3276	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3277	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3278	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3281	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3282	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3284	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3285	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3289	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3290	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3291	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3292	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3294	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3295	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3296	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3297	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3298	/* gap */
3299	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3300	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3301	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3302	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3303	/* gap */
3304	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3305	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3312	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313	/* gap */
3314	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3322	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3323	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3325	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3326	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3327	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3328	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3329	/* gap */
3330	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3331	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3332	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3333	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3334	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3335	/* gap */
3336	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3341	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3342	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3343	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3344	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3345	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3346	/* gap */
3347	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3348	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3349	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3359	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3361	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3362	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3363	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3364	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3365	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3366	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3367	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3368	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3369	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3370	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3371	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3372	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3373	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3374	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3375};
3376