r600_shader.c revision 07e0424a172970a6ea06e09fe92c1681d8f0f260
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	for (i = 0; i < 10; i++) {
48		spi_vs_out_id[i] = 0;
49	}
50	for (i = 0; i < 32; i++) {
51		tmp = i << ((i & 3) * 8);
52		spi_vs_out_id[i / 4] |= tmp;
53	}
54	for (i = 0; i < 10; i++) {
55		r600_pipe_state_add_reg(rstate,
56					R_028614_SPI_VS_OUT_ID_0 + i * 4,
57					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58	}
59
60	r600_pipe_state_add_reg(rstate,
61			R_0286C4_SPI_VS_OUT_CONFIG,
62			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63			0xFFFFFFFF, NULL);
64	r600_pipe_state_add_reg(rstate,
65			R_028868_SQ_PGM_RESOURCES_VS,
66			S_028868_NUM_GPRS(rshader->bc.ngpr) |
67			S_028868_STACK_SIZE(rshader->bc.nstack),
68			0xFFFFFFFF, NULL);
69	r600_pipe_state_add_reg(rstate,
70			R_0288A4_SQ_PGM_RESOURCES_FS,
71			0x00000000, 0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_0288DC_SQ_PGM_CF_OFFSET_FS,
77			0x00000000, 0xFFFFFFFF, NULL);
78	r600_pipe_state_add_reg(rstate,
79			R_028858_SQ_PGM_START_VS,
80			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
81	r600_pipe_state_add_reg(rstate,
82			R_028894_SQ_PGM_START_FS,
83			r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
84
85	r600_pipe_state_add_reg(rstate,
86				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
87				0xFFFFFFFF, NULL);
88
89}
90
91int r600_find_vs_semantic_index(struct r600_shader *vs,
92				struct r600_shader *ps, int id)
93{
94	struct r600_shader_io *input = &ps->input[id];
95
96	for (int i = 0; i < vs->noutput; i++) {
97		if (input->name == vs->output[i].name &&
98			input->sid == vs->output[i].sid) {
99			return i - 1;
100		}
101	}
102	return 0;
103}
104
105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
106{
107	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
108	struct r600_pipe_state *rstate = &shader->rstate;
109	struct r600_shader *rshader = &shader->shader;
110	unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
111	int pos_index = -1, face_index = -1;
112
113	/* clear previous register */
114	rstate->nregs = 0;
115
116	for (i = 0; i < rshader->ninput; i++) {
117		tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
118		if (rshader->input[i].centroid)
119			tmp |= S_028644_SEL_CENTROID(1);
120		if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
121			tmp |= S_028644_SEL_LINEAR(1);
122
123		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
124			pos_index = i;
125		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
126		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
127		    rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
128			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
129		}
130		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
131			face_index = i;
132		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
133			rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
134			tmp |= S_028644_PT_SPRITE_TEX(1);
135		}
136		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
137	}
138	for (i = 0; i < rshader->noutput; i++) {
139		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
140			r600_pipe_state_add_reg(rstate,
141						R_02880C_DB_SHADER_CONTROL,
142						S_02880C_Z_EXPORT_ENABLE(1),
143						S_02880C_Z_EXPORT_ENABLE(1), NULL);
144		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
145			r600_pipe_state_add_reg(rstate,
146						R_02880C_DB_SHADER_CONTROL,
147						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
148						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
149	}
150
151	exports_ps = 0;
152	num_cout = 0;
153	for (i = 0; i < rshader->noutput; i++) {
154		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
155			exports_ps |= 1;
156		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
157			num_cout++;
158		}
159	}
160	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
161	if (!exports_ps) {
162		/* always at least export 1 component per pixel */
163		exports_ps = 2;
164	}
165
166	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
167				S_0286CC_PERSP_GRADIENT_ENA(1);
168	spi_input_z = 0;
169	if (pos_index != -1) {
170		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
171					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
172					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
173					S_0286CC_BARYC_SAMPLE_CNTL(1));
174		spi_input_z |= 1;
175	}
176
177	spi_ps_in_control_1 = 0;
178	if (face_index != -1) {
179		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
180			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
181	}
182
183	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
184	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
185	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
186	r600_pipe_state_add_reg(rstate,
187				R_028840_SQ_PGM_START_PS,
188				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
189	r600_pipe_state_add_reg(rstate,
190				R_028850_SQ_PGM_RESOURCES_PS,
191				S_028868_NUM_GPRS(rshader->bc.ngpr) |
192				S_028868_STACK_SIZE(rshader->bc.nstack),
193				0xFFFFFFFF, NULL);
194	r600_pipe_state_add_reg(rstate,
195				R_028854_SQ_PGM_EXPORTS_PS,
196				exports_ps, 0xFFFFFFFF, NULL);
197	r600_pipe_state_add_reg(rstate,
198				R_0288CC_SQ_PGM_CF_OFFSET_PS,
199				0x00000000, 0xFFFFFFFF, NULL);
200
201	if (rshader->uses_kill) {
202		/* only set some bits here, the other bits are set in the dsa state */
203		r600_pipe_state_add_reg(rstate,
204					R_02880C_DB_SHADER_CONTROL,
205					S_02880C_KILL_ENABLE(1),
206					S_02880C_KILL_ENABLE(1), NULL);
207	}
208	r600_pipe_state_add_reg(rstate,
209				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
210				0xFFFFFFFF, NULL);
211}
212
213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
214{
215	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
216	struct r600_shader *rshader = &shader->shader;
217	void *ptr;
218
219	/* copy new shader */
220	if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
221		shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
222		if (shader->bo_fetch == NULL) {
223			return -ENOMEM;
224		}
225		ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
226		memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
227		r600_bo_unmap(rctx->radeon, shader->bo_fetch);
228	}
229	if (shader->bo == NULL) {
230		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
231		if (shader->bo == NULL) {
232			return -ENOMEM;
233		}
234		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
235		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
236		r600_bo_unmap(rctx->radeon, shader->bo);
237	}
238	/* build state */
239	rshader->flat_shade = rctx->flatshade;
240	switch (rshader->processor_type) {
241	case TGSI_PROCESSOR_VERTEX:
242		if (rshader->family >= CHIP_CEDAR) {
243			evergreen_pipe_shader_vs(ctx, shader);
244		} else {
245			r600_pipe_shader_vs(ctx, shader);
246		}
247		break;
248	case TGSI_PROCESSOR_FRAGMENT:
249		if (rshader->family >= CHIP_CEDAR) {
250			evergreen_pipe_shader_ps(ctx, shader);
251		} else {
252			r600_pipe_shader_ps(ctx, shader);
253		}
254		break;
255	default:
256		return -EINVAL;
257	}
258	r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
259	return 0;
260}
261
262static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
263{
264	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
265	struct r600_shader *shader = &rshader->shader;
266	const struct util_format_description *desc;
267	enum pipe_format resource_format[160];
268	unsigned i, nresources = 0;
269	struct r600_bc *bc = &shader->bc_fetch;
270	struct r600_bc_cf *cf;
271	struct r600_bc_vtx *vtx;
272
273	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
274		return 0;
275	/* doing a full memcmp fell over the refcount */
276	if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
277	    (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
278                     rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
279		return 0;
280	}
281	rshader->vertex_elements = *rctx->vertex_elements;
282	for (i = 0; i < rctx->vertex_elements->count; i++) {
283		resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
284	}
285	r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
286	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
287		switch (cf->inst) {
288		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
289		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
290			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
291				desc = util_format_description(resource_format[vtx->buffer_id]);
292				if (desc == NULL) {
293					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
294					return -EINVAL;
295				}
296				vtx->dst_sel_x = desc->swizzle[0];
297				vtx->dst_sel_y = desc->swizzle[1];
298				vtx->dst_sel_z = desc->swizzle[2];
299				vtx->dst_sel_w = desc->swizzle[3];
300			}
301			break;
302		default:
303			break;
304		}
305	}
306	return r600_bc_build(&shader->bc_fetch);
307}
308
309int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
310{
311	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
312	int r;
313
314	if (shader == NULL)
315		return -EINVAL;
316	/* there should be enough input */
317	if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
318		R600_ERR("%d resources provided, expecting %d\n",
319			rctx->vertex_elements->count, shader->shader.bc.nresource);
320		return -EINVAL;
321	}
322	r = r600_shader_update(ctx, shader);
323	if (r)
324		return r;
325	return r600_pipe_shader(ctx, shader);
326}
327
328int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
329int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
330{
331	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
332	int r;
333
334//fprintf(stderr, "--------------------------------------------------------------\n");
335//tgsi_dump(tokens, 0);
336	shader->shader.family = r600_get_family(rctx->radeon);
337	r = r600_shader_from_tgsi(tokens, &shader->shader);
338	if (r) {
339		R600_ERR("translation from TGSI failed !\n");
340		return r;
341	}
342	r = r600_bc_build(&shader->shader.bc);
343	if (r) {
344		R600_ERR("building bytecode failed !\n");
345		return r;
346	}
347	if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
348		r = r600_bc_build(&shader->shader.bc_fetch);
349		if (r) {
350			R600_ERR("building bytecode failed !\n");
351			return r;
352		}
353	}
354//fprintf(stderr, "______________________________________________________________\n");
355	return 0;
356}
357
358void
359r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
360{
361	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
362
363	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
364
365	r600_bc_clear(&shader->shader.bc);
366
367	/* FIXME: is there more stuff to free? */
368}
369
370/*
371 * tgsi -> r600 shader
372 */
373struct r600_shader_tgsi_instruction;
374
375struct r600_shader_ctx {
376	struct tgsi_shader_info			info;
377	struct tgsi_parse_context		parse;
378	const struct tgsi_token			*tokens;
379	unsigned				type;
380	unsigned				file_offset[TGSI_FILE_COUNT];
381	unsigned				temp_reg;
382	struct r600_shader_tgsi_instruction	*inst_info;
383	struct r600_bc				*bc;
384	struct r600_bc				*bc_fetch;
385	struct r600_shader			*shader;
386	u32					value[4];
387	u32					*literals;
388	u32					nliterals;
389	u32					max_driver_temp_used;
390	/* needed for evergreen interpolation */
391	boolean                                 input_centroid;
392	boolean                                 input_linear;
393	boolean                                 input_perspective;
394	int					num_interp_gpr;
395};
396
/* One entry of a TGSI opcode translation table. */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;
	unsigned is_op3;
	unsigned r600_opcode;
	/* translation callback; a non-zero return aborts the translation */
	int (*process)(struct r600_shader_ctx *ctx);
};

/* opcode tables, indexed by TGSI opcode; defined further down in this file */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[];

static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
406
407static int tgsi_is_supported(struct r600_shader_ctx *ctx)
408{
409	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
410	int j;
411
412	if (i->Instruction.NumDstRegs > 1) {
413		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
414		return -EINVAL;
415	}
416	if (i->Instruction.Predicate) {
417		R600_ERR("predicate unsupported\n");
418		return -EINVAL;
419	}
420#if 0
421	if (i->Instruction.Label) {
422		R600_ERR("label unsupported\n");
423		return -EINVAL;
424	}
425#endif
426	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
427		if (i->Src[j].Register.Dimension) {
428			R600_ERR("unsupported src %d (dimension %d)\n", j,
429				 i->Src[j].Register.Dimension);
430			return -EINVAL;
431		}
432	}
433	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
434		if (i->Dst[j].Register.Dimension) {
435			R600_ERR("unsupported dst (dimension)\n");
436			return -EINVAL;
437		}
438	}
439	return 0;
440}
441
442static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
443{
444	int i, r;
445	struct r600_bc_alu alu;
446	int gpr = 0, base_chan = 0;
447	int ij_index = 0;
448
449	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
450		ij_index = 0;
451		if (ctx->shader->input[input].centroid)
452			ij_index++;
453	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
454		ij_index = 0;
455		/* if we have perspective add one */
456		if (ctx->input_perspective)  {
457			ij_index++;
458			/* if we have perspective centroid */
459			if (ctx->input_centroid)
460				ij_index++;
461		}
462		if (ctx->shader->input[input].centroid)
463			ij_index++;
464	}
465
466	/* work out gpr and base_chan from index */
467	gpr = ij_index / 2;
468	base_chan = (2 * (ij_index % 2)) + 1;
469
470	for (i = 0; i < 8; i++) {
471		memset(&alu, 0, sizeof(struct r600_bc_alu));
472
473		if (i < 4)
474			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
475		else
476			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
477
478		if ((i > 1) && (i < 6)) {
479			alu.dst.sel = ctx->shader->input[input].gpr;
480			alu.dst.write = 1;
481		}
482
483		alu.dst.chan = i % 4;
484
485		alu.src[0].sel = gpr;
486		alu.src[0].chan = (base_chan - (i % 2));
487
488		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
489
490		alu.bank_swizzle_force = SQ_ALU_VEC_210;
491		if ((i % 4) == 3)
492			alu.last = 1;
493		r = r600_bc_add_alu(ctx->bc, &alu);
494		if (r)
495			return r;
496	}
497	return 0;
498}
499
500
501static int tgsi_declaration(struct r600_shader_ctx *ctx)
502{
503	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
504	struct r600_bc_vtx vtx;
505	unsigned i;
506	int r;
507
508	switch (d->Declaration.File) {
509	case TGSI_FILE_INPUT:
510		i = ctx->shader->ninput++;
511		ctx->shader->input[i].name = d->Semantic.Name;
512		ctx->shader->input[i].sid = d->Semantic.Index;
513		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
514		ctx->shader->input[i].centroid = d->Declaration.Centroid;
515		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
516		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
517			/* turn input into fetch */
518			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
519			vtx.inst = 0;
520			vtx.fetch_type = 0;
521			vtx.buffer_id = i;
522			/* register containing the index into the buffer */
523			vtx.src_gpr = 0;
524			vtx.src_sel_x = 0;
525			vtx.mega_fetch_count = 0x1F;
526			vtx.dst_gpr = ctx->shader->input[i].gpr;
527			vtx.dst_sel_x = 0;
528			vtx.dst_sel_y = 1;
529			vtx.dst_sel_z = 2;
530			vtx.dst_sel_w = 3;
531			vtx.use_const_fields = 1;
532			r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
533			if (r)
534				return r;
535		}
536		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
537			/* turn input into interpolate on EG */
538			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
539				if (ctx->shader->input[i].interpolate > 0) {
540					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
541					evergreen_interp_alu(ctx, i);
542				}
543			}
544		}
545		break;
546	case TGSI_FILE_OUTPUT:
547		i = ctx->shader->noutput++;
548		ctx->shader->output[i].name = d->Semantic.Name;
549		ctx->shader->output[i].sid = d->Semantic.Index;
550		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
551		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
552		break;
553	case TGSI_FILE_CONSTANT:
554	case TGSI_FILE_TEMPORARY:
555	case TGSI_FILE_SAMPLER:
556	case TGSI_FILE_ADDRESS:
557		break;
558	default:
559		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
560		return -EINVAL;
561	}
562	return 0;
563}
564
565static int r600_get_temp(struct r600_shader_ctx *ctx)
566{
567	return ctx->temp_reg + ctx->max_driver_temp_used++;
568}
569
570/*
571 * for evergreen we need to scan the shader to find the number of GPRs we need to
572 * reserve for interpolation.
573 *
574 * we need to know if we are going to emit
575 * any centroid inputs
576 * if perspective and linear are required
577*/
578static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
579{
580	int i;
581	int num_baryc;
582
583	ctx->input_linear = FALSE;
584	ctx->input_perspective = FALSE;
585	ctx->input_centroid = FALSE;
586	ctx->num_interp_gpr = 1;
587
588	/* any centroid inputs */
589	for (i = 0; i < ctx->info.num_inputs; i++) {
590		/* skip position/face */
591		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
592		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
593			continue;
594		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
595			ctx->input_linear = TRUE;
596		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
597			ctx->input_perspective = TRUE;
598		if (ctx->info.input_centroid[i])
599			ctx->input_centroid = TRUE;
600	}
601
602	num_baryc = 0;
603	/* ignoring sample for now */
604	if (ctx->input_perspective)
605		num_baryc++;
606	if (ctx->input_linear)
607		num_baryc++;
608	if (ctx->input_centroid)
609		num_baryc *= 2;
610
611	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
612
613	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
614	return ctx->num_interp_gpr;
615}
616
617int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
618{
619	struct tgsi_full_immediate *immediate;
620	struct r600_shader_ctx ctx;
621	struct r600_bc_output output[32];
622	unsigned output_done, noutput;
623	unsigned opcode;
624	int i, r = 0, pos0;
625
626	ctx.bc = &shader->bc;
627	ctx.bc_fetch = &shader->bc_fetch;
628	ctx.shader = shader;
629	r = r600_bc_init(ctx.bc, shader->family);
630	if (r)
631		return r;
632	ctx.tokens = tokens;
633	tgsi_scan_shader(tokens, &ctx.info);
634	tgsi_parse_init(&ctx.parse, tokens);
635	ctx.type = ctx.parse.FullHeader.Processor.Processor;
636	shader->processor_type = ctx.type;
637	if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
638		r = r600_bc_init(ctx.bc_fetch, shader->family);
639		if (r)
640			return r;
641		ctx.bc_fetch->type = -1;
642	}
643	ctx.bc->type = shader->processor_type;
644
645	/* register allocations */
646	/* Values [0,127] correspond to GPR[0..127].
647	 * Values [128,159] correspond to constant buffer bank 0
648	 * Values [160,191] correspond to constant buffer bank 1
649	 * Values [256,511] correspond to cfile constants c[0..255].
650	 * Other special values are shown in the list below.
651	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
652	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
653	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
654	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
655	 * 248	SQ_ALU_SRC_0: special constant 0.0.
656	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
657	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
658	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
659	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
660	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
661	 * 254	SQ_ALU_SRC_PV: previous vector result.
662	 * 255	SQ_ALU_SRC_PS: previous scalar result.
663	 */
664	for (i = 0; i < TGSI_FILE_COUNT; i++) {
665		ctx.file_offset[i] = 0;
666	}
667	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
668		ctx.file_offset[TGSI_FILE_INPUT] = 1;
669		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
670			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
671		} else {
672			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
673		}
674	}
675	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
676		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
677	}
678	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
679						ctx.info.file_count[TGSI_FILE_INPUT];
680	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
681						ctx.info.file_count[TGSI_FILE_OUTPUT];
682
683	ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
684
685	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
686	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
687			ctx.info.file_count[TGSI_FILE_TEMPORARY];
688
689	ctx.nliterals = 0;
690	ctx.literals = NULL;
691
692	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
693		tgsi_parse_token(&ctx.parse);
694		switch (ctx.parse.FullToken.Token.Type) {
695		case TGSI_TOKEN_TYPE_IMMEDIATE:
696			immediate = &ctx.parse.FullToken.FullImmediate;
697			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
698			if(ctx.literals == NULL) {
699				r = -ENOMEM;
700				goto out_err;
701			}
702			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
703			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
704			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
705			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
706			ctx.nliterals++;
707			break;
708		case TGSI_TOKEN_TYPE_DECLARATION:
709			r = tgsi_declaration(&ctx);
710			if (r)
711				goto out_err;
712			break;
713		case TGSI_TOKEN_TYPE_INSTRUCTION:
714			r = tgsi_is_supported(&ctx);
715			if (r)
716				goto out_err;
717			ctx.max_driver_temp_used = 0;
718			/* reserve first tmp for everyone */
719			r600_get_temp(&ctx);
720			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
721			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
722				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
723			else
724				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
725			r = ctx.inst_info->process(&ctx);
726			if (r)
727				goto out_err;
728			r = r600_bc_add_literal(ctx.bc, ctx.value);
729			if (r)
730				goto out_err;
731			break;
732		default:
733			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
734			r = -EINVAL;
735			goto out_err;
736		}
737	}
738	/* export output */
739	noutput = shader->noutput;
740	for (i = 0, pos0 = 0; i < noutput; i++) {
741		memset(&output[i], 0, sizeof(struct r600_bc_output));
742		output[i].gpr = shader->output[i].gpr;
743		output[i].elem_size = 3;
744		output[i].swizzle_x = 0;
745		output[i].swizzle_y = 1;
746		output[i].swizzle_z = 2;
747		output[i].swizzle_w = 3;
748		output[i].barrier = 1;
749		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
750		output[i].array_base = i - pos0;
751		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
752		switch (ctx.type) {
753		case TGSI_PROCESSOR_VERTEX:
754			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
755				output[i].array_base = 60;
756				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
757				/* position doesn't count in array_base */
758				pos0++;
759			}
760			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
761				output[i].array_base = 61;
762				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
763				/* position doesn't count in array_base */
764				pos0++;
765			}
766			break;
767		case TGSI_PROCESSOR_FRAGMENT:
768			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
769				output[i].array_base = shader->output[i].sid;
770				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
771			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
772				output[i].array_base = 61;
773				output[i].swizzle_x = 2;
774				output[i].swizzle_y = 7;
775				output[i].swizzle_z = output[i].swizzle_w = 7;
776				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
777			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
778				output[i].array_base = 61;
779				output[i].swizzle_x = 7;
780				output[i].swizzle_y = 1;
781				output[i].swizzle_z = output[i].swizzle_w = 7;
782				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
783			} else {
784				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
785				r = -EINVAL;
786				goto out_err;
787			}
788			break;
789		default:
790			R600_ERR("unsupported processor type %d\n", ctx.type);
791			r = -EINVAL;
792			goto out_err;
793		}
794	}
795	/* add fake param output for vertex shader if no param is exported */
796	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
797		for (i = 0, pos0 = 0; i < noutput; i++) {
798			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
799				pos0 = 1;
800				break;
801			}
802		}
803		if (!pos0) {
804			memset(&output[i], 0, sizeof(struct r600_bc_output));
805			output[i].gpr = 0;
806			output[i].elem_size = 3;
807			output[i].swizzle_x = 0;
808			output[i].swizzle_y = 1;
809			output[i].swizzle_z = 2;
810			output[i].swizzle_w = 3;
811			output[i].barrier = 1;
812			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
813			output[i].array_base = 0;
814			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
815			noutput++;
816		}
817	}
818	/* add fake pixel export */
819	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
820		memset(&output[0], 0, sizeof(struct r600_bc_output));
821		output[0].gpr = 0;
822		output[0].elem_size = 3;
823		output[0].swizzle_x = 7;
824		output[0].swizzle_y = 7;
825		output[0].swizzle_z = 7;
826		output[0].swizzle_w = 7;
827		output[0].barrier = 1;
828		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
829		output[0].array_base = 0;
830		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
831		noutput++;
832	}
833	/* set export done on last export of each type */
834	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
835		if (i == (noutput - 1)) {
836			output[i].end_of_program = 1;
837		}
838		if (!(output_done & (1 << output[i].type))) {
839			output_done |= (1 << output[i].type);
840			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
841		}
842	}
843	/* add return to fetch shader */
844	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
845		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
846			r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
847		} else {
848			r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
849		}
850	}
851	/* add output to bytecode */
852	for (i = 0; i < noutput; i++) {
853		r = r600_bc_add_output(ctx.bc, &output[i]);
854		if (r)
855			goto out_err;
856	}
857	free(ctx.literals);
858	tgsi_parse_free(&ctx.parse);
859	return 0;
860out_err:
861	free(ctx.literals);
862	tgsi_parse_free(&ctx.parse);
863	return r;
864}
865
866static int tgsi_unsupported(struct r600_shader_ctx *ctx)
867{
868	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
869	return -EINVAL;
870}
871
/* Handler that emits nothing and always succeeds; ctx is not used. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
876
877static int tgsi_src(struct r600_shader_ctx *ctx,
878			const struct tgsi_full_src_register *tgsi_src,
879			struct r600_bc_alu_src *r600_src)
880{
881	int index;
882	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
883	r600_src->sel = tgsi_src->Register.Index;
884	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
885		r600_src->sel = 0;
886		index = tgsi_src->Register.Index;
887		ctx->value[0] = ctx->literals[index * 4 + 0];
888		ctx->value[1] = ctx->literals[index * 4 + 1];
889		ctx->value[2] = ctx->literals[index * 4 + 2];
890		ctx->value[3] = ctx->literals[index * 4 + 3];
891	}
892	if (tgsi_src->Register.Indirect)
893		r600_src->rel = V_SQ_REL_RELATIVE;
894	r600_src->neg = tgsi_src->Register.Negate;
895	r600_src->abs = tgsi_src->Register.Absolute;
896	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
897	return 0;
898}
899
900static int tgsi_dst(struct r600_shader_ctx *ctx,
901			const struct tgsi_full_dst_register *tgsi_dst,
902			unsigned swizzle,
903			struct r600_bc_alu_dst *r600_dst)
904{
905	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
906
907	r600_dst->sel = tgsi_dst->Register.Index;
908	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
909	r600_dst->chan = swizzle;
910	r600_dst->write = 1;
911	if (tgsi_dst->Register.Indirect)
912		r600_dst->rel = V_SQ_REL_RELATIVE;
913	if (inst->Instruction.Saturate) {
914		r600_dst->clamp = 1;
915	}
916	return 0;
917}
918
919static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
920{
921	switch (swizzle) {
922	case 0:
923		return tgsi_src->Register.SwizzleX;
924	case 1:
925		return tgsi_src->Register.SwizzleY;
926	case 2:
927		return tgsi_src->Register.SwizzleZ;
928	case 3:
929		return tgsi_src->Register.SwizzleW;
930	default:
931		return 0;
932	}
933}
934
935static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
936{
937	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
938	struct r600_bc_alu alu;
939	int i, j, k, nconst, r;
940
941	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
942		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
943			nconst++;
944		}
945		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
946		if (r) {
947			return r;
948		}
949	}
950	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
951		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
952			int treg = r600_get_temp(ctx);
953			for (k = 0; k < 4; k++) {
954				memset(&alu, 0, sizeof(struct r600_bc_alu));
955				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
956				alu.src[0].sel = r600_src[i].sel;
957				alu.src[0].chan = k;
958				alu.src[0].rel = r600_src[i].rel;
959				alu.dst.sel = treg;
960				alu.dst.chan = k;
961				alu.dst.write = 1;
962				if (k == 3)
963					alu.last = 1;
964				r = r600_bc_add_alu(ctx->bc, &alu);
965				if (r)
966					return r;
967			}
968			r600_src[i].sel = treg;
969			r600_src[i].rel =0;
970			j--;
971		}
972	}
973	return 0;
974}
975
976/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
977static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
978{
979	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
980	struct r600_bc_alu alu;
981	int i, j, k, nliteral, r;
982
983	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
984		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
985			nliteral++;
986		}
987	}
988	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
989		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
990			int treg = r600_get_temp(ctx);
991			for (k = 0; k < 4; k++) {
992				memset(&alu, 0, sizeof(struct r600_bc_alu));
993				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
994				alu.src[0].sel = r600_src[i].sel;
995				alu.src[0].chan = k;
996				alu.dst.sel = treg;
997				alu.dst.chan = k;
998				alu.dst.write = 1;
999				if (k == 3)
1000					alu.last = 1;
1001				r = r600_bc_add_alu(ctx->bc, &alu);
1002				if (r)
1003					return r;
1004			}
1005			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
1006			if (r)
1007				return r;
1008			r600_src[i].sel = treg;
1009			j--;
1010		}
1011	}
1012	return 0;
1013}
1014
1015static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1016{
1017	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1018	struct r600_bc_alu_src r600_src[3];
1019	struct r600_bc_alu alu;
1020	int i, j, r;
1021	int lasti = 0;
1022
1023	for (i = 0; i < 4; i++) {
1024		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1025			lasti = i;
1026		}
1027	}
1028
1029	r = tgsi_split_constant(ctx, r600_src);
1030	if (r)
1031		return r;
1032	r = tgsi_split_literal_constant(ctx, r600_src);
1033	if (r)
1034		return r;
1035	for (i = 0; i < lasti + 1; i++) {
1036		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1037			continue;
1038
1039		memset(&alu, 0, sizeof(struct r600_bc_alu));
1040		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1041		if (r)
1042			return r;
1043
1044		alu.inst = ctx->inst_info->r600_opcode;
1045		if (!swap) {
1046			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1047				alu.src[j] = r600_src[j];
1048				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1049			}
1050		} else {
1051			alu.src[0] = r600_src[1];
1052			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1053
1054			alu.src[1] = r600_src[0];
1055			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1056		}
1057		/* handle some special cases */
1058		switch (ctx->inst_info->tgsi_opcode) {
1059		case TGSI_OPCODE_SUB:
1060			alu.src[1].neg = 1;
1061			break;
1062		case TGSI_OPCODE_ABS:
1063			alu.src[0].abs = 1;
1064			break;
1065		default:
1066			break;
1067		}
1068		if (i == lasti) {
1069			alu.last = 1;
1070		}
1071		r = r600_bc_add_alu(ctx->bc, &alu);
1072		if (r)
1073			return r;
1074	}
1075	return 0;
1076}
1077
/* Per-channel ALU op with the TGSI operands kept in their original order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;

	return tgsi_op2_s(ctx, swap);
}
1082
/* Per-channel ALU op with the first two TGSI operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;

	return tgsi_op2_s(ctx, swap);
}
1087
1088/*
1089 * r600 - trunc to -PI..PI range
1090 * r700 - normalize by dividing by 2PI
1091 * see fdo bug 27901
1092 */
1093static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
1094			   struct r600_bc_alu_src r600_src[3])
1095{
1096	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1097	int r;
1098	uint32_t lit_vals[4];
1099	struct r600_bc_alu alu;
1100
1101	memset(lit_vals, 0, 4*4);
1102	r = tgsi_split_constant(ctx, r600_src);
1103	if (r)
1104		return r;
1105	r = tgsi_split_literal_constant(ctx, r600_src);
1106	if (r)
1107		return r;
1108
1109	r = tgsi_split_literal_constant(ctx, r600_src);
1110	if (r)
1111		return r;
1112
1113	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
1114	lit_vals[1] = fui(0.5f);
1115
1116	memset(&alu, 0, sizeof(struct r600_bc_alu));
1117	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1118	alu.is_op3 = 1;
1119
1120	alu.dst.chan = 0;
1121	alu.dst.sel = ctx->temp_reg;
1122	alu.dst.write = 1;
1123
1124	alu.src[0] = r600_src[0];
1125	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1126
1127	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1128	alu.src[1].chan = 0;
1129	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1130	alu.src[2].chan = 1;
1131	alu.last = 1;
1132	r = r600_bc_add_alu(ctx->bc, &alu);
1133	if (r)
1134		return r;
1135	r = r600_bc_add_literal(ctx->bc, lit_vals);
1136	if (r)
1137		return r;
1138
1139	memset(&alu, 0, sizeof(struct r600_bc_alu));
1140	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1141
1142	alu.dst.chan = 0;
1143	alu.dst.sel = ctx->temp_reg;
1144	alu.dst.write = 1;
1145
1146	alu.src[0].sel = ctx->temp_reg;
1147	alu.src[0].chan = 0;
1148	alu.last = 1;
1149	r = r600_bc_add_alu(ctx->bc, &alu);
1150	if (r)
1151		return r;
1152
1153	if (ctx->bc->chiprev == CHIPREV_R600) {
1154		lit_vals[0] = fui(3.1415926535897f * 2.0f);
1155		lit_vals[1] = fui(-3.1415926535897f);
1156	} else {
1157		lit_vals[0] = fui(1.0f);
1158		lit_vals[1] = fui(-0.5f);
1159	}
1160
1161	memset(&alu, 0, sizeof(struct r600_bc_alu));
1162	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1163	alu.is_op3 = 1;
1164
1165	alu.dst.chan = 0;
1166	alu.dst.sel = ctx->temp_reg;
1167	alu.dst.write = 1;
1168
1169	alu.src[0].sel = ctx->temp_reg;
1170	alu.src[0].chan = 0;
1171
1172	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1173	alu.src[1].chan = 0;
1174	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1175	alu.src[2].chan = 1;
1176	alu.last = 1;
1177	r = r600_bc_add_alu(ctx->bc, &alu);
1178	if (r)
1179		return r;
1180	r = r600_bc_add_literal(ctx->bc, lit_vals);
1181	if (r)
1182		return r;
1183	return 0;
1184}
1185
1186static int tgsi_trig(struct r600_shader_ctx *ctx)
1187{
1188	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1189	struct r600_bc_alu_src r600_src[3];
1190	struct r600_bc_alu alu;
1191	int i, r;
1192	int lasti = 0;
1193
1194	r = tgsi_setup_trig(ctx, r600_src);
1195	if (r)
1196		return r;
1197
1198	memset(&alu, 0, sizeof(struct r600_bc_alu));
1199	alu.inst = ctx->inst_info->r600_opcode;
1200	alu.dst.chan = 0;
1201	alu.dst.sel = ctx->temp_reg;
1202	alu.dst.write = 1;
1203
1204	alu.src[0].sel = ctx->temp_reg;
1205	alu.src[0].chan = 0;
1206	alu.last = 1;
1207	r = r600_bc_add_alu(ctx->bc, &alu);
1208	if (r)
1209		return r;
1210
1211	/* replicate result */
1212	for (i = 0; i < 4; i++) {
1213		if (inst->Dst[0].Register.WriteMask & (1 << i))
1214			lasti = i;
1215	}
1216	for (i = 0; i < lasti + 1; i++) {
1217		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1218			continue;
1219
1220		memset(&alu, 0, sizeof(struct r600_bc_alu));
1221		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1222
1223		alu.src[0].sel = ctx->temp_reg;
1224		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1225		if (r)
1226			return r;
1227		if (i == lasti)
1228			alu.last = 1;
1229		r = r600_bc_add_alu(ctx->bc, &alu);
1230		if (r)
1231			return r;
1232	}
1233	return 0;
1234}
1235
1236static int tgsi_scs(struct r600_shader_ctx *ctx)
1237{
1238	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1239	struct r600_bc_alu_src r600_src[3];
1240	struct r600_bc_alu alu;
1241	int r;
1242
1243	/* We'll only need the trig stuff if we are going to write to the
1244	 * X or Y components of the destination vector.
1245	 */
1246	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1247		r = tgsi_setup_trig(ctx, r600_src);
1248		if (r)
1249			return r;
1250	}
1251
1252	/* dst.x = COS */
1253	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1254		memset(&alu, 0, sizeof(struct r600_bc_alu));
1255		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1256		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1257		if (r)
1258			return r;
1259
1260		alu.src[0].sel = ctx->temp_reg;
1261		alu.src[0].chan = 0;
1262		alu.last = 1;
1263		r = r600_bc_add_alu(ctx->bc, &alu);
1264		if (r)
1265			return r;
1266	}
1267
1268	/* dst.y = SIN */
1269	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1270		memset(&alu, 0, sizeof(struct r600_bc_alu));
1271		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1272		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1273		if (r)
1274			return r;
1275
1276		alu.src[0].sel = ctx->temp_reg;
1277		alu.src[0].chan = 0;
1278		alu.last = 1;
1279		r = r600_bc_add_alu(ctx->bc, &alu);
1280		if (r)
1281			return r;
1282	}
1283
1284	/* dst.z = 0.0; */
1285	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1286		memset(&alu, 0, sizeof(struct r600_bc_alu));
1287
1288		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1289
1290		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1291		if (r)
1292			return r;
1293
1294		alu.src[0].sel = V_SQ_ALU_SRC_0;
1295		alu.src[0].chan = 0;
1296
1297		alu.last = 1;
1298
1299		r = r600_bc_add_alu(ctx->bc, &alu);
1300		if (r)
1301			return r;
1302
1303		r = r600_bc_add_literal(ctx->bc, ctx->value);
1304		if (r)
1305			return r;
1306	}
1307
1308	/* dst.w = 1.0; */
1309	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1310		memset(&alu, 0, sizeof(struct r600_bc_alu));
1311
1312		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1313
1314		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1315		if (r)
1316			return r;
1317
1318		alu.src[0].sel = V_SQ_ALU_SRC_1;
1319		alu.src[0].chan = 0;
1320
1321		alu.last = 1;
1322
1323		r = r600_bc_add_alu(ctx->bc, &alu);
1324		if (r)
1325			return r;
1326
1327		r = r600_bc_add_literal(ctx->bc, ctx->value);
1328		if (r)
1329			return r;
1330	}
1331
1332	return 0;
1333}
1334
1335static int tgsi_kill(struct r600_shader_ctx *ctx)
1336{
1337	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1338	struct r600_bc_alu alu;
1339	int i, r;
1340
1341	for (i = 0; i < 4; i++) {
1342		memset(&alu, 0, sizeof(struct r600_bc_alu));
1343		alu.inst = ctx->inst_info->r600_opcode;
1344
1345		alu.dst.chan = i;
1346
1347		alu.src[0].sel = V_SQ_ALU_SRC_0;
1348
1349		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1350			alu.src[1].sel = V_SQ_ALU_SRC_1;
1351			alu.src[1].neg = 1;
1352		} else {
1353			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1354			if (r)
1355				return r;
1356			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1357		}
1358		if (i == 3) {
1359			alu.last = 1;
1360		}
1361		r = r600_bc_add_alu(ctx->bc, &alu);
1362		if (r)
1363			return r;
1364	}
1365	r = r600_bc_add_literal(ctx->bc, ctx->value);
1366	if (r)
1367		return r;
1368
1369	/* kill must be last in ALU */
1370	ctx->bc->force_add_cf = 1;
1371	ctx->shader->uses_kill = TRUE;
1372	return 0;
1373}
1374
1375static int tgsi_lit(struct r600_shader_ctx *ctx)
1376{
1377	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1378	struct r600_bc_alu alu;
1379	struct r600_bc_alu_src r600_src[3];
1380	int r;
1381
1382	r = tgsi_split_constant(ctx, r600_src);
1383	if (r)
1384		return r;
1385	r = tgsi_split_literal_constant(ctx, r600_src);
1386	if (r)
1387		return r;
1388
1389	/* dst.x, <- 1.0  */
1390	memset(&alu, 0, sizeof(struct r600_bc_alu));
1391	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1392	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1393	alu.src[0].chan = 0;
1394	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1395	if (r)
1396		return r;
1397	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1398	r = r600_bc_add_alu(ctx->bc, &alu);
1399	if (r)
1400		return r;
1401
1402	/* dst.y = max(src.x, 0.0) */
1403	memset(&alu, 0, sizeof(struct r600_bc_alu));
1404	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1405	alu.src[0] = r600_src[0];
1406	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1407	alu.src[1].chan = 0;
1408	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1409	if (r)
1410		return r;
1411	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1412	r = r600_bc_add_alu(ctx->bc, &alu);
1413	if (r)
1414		return r;
1415
1416	/* dst.w, <- 1.0  */
1417	memset(&alu, 0, sizeof(struct r600_bc_alu));
1418	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1419	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1420	alu.src[0].chan = 0;
1421	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1422	if (r)
1423		return r;
1424	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1425	alu.last = 1;
1426	r = r600_bc_add_alu(ctx->bc, &alu);
1427	if (r)
1428		return r;
1429
1430	r = r600_bc_add_literal(ctx->bc, ctx->value);
1431	if (r)
1432		return r;
1433
1434	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1435	{
1436		int chan;
1437		int sel;
1438
1439		/* dst.z = log(src.y) */
1440		memset(&alu, 0, sizeof(struct r600_bc_alu));
1441		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1442		alu.src[0] = r600_src[0];
1443		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1444		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1445		if (r)
1446			return r;
1447		alu.last = 1;
1448		r = r600_bc_add_alu(ctx->bc, &alu);
1449		if (r)
1450			return r;
1451
1452		r = r600_bc_add_literal(ctx->bc, ctx->value);
1453		if (r)
1454			return r;
1455
1456		chan = alu.dst.chan;
1457		sel = alu.dst.sel;
1458
1459		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1460		memset(&alu, 0, sizeof(struct r600_bc_alu));
1461		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1462		alu.src[0] = r600_src[0];
1463		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1464		alu.src[1].sel  = sel;
1465		alu.src[1].chan = chan;
1466
1467		alu.src[2] = r600_src[0];
1468		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1469		alu.dst.sel = ctx->temp_reg;
1470		alu.dst.chan = 0;
1471		alu.dst.write = 1;
1472		alu.is_op3 = 1;
1473		alu.last = 1;
1474		r = r600_bc_add_alu(ctx->bc, &alu);
1475		if (r)
1476			return r;
1477
1478		r = r600_bc_add_literal(ctx->bc, ctx->value);
1479		if (r)
1480			return r;
1481		/* dst.z = exp(tmp.x) */
1482		memset(&alu, 0, sizeof(struct r600_bc_alu));
1483		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1484		alu.src[0].sel = ctx->temp_reg;
1485		alu.src[0].chan = 0;
1486		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1487		if (r)
1488			return r;
1489		alu.last = 1;
1490		r = r600_bc_add_alu(ctx->bc, &alu);
1491		if (r)
1492			return r;
1493	}
1494	return 0;
1495}
1496
1497static int tgsi_rsq(struct r600_shader_ctx *ctx)
1498{
1499	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1500	struct r600_bc_alu alu;
1501	int i, r;
1502
1503	memset(&alu, 0, sizeof(struct r600_bc_alu));
1504
1505	/* FIXME:
1506	 * For state trackers other than OpenGL, we'll want to use
1507	 * _RECIPSQRT_IEEE instead.
1508	 */
1509	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1510
1511	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1512		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1513		if (r)
1514			return r;
1515		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1516		alu.src[i].abs = 1;
1517	}
1518	alu.dst.sel = ctx->temp_reg;
1519	alu.dst.write = 1;
1520	alu.last = 1;
1521	r = r600_bc_add_alu(ctx->bc, &alu);
1522	if (r)
1523		return r;
1524	r = r600_bc_add_literal(ctx->bc, ctx->value);
1525	if (r)
1526		return r;
1527	/* replicate result */
1528	return tgsi_helper_tempx_replicate(ctx);
1529}
1530
1531static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1532{
1533	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1534	struct r600_bc_alu alu;
1535	int i, r;
1536
1537	for (i = 0; i < 4; i++) {
1538		memset(&alu, 0, sizeof(struct r600_bc_alu));
1539		alu.src[0].sel = ctx->temp_reg;
1540		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1541		alu.dst.chan = i;
1542		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1543		if (r)
1544			return r;
1545		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1546		if (i == 3)
1547			alu.last = 1;
1548		r = r600_bc_add_alu(ctx->bc, &alu);
1549		if (r)
1550			return r;
1551	}
1552	return 0;
1553}
1554
1555static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1556{
1557	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1558	struct r600_bc_alu alu;
1559	int i, r;
1560
1561	memset(&alu, 0, sizeof(struct r600_bc_alu));
1562	alu.inst = ctx->inst_info->r600_opcode;
1563	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1564		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1565		if (r)
1566			return r;
1567		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1568	}
1569	alu.dst.sel = ctx->temp_reg;
1570	alu.dst.write = 1;
1571	alu.last = 1;
1572	r = r600_bc_add_alu(ctx->bc, &alu);
1573	if (r)
1574		return r;
1575	r = r600_bc_add_literal(ctx->bc, ctx->value);
1576	if (r)
1577		return r;
1578	/* replicate result */
1579	return tgsi_helper_tempx_replicate(ctx);
1580}
1581
1582static int tgsi_pow(struct r600_shader_ctx *ctx)
1583{
1584	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1585	struct r600_bc_alu alu;
1586	int r;
1587
1588	/* LOG2(a) */
1589	memset(&alu, 0, sizeof(struct r600_bc_alu));
1590	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1591	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1592	if (r)
1593		return r;
1594	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1595	alu.dst.sel = ctx->temp_reg;
1596	alu.dst.write = 1;
1597	alu.last = 1;
1598	r = r600_bc_add_alu(ctx->bc, &alu);
1599	if (r)
1600		return r;
1601	r = r600_bc_add_literal(ctx->bc,ctx->value);
1602	if (r)
1603		return r;
1604	/* b * LOG2(a) */
1605	memset(&alu, 0, sizeof(struct r600_bc_alu));
1606	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1607	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1608	if (r)
1609		return r;
1610	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1611	alu.src[1].sel = ctx->temp_reg;
1612	alu.dst.sel = ctx->temp_reg;
1613	alu.dst.write = 1;
1614	alu.last = 1;
1615	r = r600_bc_add_alu(ctx->bc, &alu);
1616	if (r)
1617		return r;
1618	r = r600_bc_add_literal(ctx->bc,ctx->value);
1619	if (r)
1620		return r;
1621	/* POW(a,b) = EXP2(b * LOG2(a))*/
1622	memset(&alu, 0, sizeof(struct r600_bc_alu));
1623	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1624	alu.src[0].sel = ctx->temp_reg;
1625	alu.dst.sel = ctx->temp_reg;
1626	alu.dst.write = 1;
1627	alu.last = 1;
1628	r = r600_bc_add_alu(ctx->bc, &alu);
1629	if (r)
1630		return r;
1631	r = r600_bc_add_literal(ctx->bc,ctx->value);
1632	if (r)
1633		return r;
1634	return tgsi_helper_tempx_replicate(ctx);
1635}
1636
1637static int tgsi_ssg(struct r600_shader_ctx *ctx)
1638{
1639	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1640	struct r600_bc_alu alu;
1641	struct r600_bc_alu_src r600_src[3];
1642	int i, r;
1643
1644	r = tgsi_split_constant(ctx, r600_src);
1645	if (r)
1646		return r;
1647	r = tgsi_split_literal_constant(ctx, r600_src);
1648	if (r)
1649		return r;
1650
1651	/* tmp = (src > 0 ? 1 : src) */
1652	for (i = 0; i < 4; i++) {
1653		memset(&alu, 0, sizeof(struct r600_bc_alu));
1654		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1655		alu.is_op3 = 1;
1656
1657		alu.dst.sel = ctx->temp_reg;
1658		alu.dst.chan = i;
1659
1660		alu.src[0] = r600_src[0];
1661		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1662
1663		alu.src[1].sel = V_SQ_ALU_SRC_1;
1664
1665		alu.src[2] = r600_src[0];
1666		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1667		if (i == 3)
1668			alu.last = 1;
1669		r = r600_bc_add_alu(ctx->bc, &alu);
1670		if (r)
1671			return r;
1672	}
1673	r = r600_bc_add_literal(ctx->bc, ctx->value);
1674	if (r)
1675		return r;
1676
1677	/* dst = (-tmp > 0 ? -1 : tmp) */
1678	for (i = 0; i < 4; i++) {
1679		memset(&alu, 0, sizeof(struct r600_bc_alu));
1680		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1681		alu.is_op3 = 1;
1682		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1683		if (r)
1684			return r;
1685
1686		alu.src[0].sel = ctx->temp_reg;
1687		alu.src[0].chan = i;
1688		alu.src[0].neg = 1;
1689
1690		alu.src[1].sel = V_SQ_ALU_SRC_1;
1691		alu.src[1].neg = 1;
1692
1693		alu.src[2].sel = ctx->temp_reg;
1694		alu.src[2].chan = i;
1695
1696		if (i == 3)
1697			alu.last = 1;
1698		r = r600_bc_add_alu(ctx->bc, &alu);
1699		if (r)
1700			return r;
1701	}
1702	return 0;
1703}
1704
1705static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1706{
1707	struct r600_bc_alu alu;
1708	int i, r;
1709
1710	r = r600_bc_add_literal(ctx->bc, ctx->value);
1711	if (r)
1712		return r;
1713	for (i = 0; i < 4; i++) {
1714		memset(&alu, 0, sizeof(struct r600_bc_alu));
1715		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1716			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1717			alu.dst.chan = i;
1718		} else {
1719			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1720			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1721			if (r)
1722				return r;
1723			alu.src[0].sel = ctx->temp_reg;
1724			alu.src[0].chan = i;
1725		}
1726		if (i == 3) {
1727			alu.last = 1;
1728		}
1729		r = r600_bc_add_alu(ctx->bc, &alu);
1730		if (r)
1731			return r;
1732	}
1733	return 0;
1734}
1735
1736static int tgsi_op3(struct r600_shader_ctx *ctx)
1737{
1738	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1739	struct r600_bc_alu_src r600_src[3];
1740	struct r600_bc_alu alu;
1741	int i, j, r;
1742
1743	r = tgsi_split_constant(ctx, r600_src);
1744	if (r)
1745		return r;
1746	r = tgsi_split_literal_constant(ctx, r600_src);
1747	if (r)
1748		return r;
1749	/* do it in 2 step as op3 doesn't support writemask */
1750	for (i = 0; i < 4; i++) {
1751		memset(&alu, 0, sizeof(struct r600_bc_alu));
1752		alu.inst = ctx->inst_info->r600_opcode;
1753		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1754			alu.src[j] = r600_src[j];
1755			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1756		}
1757		alu.dst.sel = ctx->temp_reg;
1758		alu.dst.chan = i;
1759		alu.dst.write = 1;
1760		alu.is_op3 = 1;
1761		if (i == 3) {
1762			alu.last = 1;
1763		}
1764		r = r600_bc_add_alu(ctx->bc, &alu);
1765		if (r)
1766			return r;
1767	}
1768	return tgsi_helper_copy(ctx, inst);
1769}
1770
1771static int tgsi_dp(struct r600_shader_ctx *ctx)
1772{
1773	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1774	struct r600_bc_alu_src r600_src[3];
1775	struct r600_bc_alu alu;
1776	int i, j, r;
1777
1778	r = tgsi_split_constant(ctx, r600_src);
1779	if (r)
1780		return r;
1781	r = tgsi_split_literal_constant(ctx, r600_src);
1782	if (r)
1783		return r;
1784	for (i = 0; i < 4; i++) {
1785		memset(&alu, 0, sizeof(struct r600_bc_alu));
1786		alu.inst = ctx->inst_info->r600_opcode;
1787		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1788			alu.src[j] = r600_src[j];
1789			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1790		}
1791		alu.dst.sel = ctx->temp_reg;
1792		alu.dst.chan = i;
1793		alu.dst.write = 1;
1794		/* handle some special cases */
1795		switch (ctx->inst_info->tgsi_opcode) {
1796		case TGSI_OPCODE_DP2:
1797			if (i > 1) {
1798				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1799				alu.src[0].chan = alu.src[1].chan = 0;
1800			}
1801			break;
1802		case TGSI_OPCODE_DP3:
1803			if (i > 2) {
1804				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1805				alu.src[0].chan = alu.src[1].chan = 0;
1806			}
1807			break;
1808		case TGSI_OPCODE_DPH:
1809			if (i == 3) {
1810				alu.src[0].sel = V_SQ_ALU_SRC_1;
1811				alu.src[0].chan = 0;
1812				alu.src[0].neg = 0;
1813			}
1814			break;
1815		default:
1816			break;
1817		}
1818		if (i == 3) {
1819			alu.last = 1;
1820		}
1821		r = r600_bc_add_alu(ctx->bc, &alu);
1822		if (r)
1823			return r;
1824	}
1825	return tgsi_helper_copy(ctx, inst);
1826}
1827
1828static int tgsi_tex(struct r600_shader_ctx *ctx)
1829{
1830	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1831	struct r600_bc_tex tex;
1832	struct r600_bc_alu alu;
1833	unsigned src_gpr;
1834	int r, i;
1835	int opcode;
1836	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1837	uint32_t lit_vals[4];
1838
1839	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1840
1841	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1842		/* Add perspective divide */
1843		memset(&alu, 0, sizeof(struct r600_bc_alu));
1844		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1845		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1846		if (r)
1847			return r;
1848
1849		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1850		alu.dst.sel = ctx->temp_reg;
1851		alu.dst.chan = 3;
1852		alu.last = 1;
1853		alu.dst.write = 1;
1854		r = r600_bc_add_alu(ctx->bc, &alu);
1855		if (r)
1856			return r;
1857
1858		for (i = 0; i < 3; i++) {
1859			memset(&alu, 0, sizeof(struct r600_bc_alu));
1860			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1861			alu.src[0].sel = ctx->temp_reg;
1862			alu.src[0].chan = 3;
1863			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1864			if (r)
1865				return r;
1866			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1867			alu.dst.sel = ctx->temp_reg;
1868			alu.dst.chan = i;
1869			alu.dst.write = 1;
1870			r = r600_bc_add_alu(ctx->bc, &alu);
1871			if (r)
1872				return r;
1873		}
1874		memset(&alu, 0, sizeof(struct r600_bc_alu));
1875		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1876		alu.src[0].sel = V_SQ_ALU_SRC_1;
1877		alu.src[0].chan = 0;
1878		alu.dst.sel = ctx->temp_reg;
1879		alu.dst.chan = 3;
1880		alu.last = 1;
1881		alu.dst.write = 1;
1882		r = r600_bc_add_alu(ctx->bc, &alu);
1883		if (r)
1884			return r;
1885		src_not_temp = FALSE;
1886		src_gpr = ctx->temp_reg;
1887	}
1888
1889	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1890		int src_chan, src2_chan;
1891
1892		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1893		for (i = 0; i < 4; i++) {
1894			memset(&alu, 0, sizeof(struct r600_bc_alu));
1895			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1896			switch (i) {
1897			case 0:
1898				src_chan = 2;
1899				src2_chan = 1;
1900				break;
1901			case 1:
1902				src_chan = 2;
1903				src2_chan = 0;
1904				break;
1905			case 2:
1906				src_chan = 0;
1907				src2_chan = 2;
1908				break;
1909			case 3:
1910				src_chan = 1;
1911				src2_chan = 2;
1912				break;
1913			default:
1914				assert(0);
1915				src_chan = 0;
1916				src2_chan = 0;
1917				break;
1918			}
1919			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1920			if (r)
1921				return r;
1922			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1923			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1924			if (r)
1925				return r;
1926			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1927			alu.dst.sel = ctx->temp_reg;
1928			alu.dst.chan = i;
1929			if (i == 3)
1930				alu.last = 1;
1931			alu.dst.write = 1;
1932			r = r600_bc_add_alu(ctx->bc, &alu);
1933			if (r)
1934				return r;
1935		}
1936
1937		/* tmp1.z = RCP_e(|tmp1.z|) */
1938		memset(&alu, 0, sizeof(struct r600_bc_alu));
1939		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1940		alu.src[0].sel = ctx->temp_reg;
1941		alu.src[0].chan = 2;
1942		alu.src[0].abs = 1;
1943		alu.dst.sel = ctx->temp_reg;
1944		alu.dst.chan = 2;
1945		alu.dst.write = 1;
1946		alu.last = 1;
1947		r = r600_bc_add_alu(ctx->bc, &alu);
1948		if (r)
1949			return r;
1950
1951		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1952		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1953		 * muladd has no writemask, have to use another temp
1954		 */
1955		memset(&alu, 0, sizeof(struct r600_bc_alu));
1956		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1957		alu.is_op3 = 1;
1958
1959		alu.src[0].sel = ctx->temp_reg;
1960		alu.src[0].chan = 0;
1961		alu.src[1].sel = ctx->temp_reg;
1962		alu.src[1].chan = 2;
1963
1964		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1965		alu.src[2].chan = 0;
1966
1967		alu.dst.sel = ctx->temp_reg;
1968		alu.dst.chan = 0;
1969		alu.dst.write = 1;
1970
1971		r = r600_bc_add_alu(ctx->bc, &alu);
1972		if (r)
1973			return r;
1974
1975		memset(&alu, 0, sizeof(struct r600_bc_alu));
1976		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1977		alu.is_op3 = 1;
1978
1979		alu.src[0].sel = ctx->temp_reg;
1980		alu.src[0].chan = 1;
1981		alu.src[1].sel = ctx->temp_reg;
1982		alu.src[1].chan = 2;
1983
1984		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1985		alu.src[2].chan = 0;
1986
1987		alu.dst.sel = ctx->temp_reg;
1988		alu.dst.chan = 1;
1989		alu.dst.write = 1;
1990
1991		alu.last = 1;
1992		r = r600_bc_add_alu(ctx->bc, &alu);
1993		if (r)
1994			return r;
1995
1996		lit_vals[0] = fui(1.5f);
1997
1998		r = r600_bc_add_literal(ctx->bc, lit_vals);
1999		if (r)
2000			return r;
2001		src_not_temp = FALSE;
2002		src_gpr = ctx->temp_reg;
2003	}
2004
2005	if (src_not_temp) {
2006		for (i = 0; i < 4; i++) {
2007			memset(&alu, 0, sizeof(struct r600_bc_alu));
2008			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2009			alu.src[0].sel = src_gpr;
2010			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2011			alu.dst.sel = ctx->temp_reg;
2012			alu.dst.chan = i;
2013			if (i == 3)
2014				alu.last = 1;
2015			alu.dst.write = 1;
2016			r = r600_bc_add_alu(ctx->bc, &alu);
2017			if (r)
2018				return r;
2019		}
2020		src_gpr = ctx->temp_reg;
2021	}
2022
2023	opcode = ctx->inst_info->r600_opcode;
2024	if (opcode == SQ_TEX_INST_SAMPLE &&
2025	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
2026		opcode = SQ_TEX_INST_SAMPLE_C;
2027
2028	memset(&tex, 0, sizeof(struct r600_bc_tex));
2029	tex.inst = opcode;
2030	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
2031	tex.resource_id = tex.sampler_id;
2032	tex.src_gpr = src_gpr;
2033	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2034	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2035	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2036	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2037	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2038	tex.src_sel_x = 0;
2039	tex.src_sel_y = 1;
2040	tex.src_sel_z = 2;
2041	tex.src_sel_w = 3;
2042
2043	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2044		tex.src_sel_x = 1;
2045		tex.src_sel_y = 0;
2046		tex.src_sel_z = 3;
2047		tex.src_sel_w = 1;
2048	}
2049
2050	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2051		tex.coord_type_x = 1;
2052		tex.coord_type_y = 1;
2053		tex.coord_type_z = 1;
2054		tex.coord_type_w = 1;
2055	}
2056
2057	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2058		tex.src_sel_w = 2;
2059
2060	r = r600_bc_add_tex(ctx->bc, &tex);
2061	if (r)
2062		return r;
2063
2064	/* add shadow ambient support  - gallium doesn't do it yet */
2065	return 0;
2066}
2067
2068static int tgsi_lrp(struct r600_shader_ctx *ctx)
2069{
2070	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2071	struct r600_bc_alu_src r600_src[3];
2072	struct r600_bc_alu alu;
2073	unsigned i;
2074	int r;
2075
2076	r = tgsi_split_constant(ctx, r600_src);
2077	if (r)
2078		return r;
2079	r = tgsi_split_literal_constant(ctx, r600_src);
2080	if (r)
2081		return r;
2082	/* 1 - src0 */
2083	for (i = 0; i < 4; i++) {
2084		memset(&alu, 0, sizeof(struct r600_bc_alu));
2085		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2086		alu.src[0].sel = V_SQ_ALU_SRC_1;
2087		alu.src[0].chan = 0;
2088		alu.src[1] = r600_src[0];
2089		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
2090		alu.src[1].neg = 1;
2091		alu.dst.sel = ctx->temp_reg;
2092		alu.dst.chan = i;
2093		if (i == 3) {
2094			alu.last = 1;
2095		}
2096		alu.dst.write = 1;
2097		r = r600_bc_add_alu(ctx->bc, &alu);
2098		if (r)
2099			return r;
2100	}
2101	r = r600_bc_add_literal(ctx->bc, ctx->value);
2102	if (r)
2103		return r;
2104
2105	/* (1 - src0) * src2 */
2106	for (i = 0; i < 4; i++) {
2107		memset(&alu, 0, sizeof(struct r600_bc_alu));
2108		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2109		alu.src[0].sel = ctx->temp_reg;
2110		alu.src[0].chan = i;
2111		alu.src[1] = r600_src[2];
2112		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2113		alu.dst.sel = ctx->temp_reg;
2114		alu.dst.chan = i;
2115		if (i == 3) {
2116			alu.last = 1;
2117		}
2118		alu.dst.write = 1;
2119		r = r600_bc_add_alu(ctx->bc, &alu);
2120		if (r)
2121			return r;
2122	}
2123	r = r600_bc_add_literal(ctx->bc, ctx->value);
2124	if (r)
2125		return r;
2126
2127	/* src0 * src1 + (1 - src0) * src2 */
2128	for (i = 0; i < 4; i++) {
2129		memset(&alu, 0, sizeof(struct r600_bc_alu));
2130		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2131		alu.is_op3 = 1;
2132		alu.src[0] = r600_src[0];
2133		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2134		alu.src[1] = r600_src[1];
2135		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2136		alu.src[2].sel = ctx->temp_reg;
2137		alu.src[2].chan = i;
2138		alu.dst.sel = ctx->temp_reg;
2139		alu.dst.chan = i;
2140		if (i == 3) {
2141			alu.last = 1;
2142		}
2143		r = r600_bc_add_alu(ctx->bc, &alu);
2144		if (r)
2145			return r;
2146	}
2147	return tgsi_helper_copy(ctx, inst);
2148}
2149
2150static int tgsi_cmp(struct r600_shader_ctx *ctx)
2151{
2152	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2153	struct r600_bc_alu_src r600_src[3];
2154	struct r600_bc_alu alu;
2155	int use_temp = 0;
2156	int i, r;
2157
2158	r = tgsi_split_constant(ctx, r600_src);
2159	if (r)
2160		return r;
2161	r = tgsi_split_literal_constant(ctx, r600_src);
2162	if (r)
2163		return r;
2164
2165	if (inst->Dst[0].Register.WriteMask != 0xf)
2166		use_temp = 1;
2167
2168	for (i = 0; i < 4; i++) {
2169		memset(&alu, 0, sizeof(struct r600_bc_alu));
2170		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2171		alu.src[0] = r600_src[0];
2172		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2173
2174		alu.src[1] = r600_src[2];
2175		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2176
2177		alu.src[2] = r600_src[1];
2178		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2179
2180		if (use_temp)
2181			alu.dst.sel = ctx->temp_reg;
2182		else {
2183			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2184			if (r)
2185				return r;
2186		}
2187		alu.dst.chan = i;
2188		alu.dst.write = 1;
2189		alu.is_op3 = 1;
2190		if (i == 3)
2191			alu.last = 1;
2192		r = r600_bc_add_alu(ctx->bc, &alu);
2193		if (r)
2194			return r;
2195	}
2196	if (use_temp)
2197		return tgsi_helper_copy(ctx, inst);
2198	return 0;
2199}
2200
2201static int tgsi_xpd(struct r600_shader_ctx *ctx)
2202{
2203	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2204	struct r600_bc_alu_src r600_src[3];
2205	struct r600_bc_alu alu;
2206	uint32_t use_temp = 0;
2207	int i, r;
2208
2209	if (inst->Dst[0].Register.WriteMask != 0xf)
2210		use_temp = 1;
2211
2212	r = tgsi_split_constant(ctx, r600_src);
2213	if (r)
2214		return r;
2215	r = tgsi_split_literal_constant(ctx, r600_src);
2216	if (r)
2217		return r;
2218
2219	for (i = 0; i < 4; i++) {
2220		memset(&alu, 0, sizeof(struct r600_bc_alu));
2221		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2222
2223		alu.src[0] = r600_src[0];
2224		switch (i) {
2225		case 0:
2226			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2227			break;
2228		case 1:
2229			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2230			break;
2231		case 2:
2232			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2233			break;
2234		case 3:
2235			alu.src[0].sel = V_SQ_ALU_SRC_0;
2236			alu.src[0].chan = i;
2237		}
2238
2239		alu.src[1] = r600_src[1];
2240		switch (i) {
2241		case 0:
2242			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2243			break;
2244		case 1:
2245			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2246			break;
2247		case 2:
2248			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2249			break;
2250		case 3:
2251			alu.src[1].sel = V_SQ_ALU_SRC_0;
2252			alu.src[1].chan = i;
2253		}
2254
2255		alu.dst.sel = ctx->temp_reg;
2256		alu.dst.chan = i;
2257		alu.dst.write = 1;
2258
2259		if (i == 3)
2260			alu.last = 1;
2261		r = r600_bc_add_alu(ctx->bc, &alu);
2262		if (r)
2263			return r;
2264
2265		r = r600_bc_add_literal(ctx->bc, ctx->value);
2266		if (r)
2267			return r;
2268	}
2269
2270	for (i = 0; i < 4; i++) {
2271		memset(&alu, 0, sizeof(struct r600_bc_alu));
2272		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2273
2274		alu.src[0] = r600_src[0];
2275		switch (i) {
2276		case 0:
2277			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2278			break;
2279		case 1:
2280			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2281			break;
2282		case 2:
2283			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2284			break;
2285		case 3:
2286			alu.src[0].sel = V_SQ_ALU_SRC_0;
2287			alu.src[0].chan = i;
2288		}
2289
2290		alu.src[1] = r600_src[1];
2291		switch (i) {
2292		case 0:
2293			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2294			break;
2295		case 1:
2296			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2297			break;
2298		case 2:
2299			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2300			break;
2301		case 3:
2302			alu.src[1].sel = V_SQ_ALU_SRC_0;
2303			alu.src[1].chan = i;
2304		}
2305
2306		alu.src[2].sel = ctx->temp_reg;
2307		alu.src[2].neg = 1;
2308		alu.src[2].chan = i;
2309
2310		if (use_temp)
2311			alu.dst.sel = ctx->temp_reg;
2312		else {
2313			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2314			if (r)
2315				return r;
2316		}
2317		alu.dst.chan = i;
2318		alu.dst.write = 1;
2319		alu.is_op3 = 1;
2320		if (i == 3)
2321			alu.last = 1;
2322		r = r600_bc_add_alu(ctx->bc, &alu);
2323		if (r)
2324			return r;
2325
2326		r = r600_bc_add_literal(ctx->bc, ctx->value);
2327		if (r)
2328			return r;
2329	}
2330	if (use_temp)
2331		return tgsi_helper_copy(ctx, inst);
2332	return 0;
2333}
2334
2335static int tgsi_exp(struct r600_shader_ctx *ctx)
2336{
2337	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2338	struct r600_bc_alu_src r600_src[3] = { { 0 } };
2339	struct r600_bc_alu alu;
2340	int r;
2341
2342	/* result.x = 2^floor(src); */
2343	if (inst->Dst[0].Register.WriteMask & 1) {
2344		memset(&alu, 0, sizeof(struct r600_bc_alu));
2345
2346		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2347		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2348		if (r)
2349			return r;
2350
2351		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2352
2353		alu.dst.sel = ctx->temp_reg;
2354		alu.dst.chan = 0;
2355		alu.dst.write = 1;
2356		alu.last = 1;
2357		r = r600_bc_add_alu(ctx->bc, &alu);
2358		if (r)
2359			return r;
2360
2361		r = r600_bc_add_literal(ctx->bc, ctx->value);
2362		if (r)
2363			return r;
2364
2365		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2366		alu.src[0].sel = ctx->temp_reg;
2367		alu.src[0].chan = 0;
2368
2369		alu.dst.sel = ctx->temp_reg;
2370		alu.dst.chan = 0;
2371		alu.dst.write = 1;
2372		alu.last = 1;
2373		r = r600_bc_add_alu(ctx->bc, &alu);
2374		if (r)
2375			return r;
2376
2377		r = r600_bc_add_literal(ctx->bc, ctx->value);
2378		if (r)
2379			return r;
2380	}
2381
2382	/* result.y = tmp - floor(tmp); */
2383	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2384		memset(&alu, 0, sizeof(struct r600_bc_alu));
2385
2386		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2387		alu.src[0] = r600_src[0];
2388		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2389		if (r)
2390			return r;
2391		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2392
2393		alu.dst.sel = ctx->temp_reg;
2394//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2395//		if (r)
2396//			return r;
2397		alu.dst.write = 1;
2398		alu.dst.chan = 1;
2399
2400		alu.last = 1;
2401
2402		r = r600_bc_add_alu(ctx->bc, &alu);
2403		if (r)
2404			return r;
2405		r = r600_bc_add_literal(ctx->bc, ctx->value);
2406		if (r)
2407			return r;
2408	}
2409
2410	/* result.z = RoughApprox2ToX(tmp);*/
2411	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2412		memset(&alu, 0, sizeof(struct r600_bc_alu));
2413		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2414		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2415		if (r)
2416			return r;
2417		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2418
2419		alu.dst.sel = ctx->temp_reg;
2420		alu.dst.write = 1;
2421		alu.dst.chan = 2;
2422
2423		alu.last = 1;
2424
2425		r = r600_bc_add_alu(ctx->bc, &alu);
2426		if (r)
2427			return r;
2428		r = r600_bc_add_literal(ctx->bc, ctx->value);
2429		if (r)
2430			return r;
2431	}
2432
2433	/* result.w = 1.0;*/
2434	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2435		memset(&alu, 0, sizeof(struct r600_bc_alu));
2436
2437		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2438		alu.src[0].sel = V_SQ_ALU_SRC_1;
2439		alu.src[0].chan = 0;
2440
2441		alu.dst.sel = ctx->temp_reg;
2442		alu.dst.chan = 3;
2443		alu.dst.write = 1;
2444		alu.last = 1;
2445		r = r600_bc_add_alu(ctx->bc, &alu);
2446		if (r)
2447			return r;
2448		r = r600_bc_add_literal(ctx->bc, ctx->value);
2449		if (r)
2450			return r;
2451	}
2452	return tgsi_helper_copy(ctx, inst);
2453}
2454
2455static int tgsi_log(struct r600_shader_ctx *ctx)
2456{
2457	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2458	struct r600_bc_alu alu;
2459	int r;
2460
2461	/* result.x = floor(log2(src)); */
2462	if (inst->Dst[0].Register.WriteMask & 1) {
2463		memset(&alu, 0, sizeof(struct r600_bc_alu));
2464
2465		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2466		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2467		if (r)
2468			return r;
2469
2470		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2471
2472		alu.dst.sel = ctx->temp_reg;
2473		alu.dst.chan = 0;
2474		alu.dst.write = 1;
2475		alu.last = 1;
2476		r = r600_bc_add_alu(ctx->bc, &alu);
2477		if (r)
2478			return r;
2479
2480		r = r600_bc_add_literal(ctx->bc, ctx->value);
2481		if (r)
2482			return r;
2483
2484		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2485		alu.src[0].sel = ctx->temp_reg;
2486		alu.src[0].chan = 0;
2487
2488		alu.dst.sel = ctx->temp_reg;
2489		alu.dst.chan = 0;
2490		alu.dst.write = 1;
2491		alu.last = 1;
2492
2493		r = r600_bc_add_alu(ctx->bc, &alu);
2494		if (r)
2495			return r;
2496
2497		r = r600_bc_add_literal(ctx->bc, ctx->value);
2498		if (r)
2499			return r;
2500	}
2501
2502	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2503	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2504		memset(&alu, 0, sizeof(struct r600_bc_alu));
2505
2506		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2507		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2508		if (r)
2509			return r;
2510
2511		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2512
2513		alu.dst.sel = ctx->temp_reg;
2514		alu.dst.chan = 1;
2515		alu.dst.write = 1;
2516		alu.last = 1;
2517
2518		r = r600_bc_add_alu(ctx->bc, &alu);
2519		if (r)
2520			return r;
2521
2522		r = r600_bc_add_literal(ctx->bc, ctx->value);
2523		if (r)
2524			return r;
2525
2526		memset(&alu, 0, sizeof(struct r600_bc_alu));
2527
2528		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2529		alu.src[0].sel = ctx->temp_reg;
2530		alu.src[0].chan = 1;
2531
2532		alu.dst.sel = ctx->temp_reg;
2533		alu.dst.chan = 1;
2534		alu.dst.write = 1;
2535		alu.last = 1;
2536
2537		r = r600_bc_add_alu(ctx->bc, &alu);
2538		if (r)
2539			return r;
2540
2541		r = r600_bc_add_literal(ctx->bc, ctx->value);
2542		if (r)
2543			return r;
2544
2545		memset(&alu, 0, sizeof(struct r600_bc_alu));
2546
2547		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2548		alu.src[0].sel = ctx->temp_reg;
2549		alu.src[0].chan = 1;
2550
2551		alu.dst.sel = ctx->temp_reg;
2552		alu.dst.chan = 1;
2553		alu.dst.write = 1;
2554		alu.last = 1;
2555
2556		r = r600_bc_add_alu(ctx->bc, &alu);
2557		if (r)
2558			return r;
2559
2560		r = r600_bc_add_literal(ctx->bc, ctx->value);
2561		if (r)
2562			return r;
2563
2564		memset(&alu, 0, sizeof(struct r600_bc_alu));
2565
2566		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2567		alu.src[0].sel = ctx->temp_reg;
2568		alu.src[0].chan = 1;
2569
2570		alu.dst.sel = ctx->temp_reg;
2571		alu.dst.chan = 1;
2572		alu.dst.write = 1;
2573		alu.last = 1;
2574
2575		r = r600_bc_add_alu(ctx->bc, &alu);
2576		if (r)
2577			return r;
2578
2579		r = r600_bc_add_literal(ctx->bc, ctx->value);
2580		if (r)
2581			return r;
2582
2583		memset(&alu, 0, sizeof(struct r600_bc_alu));
2584
2585		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2586
2587		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2588		if (r)
2589			return r;
2590
2591		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2592
2593		alu.src[1].sel = ctx->temp_reg;
2594		alu.src[1].chan = 1;
2595
2596		alu.dst.sel = ctx->temp_reg;
2597		alu.dst.chan = 1;
2598		alu.dst.write = 1;
2599		alu.last = 1;
2600
2601		r = r600_bc_add_alu(ctx->bc, &alu);
2602		if (r)
2603			return r;
2604
2605		r = r600_bc_add_literal(ctx->bc, ctx->value);
2606		if (r)
2607			return r;
2608	}
2609
2610	/* result.z = log2(src);*/
2611	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2612		memset(&alu, 0, sizeof(struct r600_bc_alu));
2613
2614		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2615		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2616		if (r)
2617			return r;
2618
2619		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2620
2621		alu.dst.sel = ctx->temp_reg;
2622		alu.dst.write = 1;
2623		alu.dst.chan = 2;
2624		alu.last = 1;
2625
2626		r = r600_bc_add_alu(ctx->bc, &alu);
2627		if (r)
2628			return r;
2629
2630		r = r600_bc_add_literal(ctx->bc, ctx->value);
2631		if (r)
2632			return r;
2633	}
2634
2635	/* result.w = 1.0; */
2636	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2637		memset(&alu, 0, sizeof(struct r600_bc_alu));
2638
2639		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2640		alu.src[0].sel = V_SQ_ALU_SRC_1;
2641		alu.src[0].chan = 0;
2642
2643		alu.dst.sel = ctx->temp_reg;
2644		alu.dst.chan = 3;
2645		alu.dst.write = 1;
2646		alu.last = 1;
2647
2648		r = r600_bc_add_alu(ctx->bc, &alu);
2649		if (r)
2650			return r;
2651
2652		r = r600_bc_add_literal(ctx->bc, ctx->value);
2653		if (r)
2654			return r;
2655	}
2656
2657	return tgsi_helper_copy(ctx, inst);
2658}
2659
2660static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2661{
2662	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2663	struct r600_bc_alu alu;
2664	int r;
2665	memset(&alu, 0, sizeof(struct r600_bc_alu));
2666
2667	switch (inst->Instruction.Opcode) {
2668	case TGSI_OPCODE_ARL:
2669		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2670		break;
2671	case TGSI_OPCODE_ARR:
2672		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2673		break;
2674	default:
2675		assert(0);
2676		return -1;
2677	}
2678
2679	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2680	if (r)
2681		return r;
2682	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2683	alu.last = 1;
2684	alu.dst.chan = 0;
2685	alu.dst.sel = ctx->temp_reg;
2686	alu.dst.write = 1;
2687	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2688	if (r)
2689		return r;
2690	memset(&alu, 0, sizeof(struct r600_bc_alu));
2691	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2692	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2693	if (r)
2694		return r;
2695	alu.src[0].sel = ctx->temp_reg;
2696	alu.src[0].chan = 0;
2697	alu.last = 1;
2698	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2699	if (r)
2700		return r;
2701	return 0;
2702}
2703static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2704{
2705	/* TODO from r600c, ar values don't persist between clauses */
2706	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2707	struct r600_bc_alu alu;
2708	int r;
2709	memset(&alu, 0, sizeof(struct r600_bc_alu));
2710
2711	switch (inst->Instruction.Opcode) {
2712	case TGSI_OPCODE_ARL:
2713		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2714		break;
2715	case TGSI_OPCODE_ARR:
2716		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2717		break;
2718	default:
2719		assert(0);
2720		return -1;
2721	}
2722
2723
2724	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2725	if (r)
2726		return r;
2727	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2728
2729	alu.last = 1;
2730
2731	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2732	if (r)
2733		return r;
2734	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2735	return 0;
2736}
2737
2738static int tgsi_opdst(struct r600_shader_ctx *ctx)
2739{
2740	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2741	struct r600_bc_alu alu;
2742	int i, r = 0;
2743
2744	for (i = 0; i < 4; i++) {
2745		memset(&alu, 0, sizeof(struct r600_bc_alu));
2746
2747		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2748		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2749		if (r)
2750			return r;
2751
2752		if (i == 0 || i == 3) {
2753			alu.src[0].sel = V_SQ_ALU_SRC_1;
2754		} else {
2755			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2756			if (r)
2757				return r;
2758			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2759		}
2760
2761	        if (i == 0 || i == 2) {
2762			alu.src[1].sel = V_SQ_ALU_SRC_1;
2763		} else {
2764			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2765			if (r)
2766				return r;
2767			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2768		}
2769		if (i == 3)
2770			alu.last = 1;
2771		r = r600_bc_add_alu(ctx->bc, &alu);
2772		if (r)
2773			return r;
2774	}
2775	return 0;
2776}
2777
2778static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2779{
2780	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2781	struct r600_bc_alu alu;
2782	int r;
2783
2784	memset(&alu, 0, sizeof(struct r600_bc_alu));
2785	alu.inst = opcode;
2786	alu.predicate = 1;
2787
2788	alu.dst.sel = ctx->temp_reg;
2789	alu.dst.write = 1;
2790	alu.dst.chan = 0;
2791
2792	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2793	if (r)
2794		return r;
2795	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2796	alu.src[1].sel = V_SQ_ALU_SRC_0;
2797	alu.src[1].chan = 0;
2798
2799	alu.last = 1;
2800
2801	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2802	if (r)
2803		return r;
2804	return 0;
2805}
2806
2807static int pops(struct r600_shader_ctx *ctx, int pops)
2808{
2809	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2810	ctx->bc->cf_last->pop_count = pops;
2811	return 0;
2812}
2813
2814static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2815{
2816	switch(reason) {
2817	case FC_PUSH_VPM:
2818		ctx->bc->callstack[ctx->bc->call_sp].current--;
2819		break;
2820	case FC_PUSH_WQM:
2821	case FC_LOOP:
2822		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2823		break;
2824	case FC_REP:
2825		/* TOODO : for 16 vp asic should -= 2; */
2826		ctx->bc->callstack[ctx->bc->call_sp].current --;
2827		break;
2828	}
2829}
2830
2831static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2832{
2833	if (check_max_only) {
2834		int diff;
2835		switch (reason) {
2836		case FC_PUSH_VPM:
2837			diff = 1;
2838			break;
2839		case FC_PUSH_WQM:
2840			diff = 4;
2841			break;
2842		default:
2843			assert(0);
2844			diff = 0;
2845		}
2846		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2847		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2848			ctx->bc->callstack[ctx->bc->call_sp].max =
2849				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2850		}
2851		return;
2852	}
2853	switch (reason) {
2854	case FC_PUSH_VPM:
2855		ctx->bc->callstack[ctx->bc->call_sp].current++;
2856		break;
2857	case FC_PUSH_WQM:
2858	case FC_LOOP:
2859		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2860		break;
2861	case FC_REP:
2862		ctx->bc->callstack[ctx->bc->call_sp].current++;
2863		break;
2864	}
2865
2866	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2867	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2868		ctx->bc->callstack[ctx->bc->call_sp].max =
2869			ctx->bc->callstack[ctx->bc->call_sp].current;
2870	}
2871}
2872
2873static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2874{
2875	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2876
2877	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2878						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2879	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2880	sp->num_mid++;
2881}
2882
2883static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2884{
2885	ctx->bc->fc_sp++;
2886	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2887	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2888}
2889
2890static void fc_poplevel(struct r600_shader_ctx *ctx)
2891{
2892	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2893	if (sp->mid) {
2894		free(sp->mid);
2895		sp->mid = NULL;
2896	}
2897	sp->num_mid = 0;
2898	sp->start = NULL;
2899	sp->type = 0;
2900	ctx->bc->fc_sp--;
2901}
2902
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It holds unfinished helpers for return/jump handling and flag-based loop
 * exits; several of them are empty stubs.
 */
#if 0
/* Append a CF RETURN instruction. Always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Append a CF JUMP instruction with pop_count = pops.
 * The offset argument is not used yet (see TODO). Always returns 0.
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Sequence the helpers above: test flag, jump, set the in-loop flag with
 * V_SQ_ALU_SRC_0, pop ifidx + 1 levels, then return.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test flag, then append the CF instruction given by the current
 * instruction's r600_opcode with pop_count 1, register it in the "mid"
 * list of flow-control level fc_sp, and pop one level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2950
2951static int tgsi_if(struct r600_shader_ctx *ctx)
2952{
2953	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2954
2955	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2956
2957	fc_pushlevel(ctx, FC_IF);
2958
2959	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2960	return 0;
2961}
2962
2963static int tgsi_else(struct r600_shader_ctx *ctx)
2964{
2965	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2966	ctx->bc->cf_last->pop_count = 1;
2967
2968	fc_set_mid(ctx, ctx->bc->fc_sp);
2969	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2970	return 0;
2971}
2972
2973static int tgsi_endif(struct r600_shader_ctx *ctx)
2974{
2975	pops(ctx, 1);
2976	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2977		R600_ERR("if/endif unbalanced in shader\n");
2978		return -1;
2979	}
2980
2981	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2982		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2983		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2984	} else {
2985		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2986	}
2987	fc_poplevel(ctx);
2988
2989	callstack_decrease_current(ctx, FC_PUSH_VPM);
2990	return 0;
2991}
2992
2993static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2994{
2995	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2996
2997	fc_pushlevel(ctx, FC_LOOP);
2998
2999	/* check stack depth */
3000	callstack_check_depth(ctx, FC_LOOP, 0);
3001	return 0;
3002}
3003
3004static int tgsi_endloop(struct r600_shader_ctx *ctx)
3005{
3006	int i;
3007
3008	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3009
3010	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3011		R600_ERR("loop/endloop in shader code are not paired.\n");
3012		return -EINVAL;
3013	}
3014
3015	/* fixup loop pointers - from r600isa
3016	   LOOP END points to CF after LOOP START,
3017	   LOOP START point to CF after LOOP END
3018	   BRK/CONT point to LOOP END CF
3019	*/
3020	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3021
3022	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3023
3024	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3025		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3026	}
3027	/* TODO add LOOPRET support */
3028	fc_poplevel(ctx);
3029	callstack_decrease_current(ctx, FC_LOOP);
3030	return 0;
3031}
3032
3033static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3034{
3035	unsigned int fscp;
3036
3037	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3038	{
3039		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3040			break;
3041	}
3042
3043	if (fscp == 0) {
3044		R600_ERR("Break not inside loop/endloop pair\n");
3045		return -EINVAL;
3046	}
3047
3048	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3049	ctx->bc->cf_last->pop_count = 1;
3050
3051	fc_set_mid(ctx, fscp);
3052
3053	pops(ctx, 1);
3054	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3055	return 0;
3056}
3057
3058static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3059	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3060	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3061	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3062
3063	/* FIXME:
3064	 * For state trackers other than OpenGL, we'll want to use
3065	 * _RECIP_IEEE instead.
3066	 */
3067	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3068
3069	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3070	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3071	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3072	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3073	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3074	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3075	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3076	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3077	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3078	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3079	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3080	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3081	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3082	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3083	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3084	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085	/* gap */
3086	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	/* gap */
3089	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3092	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3094	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3096	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3097	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3098	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3099	/* gap */
3100	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3102	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3104	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3105	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3106	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3107	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3108	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3114	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3116	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3117	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3118	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3119	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3121	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3123	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3126	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3130	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3132	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3134	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3135	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3136	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3137	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3140	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3141	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3142	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3143	/* gap */
3144	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3147	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3148	/* gap */
3149	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3157	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158	/* gap */
3159	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3164	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3165	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3168	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3171	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3172	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3173	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174	/* gap */
3175	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3176	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3177	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3178	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180	/* gap */
3181	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3182	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3183	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3184	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3186	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3187	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3188	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3190	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3191	/* gap */
3192	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3193	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3194	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3195	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3199	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3205	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3206	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3210	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3217	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3218	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3219	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3220};
3221
3222static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3223	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3224	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3225	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3226	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3227	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3228	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3229	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3230	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3231	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3232	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3233	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3234	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3235	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3236	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3237	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3238	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3239	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3240	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3241	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3242	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3243	/* gap */
3244	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246	/* gap */
3247	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3249	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3250	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3251	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3252	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3254	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3255	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3256	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3257	/* gap */
3258	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3259	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3260	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3262	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3263	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3264	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3265	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3266	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3268	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3269	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3271	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3272	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3273	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3274	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3275	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3276	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3277	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3278	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3279	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3281	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3282	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3284	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3288	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3289	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3291	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3292	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3293	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3294	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3295	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3296	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3297	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3298	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3299	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3300	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3301	/* gap */
3302	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3303	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3304	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3305	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3306	/* gap */
3307	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3312	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3314	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3315	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316	/* gap */
3317	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3322	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3323	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3325	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3326	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3327	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3328	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3329	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3330	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3331	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3332	/* gap */
3333	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3334	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3335	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3336	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3337	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338	/* gap */
3339	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3340	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3341	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3342	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3343	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3344	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3345	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3346	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3347	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3348	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3349	/* gap */
3350	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3357	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3359	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3361	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3362	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3363	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3364	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3365	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3366	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3367	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3368	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3369	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3370	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3371	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3372	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3373	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3374	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3375	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3376	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3377	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3378};
3379